├── .gitignore ├── README.md ├── requirement.txt ├── setup.py ├── train_mcts_scripts ├── game24 │ ├── ds_config.json │ ├── mcts_game24_llama_deepspeed.yaml │ ├── test_policy_and_value.sh │ ├── train_game24_critic.py │ └── train_game24_sft.py ├── gsm8k │ ├── README.md │ ├── ds_config.json │ ├── it1_gsm8k.ipynb │ ├── mcts_gsm8k_llama_deepspeed.yaml │ ├── test_policy_and_value.sh │ ├── train_gsm8k_critic.py │ └── train_gsm8k_sft.py ├── prontoqa │ ├── ds_config.json │ ├── mcts_prontoqa_llama_deepspeed.yaml │ ├── test_policy_and_value.sh │ ├── train_prontoqa_critic.py │ └── train_prontoqa_sft.py └── rlhf │ ├── README.md │ ├── accelerate_config.yaml │ ├── ds_config_no_offload.json │ ├── filter_top_data_policy_training.py │ ├── mix_value_data.py │ ├── test_policy_and_value.sh │ ├── train_rlhf_critic.py │ └── train_rlhf_policy.py └── tsllm ├── argparse_utils.py ├── distributed └── utils.py ├── envs ├── __init__.py ├── base_env.py ├── game24 │ ├── 24.csv │ ├── __init__.py │ ├── data.py │ ├── env.py │ ├── prompt.py │ └── train_data │ │ ├── test_dedup.jsonl │ │ └── train_dedup.jsonl ├── gsm8k │ ├── __init__.py │ ├── data.py │ ├── env.py │ ├── prompt.py │ └── train_data │ │ └── sft_init.jsonl ├── prontoqa │ ├── __init__.py │ ├── data.py │ ├── env.py │ ├── prompt.py │ ├── prontoqa.json │ └── train_data │ │ ├── all.jsonl │ │ ├── test.jsonl │ │ └── train.jsonl ├── rlhf │ ├── __init__.py │ ├── data.py │ ├── env.py │ └── prompt.py ├── tests │ ├── test_game24.py │ ├── test_gsm8k.py │ ├── test_prontoqa.py │ └── test_rlhf.py └── utils.py ├── inference ├── evaluation │ └── vote_utils.py ├── lm_self_value.py ├── trajectory_collector.py └── value.py ├── llm ├── ct2_utils.py └── text_generation.py ├── mcts ├── tree.py └── utils.py ├── merge_jsonl.py ├── model ├── __init__.py ├── llama_flash_attn_monkey_patch.py ├── modeling_actor_critic.py ├── modeling_base.py ├── modeling_prm.py └── utils.py ├── offline_rl ├── dedup.py ├── game24 │ ├── gen_3.sh │ └── process.sh ├── generate_data.py ├── gsm8k_data │ ├── gen_3.sh │ └── process.sh ├── merge.py ├── prontoqa │ ├── gen_3.sh │ └── process.sh ├── rlhf │ ├── gen_3.sh │ ├── process.py │ └── process.sh ├── sample.py ├── split_two_test.py ├── test_sft_and_v.py ├── test_sft_and_v_rlhf.py └── utils.py └── rl ├── config.py ├── data ├── buffer.py ├── node_types_new.py ├── sft_buffer.py └── traj_buffer.py └── trainer ├── base_trainer.py ├── mcts_trainer_traj_ct2_sft.py ├── mcts_trainer_traj_ct2_value.py ├── opt_utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/README.md -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/requirement.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/setup.py -------------------------------------------------------------------------------- /train_mcts_scripts/game24/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/ds_config.json -------------------------------------------------------------------------------- /train_mcts_scripts/game24/mcts_game24_llama_deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/mcts_game24_llama_deepspeed.yaml -------------------------------------------------------------------------------- /train_mcts_scripts/game24/test_policy_and_value.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/test_policy_and_value.sh -------------------------------------------------------------------------------- /train_mcts_scripts/game24/train_game24_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/train_game24_critic.py -------------------------------------------------------------------------------- /train_mcts_scripts/game24/train_game24_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/train_game24_sft.py -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/README.md -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/ds_config.json -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/it1_gsm8k.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/it1_gsm8k.ipynb -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/mcts_gsm8k_llama_deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/mcts_gsm8k_llama_deepspeed.yaml -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/test_policy_and_value.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/test_policy_and_value.sh -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/train_gsm8k_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/train_gsm8k_critic.py -------------------------------------------------------------------------------- /train_mcts_scripts/gsm8k/train_gsm8k_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/train_gsm8k_sft.py -------------------------------------------------------------------------------- /train_mcts_scripts/prontoqa/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/ds_config.json -------------------------------------------------------------------------------- /train_mcts_scripts/prontoqa/mcts_prontoqa_llama_deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/mcts_prontoqa_llama_deepspeed.yaml -------------------------------------------------------------------------------- /train_mcts_scripts/prontoqa/test_policy_and_value.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/test_policy_and_value.sh -------------------------------------------------------------------------------- /train_mcts_scripts/prontoqa/train_prontoqa_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/train_prontoqa_critic.py -------------------------------------------------------------------------------- /train_mcts_scripts/prontoqa/train_prontoqa_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/train_prontoqa_sft.py -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/README.md -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/accelerate_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/accelerate_config.yaml -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/ds_config_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/ds_config_no_offload.json -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/filter_top_data_policy_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/filter_top_data_policy_training.py -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/mix_value_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/mix_value_data.py -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/test_policy_and_value.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/test_policy_and_value.sh -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/train_rlhf_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/train_rlhf_critic.py -------------------------------------------------------------------------------- /train_mcts_scripts/rlhf/train_rlhf_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/train_rlhf_policy.py -------------------------------------------------------------------------------- /tsllm/argparse_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/argparse_utils.py -------------------------------------------------------------------------------- /tsllm/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/distributed/utils.py -------------------------------------------------------------------------------- /tsllm/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/__init__.py -------------------------------------------------------------------------------- /tsllm/envs/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/base_env.py -------------------------------------------------------------------------------- /tsllm/envs/game24/24.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/24.csv -------------------------------------------------------------------------------- /tsllm/envs/game24/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/__init__.py -------------------------------------------------------------------------------- /tsllm/envs/game24/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/data.py -------------------------------------------------------------------------------- /tsllm/envs/game24/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/env.py -------------------------------------------------------------------------------- /tsllm/envs/game24/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/prompt.py -------------------------------------------------------------------------------- /tsllm/envs/game24/train_data/test_dedup.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/train_data/test_dedup.jsonl -------------------------------------------------------------------------------- /tsllm/envs/game24/train_data/train_dedup.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/train_data/train_dedup.jsonl -------------------------------------------------------------------------------- /tsllm/envs/gsm8k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/__init__.py -------------------------------------------------------------------------------- /tsllm/envs/gsm8k/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/data.py -------------------------------------------------------------------------------- /tsllm/envs/gsm8k/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/env.py -------------------------------------------------------------------------------- /tsllm/envs/gsm8k/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/prompt.py -------------------------------------------------------------------------------- /tsllm/envs/gsm8k/train_data/sft_init.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/train_data/sft_init.jsonl -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/__init__.py -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/data.py -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/env.py -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/prompt.py -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/prontoqa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/prontoqa.json -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/train_data/all.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/all.jsonl -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/train_data/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/test.jsonl -------------------------------------------------------------------------------- /tsllm/envs/prontoqa/train_data/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/train.jsonl -------------------------------------------------------------------------------- /tsllm/envs/rlhf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/__init__.py -------------------------------------------------------------------------------- /tsllm/envs/rlhf/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/data.py -------------------------------------------------------------------------------- /tsllm/envs/rlhf/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/env.py -------------------------------------------------------------------------------- /tsllm/envs/rlhf/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/prompt.py -------------------------------------------------------------------------------- /tsllm/envs/tests/test_game24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_game24.py -------------------------------------------------------------------------------- /tsllm/envs/tests/test_gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_gsm8k.py -------------------------------------------------------------------------------- /tsllm/envs/tests/test_prontoqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_prontoqa.py -------------------------------------------------------------------------------- /tsllm/envs/tests/test_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_rlhf.py -------------------------------------------------------------------------------- /tsllm/envs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/utils.py -------------------------------------------------------------------------------- /tsllm/inference/evaluation/vote_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/evaluation/vote_utils.py -------------------------------------------------------------------------------- /tsllm/inference/lm_self_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/lm_self_value.py -------------------------------------------------------------------------------- /tsllm/inference/trajectory_collector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/trajectory_collector.py -------------------------------------------------------------------------------- /tsllm/inference/value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/value.py -------------------------------------------------------------------------------- /tsllm/llm/ct2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/llm/ct2_utils.py -------------------------------------------------------------------------------- /tsllm/llm/text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/llm/text_generation.py -------------------------------------------------------------------------------- /tsllm/mcts/tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/mcts/tree.py -------------------------------------------------------------------------------- /tsllm/mcts/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/mcts/utils.py -------------------------------------------------------------------------------- /tsllm/merge_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/merge_jsonl.py -------------------------------------------------------------------------------- /tsllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/__init__.py -------------------------------------------------------------------------------- /tsllm/model/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /tsllm/model/modeling_actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_actor_critic.py -------------------------------------------------------------------------------- /tsllm/model/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_base.py -------------------------------------------------------------------------------- /tsllm/model/modeling_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_prm.py -------------------------------------------------------------------------------- /tsllm/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/utils.py -------------------------------------------------------------------------------- /tsllm/offline_rl/dedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/dedup.py -------------------------------------------------------------------------------- /tsllm/offline_rl/game24/gen_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/game24/gen_3.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/game24/process.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/game24/process.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/generate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/generate_data.py -------------------------------------------------------------------------------- /tsllm/offline_rl/gsm8k_data/gen_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/gsm8k_data/gen_3.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/gsm8k_data/process.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/gsm8k_data/process.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/merge.py -------------------------------------------------------------------------------- /tsllm/offline_rl/prontoqa/gen_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/prontoqa/gen_3.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/prontoqa/process.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/prontoqa/process.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/rlhf/gen_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/gen_3.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/rlhf/process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/process.py -------------------------------------------------------------------------------- /tsllm/offline_rl/rlhf/process.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/process.sh -------------------------------------------------------------------------------- /tsllm/offline_rl/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/sample.py -------------------------------------------------------------------------------- /tsllm/offline_rl/split_two_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/split_two_test.py -------------------------------------------------------------------------------- /tsllm/offline_rl/test_sft_and_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/test_sft_and_v.py -------------------------------------------------------------------------------- /tsllm/offline_rl/test_sft_and_v_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/test_sft_and_v_rlhf.py -------------------------------------------------------------------------------- /tsllm/offline_rl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/utils.py -------------------------------------------------------------------------------- /tsllm/rl/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/config.py -------------------------------------------------------------------------------- /tsllm/rl/data/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/buffer.py -------------------------------------------------------------------------------- /tsllm/rl/data/node_types_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/node_types_new.py -------------------------------------------------------------------------------- /tsllm/rl/data/sft_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/sft_buffer.py -------------------------------------------------------------------------------- /tsllm/rl/data/traj_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/traj_buffer.py -------------------------------------------------------------------------------- /tsllm/rl/trainer/base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/base_trainer.py -------------------------------------------------------------------------------- /tsllm/rl/trainer/mcts_trainer_traj_ct2_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/mcts_trainer_traj_ct2_sft.py -------------------------------------------------------------------------------- /tsllm/rl/trainer/mcts_trainer_traj_ct2_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/mcts_trainer_traj_ct2_value.py -------------------------------------------------------------------------------- /tsllm/rl/trainer/opt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/opt_utils.py -------------------------------------------------------------------------------- /tsllm/rl/trainer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/utils.py --------------------------------------------------------------------------------