├── .gitignore
├── README.md
├── requirement.txt
├── setup.py
├── train_mcts_scripts
    ├── game24
    │   ├── ds_config.json
    │   ├── mcts_game24_llama_deepspeed.yaml
    │   ├── test_policy_and_value.sh
    │   ├── train_game24_critic.py
    │   └── train_game24_sft.py
    ├── gsm8k
    │   ├── README.md
    │   ├── ds_config.json
    │   ├── it1_gsm8k.ipynb
    │   ├── mcts_gsm8k_llama_deepspeed.yaml
    │   ├── test_policy_and_value.sh
    │   ├── train_gsm8k_critic.py
    │   └── train_gsm8k_sft.py
    ├── prontoqa
    │   ├── ds_config.json
    │   ├── mcts_prontoqa_llama_deepspeed.yaml
    │   ├── test_policy_and_value.sh
    │   ├── train_prontoqa_critic.py
    │   └── train_prontoqa_sft.py
    └── rlhf
    │   ├── README.md
    │   ├── accelerate_config.yaml
    │   ├── ds_config_no_offload.json
    │   ├── filter_top_data_policy_training.py
    │   ├── mix_value_data.py
    │   ├── test_policy_and_value.sh
    │   ├── train_rlhf_critic.py
    │   └── train_rlhf_policy.py
└── tsllm
    ├── argparse_utils.py
    ├── distributed
        └── utils.py
    ├── envs
        ├── __init__.py
        ├── base_env.py
        ├── game24
        │   ├── 24.csv
        │   ├── __init__.py
        │   ├── data.py
        │   ├── env.py
        │   ├── prompt.py
        │   └── train_data
        │   │   ├── test_dedup.jsonl
        │   │   └── train_dedup.jsonl
        ├── gsm8k
        │   ├── __init__.py
        │   ├── data.py
        │   ├── env.py
        │   ├── prompt.py
        │   └── train_data
        │   │   └── sft_init.jsonl
        ├── prontoqa
        │   ├── __init__.py
        │   ├── data.py
        │   ├── env.py
        │   ├── prompt.py
        │   ├── prontoqa.json
        │   └── train_data
        │   │   ├── all.jsonl
        │   │   ├── test.jsonl
        │   │   └── train.jsonl
        ├── rlhf
        │   ├── __init__.py
        │   ├── data.py
        │   ├── env.py
        │   └── prompt.py
        ├── tests
        │   ├── test_game24.py
        │   ├── test_gsm8k.py
        │   ├── test_prontoqa.py
        │   └── test_rlhf.py
        └── utils.py
    ├── inference
        ├── evaluation
        │   └── vote_utils.py
        ├── lm_self_value.py
        ├── trajectory_collector.py
        └── value.py
    ├── llm
        ├── ct2_utils.py
        └── text_generation.py
    ├── mcts
        ├── tree.py
        └── utils.py
    ├── merge_jsonl.py
    ├── model
        ├── __init__.py
        ├── llama_flash_attn_monkey_patch.py
        ├── modeling_actor_critic.py
        ├── modeling_base.py
        ├── modeling_prm.py
        └── utils.py
    ├── offline_rl
        ├── dedup.py
        ├── game24
        │   ├── gen_3.sh
        │   └── process.sh
        ├── generate_data.py
        ├── gsm8k_data
        │   ├── gen_3.sh
        │   └── process.sh
        ├── merge.py
        ├── prontoqa
        │   ├── gen_3.sh
        │   └── process.sh
        ├── rlhf
        │   ├── gen_3.sh
        │   ├── process.py
        │   └── process.sh
        ├── sample.py
        ├── split_two_test.py
        ├── test_sft_and_v.py
        ├── test_sft_and_v_rlhf.py
        └── utils.py
    └── rl
        ├── config.py
        ├── data
            ├── buffer.py
            ├── node_types_new.py
            ├── sft_buffer.py
            └── traj_buffer.py
        └── trainer
            ├── base_trainer.py
            ├── mcts_trainer_traj_ct2_sft.py
            ├── mcts_trainer_traj_ct2_value.py
            ├── opt_utils.py
            └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/.gitignore


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/README.md


--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/requirement.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/setup.py


--------------------------------------------------------------------------------
/train_mcts_scripts/game24/ds_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/ds_config.json


--------------------------------------------------------------------------------
/train_mcts_scripts/game24/mcts_game24_llama_deepspeed.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/mcts_game24_llama_deepspeed.yaml


--------------------------------------------------------------------------------
/train_mcts_scripts/game24/test_policy_and_value.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/test_policy_and_value.sh


--------------------------------------------------------------------------------
/train_mcts_scripts/game24/train_game24_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/train_game24_critic.py


--------------------------------------------------------------------------------
/train_mcts_scripts/game24/train_game24_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/game24/train_game24_sft.py


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/README.md


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/ds_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/ds_config.json


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/it1_gsm8k.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/it1_gsm8k.ipynb


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/mcts_gsm8k_llama_deepspeed.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/mcts_gsm8k_llama_deepspeed.yaml


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/test_policy_and_value.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/test_policy_and_value.sh


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/train_gsm8k_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/train_gsm8k_critic.py


--------------------------------------------------------------------------------
/train_mcts_scripts/gsm8k/train_gsm8k_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/gsm8k/train_gsm8k_sft.py


--------------------------------------------------------------------------------
/train_mcts_scripts/prontoqa/ds_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/ds_config.json


--------------------------------------------------------------------------------
/train_mcts_scripts/prontoqa/mcts_prontoqa_llama_deepspeed.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/mcts_prontoqa_llama_deepspeed.yaml


--------------------------------------------------------------------------------
/train_mcts_scripts/prontoqa/test_policy_and_value.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/test_policy_and_value.sh


--------------------------------------------------------------------------------
/train_mcts_scripts/prontoqa/train_prontoqa_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/train_prontoqa_critic.py


--------------------------------------------------------------------------------
/train_mcts_scripts/prontoqa/train_prontoqa_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/prontoqa/train_prontoqa_sft.py


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/README.md


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/accelerate_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/accelerate_config.yaml


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/ds_config_no_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/ds_config_no_offload.json


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/filter_top_data_policy_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/filter_top_data_policy_training.py


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/mix_value_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/mix_value_data.py


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/test_policy_and_value.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/test_policy_and_value.sh


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/train_rlhf_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/train_rlhf_critic.py


--------------------------------------------------------------------------------
/train_mcts_scripts/rlhf/train_rlhf_policy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/train_mcts_scripts/rlhf/train_rlhf_policy.py


--------------------------------------------------------------------------------
/tsllm/argparse_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/argparse_utils.py


--------------------------------------------------------------------------------
/tsllm/distributed/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/distributed/utils.py


--------------------------------------------------------------------------------
/tsllm/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/__init__.py


--------------------------------------------------------------------------------
/tsllm/envs/base_env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/base_env.py


--------------------------------------------------------------------------------
/tsllm/envs/game24/24.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/24.csv


--------------------------------------------------------------------------------
/tsllm/envs/game24/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/__init__.py


--------------------------------------------------------------------------------
/tsllm/envs/game24/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/data.py


--------------------------------------------------------------------------------
/tsllm/envs/game24/env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/env.py


--------------------------------------------------------------------------------
/tsllm/envs/game24/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/prompt.py


--------------------------------------------------------------------------------
/tsllm/envs/game24/train_data/test_dedup.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/train_data/test_dedup.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/game24/train_data/train_dedup.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/game24/train_data/train_dedup.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/gsm8k/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/__init__.py


--------------------------------------------------------------------------------
/tsllm/envs/gsm8k/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/data.py


--------------------------------------------------------------------------------
/tsllm/envs/gsm8k/env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/env.py


--------------------------------------------------------------------------------
/tsllm/envs/gsm8k/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/prompt.py


--------------------------------------------------------------------------------
/tsllm/envs/gsm8k/train_data/sft_init.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/gsm8k/train_data/sft_init.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/__init__.py


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/data.py


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/env.py


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/prompt.py


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/prontoqa.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/prontoqa.json


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/train_data/all.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/all.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/train_data/test.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/test.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/prontoqa/train_data/train.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/prontoqa/train_data/train.jsonl


--------------------------------------------------------------------------------
/tsllm/envs/rlhf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/__init__.py


--------------------------------------------------------------------------------
/tsllm/envs/rlhf/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/data.py


--------------------------------------------------------------------------------
/tsllm/envs/rlhf/env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/env.py


--------------------------------------------------------------------------------
/tsllm/envs/rlhf/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/rlhf/prompt.py


--------------------------------------------------------------------------------
/tsllm/envs/tests/test_game24.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_game24.py


--------------------------------------------------------------------------------
/tsllm/envs/tests/test_gsm8k.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_gsm8k.py


--------------------------------------------------------------------------------
/tsllm/envs/tests/test_prontoqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_prontoqa.py


--------------------------------------------------------------------------------
/tsllm/envs/tests/test_rlhf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/tests/test_rlhf.py


--------------------------------------------------------------------------------
/tsllm/envs/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/envs/utils.py


--------------------------------------------------------------------------------
/tsllm/inference/evaluation/vote_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/evaluation/vote_utils.py


--------------------------------------------------------------------------------
/tsllm/inference/lm_self_value.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/lm_self_value.py


--------------------------------------------------------------------------------
/tsllm/inference/trajectory_collector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/trajectory_collector.py


--------------------------------------------------------------------------------
/tsllm/inference/value.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/inference/value.py


--------------------------------------------------------------------------------
/tsllm/llm/ct2_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/llm/ct2_utils.py


--------------------------------------------------------------------------------
/tsllm/llm/text_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/llm/text_generation.py


--------------------------------------------------------------------------------
/tsllm/mcts/tree.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/mcts/tree.py


--------------------------------------------------------------------------------
/tsllm/mcts/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/mcts/utils.py


--------------------------------------------------------------------------------
/tsllm/merge_jsonl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/merge_jsonl.py


--------------------------------------------------------------------------------
/tsllm/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/__init__.py


--------------------------------------------------------------------------------
/tsllm/model/llama_flash_attn_monkey_patch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/llama_flash_attn_monkey_patch.py


--------------------------------------------------------------------------------
/tsllm/model/modeling_actor_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_actor_critic.py


--------------------------------------------------------------------------------
/tsllm/model/modeling_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_base.py


--------------------------------------------------------------------------------
/tsllm/model/modeling_prm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/modeling_prm.py


--------------------------------------------------------------------------------
/tsllm/model/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/model/utils.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/dedup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/dedup.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/game24/gen_3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/game24/gen_3.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/game24/process.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/game24/process.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/generate_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/generate_data.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/gsm8k_data/gen_3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/gsm8k_data/gen_3.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/gsm8k_data/process.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/gsm8k_data/process.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/merge.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/prontoqa/gen_3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/prontoqa/gen_3.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/prontoqa/process.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/prontoqa/process.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/rlhf/gen_3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/gen_3.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/rlhf/process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/process.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/rlhf/process.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/rlhf/process.sh


--------------------------------------------------------------------------------
/tsllm/offline_rl/sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/sample.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/split_two_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/split_two_test.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/test_sft_and_v.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/test_sft_and_v.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/test_sft_and_v_rlhf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/test_sft_and_v_rlhf.py


--------------------------------------------------------------------------------
/tsllm/offline_rl/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/offline_rl/utils.py


--------------------------------------------------------------------------------
/tsllm/rl/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/config.py


--------------------------------------------------------------------------------
/tsllm/rl/data/buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/buffer.py


--------------------------------------------------------------------------------
/tsllm/rl/data/node_types_new.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/node_types_new.py


--------------------------------------------------------------------------------
/tsllm/rl/data/sft_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/sft_buffer.py


--------------------------------------------------------------------------------
/tsllm/rl/data/traj_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/data/traj_buffer.py


--------------------------------------------------------------------------------
/tsllm/rl/trainer/base_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/base_trainer.py


--------------------------------------------------------------------------------
/tsllm/rl/trainer/mcts_trainer_traj_ct2_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/mcts_trainer_traj_ct2_sft.py


--------------------------------------------------------------------------------
/tsllm/rl/trainer/mcts_trainer_traj_ct2_value.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/mcts_trainer_traj_ct2_value.py


--------------------------------------------------------------------------------
/tsllm/rl/trainer/opt_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/opt_utils.py


--------------------------------------------------------------------------------
/tsllm/rl/trainer/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterhorse1/LLM_Tree_Search/HEAD/tsllm/rl/trainer/utils.py


--------------------------------------------------------------------------------