├── models ├── phi.py ├── reward_model_mixin.py └── string_rule_reward.py ├── general_util ├── __init__.py ├── mixin.py └── logger.py ├── conf ├── engines │ └── remax_defaults.yaml ├── hydra │ └── default.yaml ├── api │ ├── vllm │ │ ├── vllm_params │ │ │ ├── sampling_param_greedy.yaml │ │ │ └── sampling_param_sample.yaml │ │ ├── llama2-7b │ │ │ ├── logiqav2_qa_react_0shot_tem_v1_0.yaml │ │ │ ├── logiqav2_qa_react_test_0shot_tem_v1_0.yaml │ │ │ ├── logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml │ │ │ ├── logiqav2_qa_react_turbosft_test_0shot_tem_v1_0_s0.yaml │ │ │ ├── logiqav2_qa_react_70bdistil_dpo_test_0shot_tem_v1_0.yaml │ │ │ ├── logiqav2_tems │ │ │ │ ├── react_dev_0shot_tem_v1_0_o1.yaml │ │ │ │ ├── react_dev_0shot_tem_v1_0_o2.yaml │ │ │ │ ├── react_test_0shot_tem_v1_0_o1.yaml │ │ │ │ ├── react_test_0shot_tem_v1_0_o2.yaml │ │ │ │ ├── react_dev_0shot_tem_v1_0.yaml │ │ │ │ ├── react_dev_0shot_tem_v2_0.yaml │ │ │ │ ├── react_test_0shot_tem_v1_0.yaml │ │ │ │ ├── react_test_0shot_tem_v2_0.yaml │ │ │ │ ├── react_dev_0shot_tem_v2_1.yaml │ │ │ │ ├── react_test_0shot_tem_v2_1.yaml │ │ │ │ ├── react_test_1shot_tem_v2_1.yaml │ │ │ │ ├── react_dev_0shot_tem_v2_1_sc.yaml │ │ │ │ ├── react_test_0shot_tem_v2_1_sc.yaml │ │ │ │ └── react_train_0shot_sample_tem_v2_0.yaml │ │ │ ├── logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml │ │ │ ├── logiqav2_qa_react_70bdistil_step_dpo_test_0shot_tem_v1_0.yaml │ │ │ ├── reclor_tems │ │ │ │ ├── test_react_0shot_v1_0_vllm.yaml │ │ │ │ ├── dev_react_0shot_v1_0_vllm.yaml │ │ │ │ ├── test_react_0shot_v1_1_vllm.yaml │ │ │ │ ├── dev_react_0shot_v1_1_vllm.yaml │ │ │ │ ├── dev_react_0shot_v1_1_vllm_sc.yaml │ │ │ │ ├── test_react_0shot_v1_1_vllm_sc.yaml │ │ │ │ └── dev_react_0shot_v1_0_service.yaml │ │ │ ├── ar_lsat_tems │ │ │ │ ├── dev_react_v1_0.yaml │ │ │ │ └── test_react_v1_0.yaml │ │ │ ├── folio_tems │ │ │ │ └── react_dev_0shot_tem_v1_0.yaml │ │ │ ├── logiqav2_qa_rest_train_react_v1_0_0shot_sample.yaml │ │ │ ├── logiqav2_qa_dev_react_dpo_v1_0.yaml │ │ │ ├── logiqav2_qa_dev_react_dpo_v2_0.yaml │ │ │ ├── logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v2_0.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml │ │ │ ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml │ │ │ ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_1shot.yaml │ │ │ ├── logiqav2_qa_dev_react_dpo_v4_1.yaml │ │ │ ├── logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v3_0.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v4_0.yaml │ │ │ ├── logiqav2_qa_dev_react_step_dpo_v4_3.yaml │ │ │ └── logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml │ │ ├── mistral │ │ │ ├── logiqav2 │ │ │ │ └── tems │ │ │ │ │ ├── react_test_1shot_tem_v1_1.yaml │ │ │ │ │ ├── react_test_1shot_tem_v1_0.yaml │ │ │ │ │ └── react_test_1shot_tem_v2_0.yaml │ │ │ └── reclor │ │ │ │ ├── train_react_1shot_sample5_v1_0.yaml │ │ │ │ ├── dev_react_1shot_v1_0.yaml │ │ │ │ └── train_react_1shot_sample5_split_v1_0.yaml │ │ ├── llama2-70b │ │ │ ├── reclor │ │ │ │ ├── train_react_1shot_sample5_v1_0.yaml │ │ │ │ └── dev_react_1shot_v1_0.yaml │ │ │ └── ar_lsat │ │ │ │ ├── dev_react_v1_0.yaml │ │ │ │ ├── dev_react_v1_1.yaml │ │ │ │ └── dev_react_1shot_v2_0.yaml │ │ ├── math │ │ │ ├── 
gsm8k_gemma_test_0shot_tem_v1_0.yaml │ │ │ ├── math_gemma_test_0shot_tem_v1_0.yaml │ │ │ ├── gsm8k_gemma_test_0shot_tem_v1_1.yaml │ │ │ ├── math_gemma_test_0shot_tem_v1_1.yaml │ │ │ ├── math_deepseek_test_0shot_tem_v1_1.yaml │ │ │ └── gsm8k_deepseek_test_0shot_tem_v1_1.yaml │ │ ├── logiqav2_qa_dev_decompose_dpo_v2_0.yaml │ │ ├── logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml │ │ └── logiqav2_qa_dev_react_step_dpo_v1_0.yaml │ ├── gpt35turbo │ │ ├── reclor │ │ │ ├── train_react_v1_0_1shot_sample10.yaml │ │ │ └── dev_react_v1_0_1shot.yaml │ │ ├── ar_lsat │ │ │ └── dev_react_1shot_v1_0.yaml │ │ └── logiqav2 │ │ │ └── dev_react_v1_0_1shot.yaml │ └── gpt4 │ │ ├── reclor │ │ └── dev_react_v1_0_1shot.yaml │ │ └── logiqav2 │ │ └── dev_react_v1_0_1shot.yaml ├── post_process │ └── openai_react.yaml ├── reader │ ├── reclor │ │ ├── react_service_0shot_v1_0.yaml │ │ └── react_service_1shot_v1_0.yaml │ └── logiqav2 │ │ ├── react_service_0shot_v1_0.yaml │ │ └── react_service_1shot_v1_0.yaml └── deepspeed │ ├── train_hybrid_engine_zero1_lr.yaml │ ├── train_hybrid_engine_zero1_optim_offload_lr.yaml │ ├── train_hybrid_engine_zero2_lr.yaml │ ├── train_hybrid_engine_zero1.yaml │ ├── train_hybrid_engine_zero2.yaml │ ├── train_hybrid_engine_zero3.yaml │ ├── train_hybrid_engine_zero1_optim_offload.yaml │ ├── train_hybrid_engine_zero2_optim_offload.yaml │ └── train_hybrid_engine_zero1_optim_offload_cosine.yaml ├── scripts ├── cot │ ├── step_contrastive.py │ ├── rap_fix_pred.py │ ├── cot_step_accumulate.py │ └── deepseek_cot_sample_steps.py ├── inference │ ├── run_query_folio_vllm.sh │ ├── run_query_math_vllm_v1.0.sh │ ├── run_query_math_vllm_v1.1.sh │ ├── run_query_gsm8k_vllm_v1.0.sh │ ├── run_query_gsm8k_vllm_v1.1.sh │ ├── run_query_logiqav2_vllm_reclor.sh │ ├── run_query_logiqav2_vllm_reclor_v1.1.sh │ ├── run_query_reclor_vllm_sc_v1.1.sh │ ├── run_query_ar_lsat_vllm_v2.1.sh │ ├── run_query_logiqav2_vllm.sh │ ├── run_query_logiqav2_vllm_v2.1.sh │ ├── run_query_logiqav2_dev_order.sh │ ├── run_query_logiqav2_vllm_sc_v2.1.sh │ ├── calibra_results_reclor_sc.sh │ ├── calibra_results_reclor_v1.1.sh │ ├── calibra_results.sh │ ├── run_query_logiqav2_order.sh │ └── calibra_results_logiqav2_sc.sh ├── process_sft_inter.sh ├── process_llama.sh ├── process_inter.sh ├── fixed_explore_from_infer │ └── logiqav2 │ │ ├── check_rewards.sh │ │ ├── reward_eval.sh │ │ ├── construct_dpo.sh │ │ └── split_pair.sh ├── explore_from_inter │ ├── reject_sampling_best_of.sh │ ├── run_v2.0.sh │ ├── combine_worsen_response.sh │ ├── reclor │ │ └── best_of_filter_full.sh │ └── run_llama_sft_v2.0.sh ├── calculate_acc_w_clean.py ├── split_pairs_according_to_ids.py ├── construct_dpo_data_from_response.py ├── split_train_dev.py ├── process_turbo.sh ├── sent_tf_react_step_encoding.py ├── split_response_train_dev_according2item_id.py ├── merge_response.py └── deepspeed │ └── ds_full_checkpoint2hf.py ├── run_step_dpo.sh ├── requirements.txt ├── data ├── prompts │ └── logiqav2 │ │ ├── logic_form │ │ ├── prompt_0.md │ │ └── human │ │ │ ├── dev_7261_0_sim.md │ │ │ ├── dev_218_0_sim.md │ │ │ └── dev_218_0.md │ │ ├── compare_response │ │ ├── prompt_0.txt │ │ └── template_th_0.txt │ │ ├── react │ │ └── prompts.py │ │ ├── gpt4 │ │ └── dev_218_0.md │ │ └── decomposition │ │ └── human │ │ └── dev_218_0.md ├── meta_math │ └── react_prompt_1.txt ├── ar_lsat.py ├── reclor.py └── folio.py ├── write_deployment_to_cache.py ├── lora_share_trainer └── utils │ ├── fp8.py │ └── ds_utils.py ├── test_deepspeed.py ├── lora_merge.py └── post_processors └── dist_mixin.py 
/models/phi.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /general_util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/engines/remax_defaults.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/cot/step_contrastive.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | chdir: False 3 | run: 4 | dir: logs/${hydra.job.config_name}/${now:%Y-%m-%d_%H-%M-%S} 5 | -------------------------------------------------------------------------------- /conf/api/vllm/vllm_params/sampling_param_greedy.yaml: -------------------------------------------------------------------------------- 1 | _target_: vllm.SamplingParams 2 | n: 1 3 | temperature: 0.0 4 | stop: [ "", "\n\n\n\n" ] 5 | max_tokens: 2048 6 | -------------------------------------------------------------------------------- /conf/api/vllm/vllm_params/sampling_param_sample.yaml: -------------------------------------------------------------------------------- 1 | _target_: vllm.SamplingParams 2 | n: 5 3 | temperature: 1.0 4 | stop: [ "", "\n\n\n\n" ] 5 | max_tokens: 2048 6 | -------------------------------------------------------------------------------- /conf/post_process/openai_react.yaml: -------------------------------------------------------------------------------- 1 | _target_: post_processors.openai_api_callback.OpenAICallBack 2 | output_file: ${output_file} 3 | answer_clean: 4 | _target_: post_processors.openai_api_callback.ReActSeparatorClean -------------------------------------------------------------------------------- /scripts/inference/run_query_folio_vllm.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo $step 6 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/folio_tems -cn react_dev_0shot_tem_v1_0 exp_name=$exp_name step=$step 7 | done 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/inference/run_query_math_vllm_v1.0.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | model_name=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/math -cn math_${model_name}_test_0shot_tem_v1_0 exp_name=$exp_name step=$step 8 | done 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/inference/run_query_math_vllm_v1.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | model_name=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/math -cn math_${model_name}_test_0shot_tem_v1_1 exp_name=$exp_name step=$step 8 | done 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/inference/run_query_gsm8k_vllm_v1.0.sh: 
-------------------------------------------------------------------------------- 1 | exp_name=$1 2 | model_name=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/math -cn gsm8k_${model_name}_test_0shot_tem_v1_0 exp_name=$exp_name step=$step 8 | done 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/inference/run_query_gsm8k_vllm_v1.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | model_name=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/math -cn gsm8k_${model_name}_test_0shot_tem_v1_1 exp_name=$exp_name step=$step 8 | done 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /models/reward_model_mixin.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | 5 | 6 | @dataclass 7 | class RewardModelOutputs: 8 | chosen_end_scores: torch.Tensor = None 9 | 10 | 11 | class RewardModelMixin: 12 | def forward_value(self, *args, **kwargs) -> RewardModelOutputs: 13 | raise NotImplementedError 14 | -------------------------------------------------------------------------------- /scripts/process_sft_inter.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600 2 | 3 | python scripts/sample_react_inter_states.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.json \ 4 | --output_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver0.0.json \ 5 | --split_num 10 -------------------------------------------------------------------------------- /run_step_dpo.sh: -------------------------------------------------------------------------------- 1 | srun -p NH100q -w node15 deepspeed --include localhost:0,1,2,3 --master_port 10005 trainer_base_ds_mul.py seed=43 -cp conf/exp/dpo/logiqav2 -cn llama2_7b_70bdistil_step_dpo_v1_1_th 2 | srun -p NH100q -w node15 deepspeed --include localhost:0,1,2,3 --master_port 10005 trainer_base_ds_mul.py seed=44 -cp conf/exp/dpo/logiqav2 -cn llama2_7b_70bdistil_step_dpo_v1_1_th -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.24.1 2 | bitsandbytes==0.41.1 3 | deepspeed==0.12.2 4 | einops==0.7.0 5 | fairscale==0.4.12 6 | flash-attn==2.3.3 7 | hydra-core==1.2.0 8 | nltk==3.8.1 9 | openai==0.27.0 10 | pandas==1.5.3 11 | peft==0.6.0 12 | sentencepiece==0.1.97 13 | tokenizers==0.15.0 14 | torch==2.1.1 15 | transformers==4.36.1 16 | vllm==0.2.5 17 | wandb==0.13.10 18 | xformers==0.0.23 19 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_reclor.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo $step 6 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_0_vllm exp_name=$exp_name step=$step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_0_vllm exp_name=$exp_name step=$step 8 | done 9 | 
-------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_reclor_v1.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo $step 6 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_1_vllm exp_name=$exp_name step=$step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_1_vllm exp_name=$exp_name step=$step 8 | done 9 | -------------------------------------------------------------------------------- /conf/reader/reclor/react_service_0shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | _target_: data.logiqav2.ComposePromptGenerator 2 | read_func: 3 | _target_: data.reclor.ReClorReader 4 | flat_options: True 5 | template_id: 8 6 | instruction: 7 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 8 | prompt_name: react_v2 9 | few_shot_prompt: 10 | compose_keys: [ "context", "question", "option_list" ] 11 | max_data_num: -1 12 | api_based: False 13 | service_based: False 14 | -------------------------------------------------------------------------------- /scripts/inference/run_query_reclor_vllm_sc_v1.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | n=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_1_vllm_sc exp_name=$exp_name sampling_params.n=$n step=$step 8 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_1_vllm_sc exp_name=$exp_name sampling_params.n=$n step=$step 9 | done 10 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/prompt_0.md: -------------------------------------------------------------------------------- 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms. For each logic form, the format is [predicate](entity 1, ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate. 2 | Use logical operations to derive the correct option. Common logical operators include AND, OR, NOT, and ==> (logically implies). 
-------------------------------------------------------------------------------- /scripts/process_llama.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | #python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.json \ 4 | # --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json 5 | 6 | python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.1.react.1shot.llama2.70b.chat.sample10.json \ 7 | --output_file data/trajectory/react/logiqav2-train-v1.1.react.1shot.llama2.70b.chat.sample10.clean_nodes.json -------------------------------------------------------------------------------- /scripts/process_inter.sh: -------------------------------------------------------------------------------- 1 | python scripts/process_inter_response.py \ 2 | --input_file "experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600/logiqav2-train.full.qa.react.v1.0.0shot.inter_completion.split-*.json" \ 3 | --output_file logiqav2-train.full.qa.react.v1.0.0shot.inter_completion.pair_diff3.json \ 4 | --diff 3 \ 5 | --inter_state_file "experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver0.0.*-of-10.json" -------------------------------------------------------------------------------- /scripts/inference/run_query_ar_lsat_vllm_v2.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | #step=$2 3 | 4 | for ((i=2;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/ar_lsat_tems -cn dev_react_v1_0 exp_name=$exp_name step=$step 8 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/ar_lsat_tems -cn test_react_v1_0 exp_name=$exp_name step=$step 9 | done 10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/compare_response/prompt_0.txt: -------------------------------------------------------------------------------- 1 | Here is a logical reasoning problem, and there are two solutions describing their thinking process. Please tell me which one is better. You should consider the following criteria: 2 | 1. Reasonable: The reasoning process is reasonable, which means each conclusion should be inferred by collecting evidence, instead of making up unknown facts. 3 | 2. Concise: The reasoning process should not tell something irrelevant to the question. 4 | 3. Logically consistent: There must not be contradiction in the process itself. 
-------------------------------------------------------------------------------- /conf/reader/reclor/react_service_1shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | _target_: data.logiqav2.ComposePromptGenerator 2 | read_func: 3 | _target_: data.reclor.ReClorReader 4 | flat_options: True 5 | template_id: 8 6 | instruction: 7 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 8 | prompt_name: react_v2 9 | few_shot_prompt: 10 | _target_: data.logiqav2.read_single_file 11 | file_path: data/prompts/logiqav2/react/train_4554.txt 12 | compose_keys: [ "context", "question", "option_list" ] 13 | max_data_num: -1 14 | api_based: False 15 | service_based: False 16 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | #step=$2 3 | 4 | for ((i=2;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 8 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_0 exp_name=$exp_name step=$step 9 | done 10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_v2.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | #step=$2 3 | 4 | for ((i=2;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_1 exp_name=$exp_name step=$step 8 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_1 exp_name=$exp_name step=$step 9 | done 10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_dev_order.sh: -------------------------------------------------------------------------------- 1 | exp_dir=$1 2 | name=$2 3 | port=$3 4 | step=$4 5 | 6 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0 exp_dir=$exp_dir model=$name port=$port step=$step 7 | 8 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0_o1 exp_dir=$exp_dir model=$name port=$port step=$step 9 | 10 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0_o2 exp_dir=$exp_dir model=$name port=$port step=$step 11 | 12 | -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/check_rewards.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states 2 | 3 | rm_step=800 4 | reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.A100.w4.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-${rm_step}/eval_predictions_rank0.json" 5 | 6 | python scripts/check_rewards_v1.0.py \ 7 | 
--input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.*-of-20.json" \ 8 | --reward_file $reward_file \ 9 | --step_cutoff 50 10 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_sc_v2.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | n=$2 3 | 4 | for ((i=3;i<=$#;i++)); do 5 | step=${!i} 6 | echo $step 7 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_1_sc exp_name=$exp_name sampling_params.n=$n step=$step 8 | python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_1_sc exp_name=$exp_name sampling_params.n=$n step=$step 9 | done 10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/compare_response/template_th_0.txt: -------------------------------------------------------------------------------- 1 | Here is the problem containing a context, a question, and different options: 2 | 3 | Context: 4 | {} 5 | 6 | Question: 7 | {} 8 | 9 | Options: 10 | {} 11 | 12 | Reasoning process A: 13 | Thought 1: {} 14 | 15 | Reasoning process B: 16 | Thought 1: {} 17 | 18 | For each aspect of the above criteria, select one winner, or judge it as a tie, following the format: 19 | Reasonable: A/B/Tie 20 | Concise: A/B/Tie 21 | Logically consistent: A/B/Tie 22 | 23 | And finally, by considering all the criteria together, select one winner, or judge it as a tie, with the following format: 24 | Overall: A/B/Tie -------------------------------------------------------------------------------- /write_deployment_to_cache.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import os.path 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--model", type=str) 7 | parser.add_argument("--name", type=str) 8 | parser.add_argument("--port", type=str) 9 | args = parser.parse_args() 10 | 11 | if os.path.exists("service.json"): 12 | with open("service.json", "r") as f: 13 | service = json.load(f) 14 | else: 15 | service = {} 16 | 17 | if args.port in service: 18 | service[args.port] = { 19 | "model": args.model, 20 | "name": args.name, 21 | } 22 | 23 | with open("service.json", "w") as f: 24 | json.dump(service, f, indent=4) 25 | -------------------------------------------------------------------------------- /conf/reader/logiqav2/react_service_0shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | _target_: data.logiqav2.ComposePromptGenerator 2 | read_func: 3 | _target_: data.logiqav2.LogicQAReader 4 | flat_options: True 5 | option_order: "ABCD" 6 | template_id: 8 7 | instruction: 8 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 9 | prompt_name: react_v2 10 | few_shot_prompt: 11 | compose_keys: [ "context", "question", "option_list" ] 12 | max_data_num: -1 13 | api_based: False 14 | service_based: True 15 | service_processor: 16 | _target_: data.vllm.VLLMRequestGenerator 17 | api_url: http://0.0.0.0:${port}/v1/completions 18 | max_tokens: 2048 19 | model: ${model} 20 | stop: [ "", "\n\n\n\n" ] 21 | -------------------------------------------------------------------------------- 
/conf/reader/logiqav2/react_service_1shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | _target_: data.logiqav2.ComposePromptGenerator 2 | read_func: 3 | _target_: data.logiqav2.LogicQAReader 4 | flat_options: True 5 | template_id: 8 6 | instruction: 7 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 8 | prompt_name: react_v2 9 | few_shot_prompt: 10 | _target_: data.logiqav2.read_single_file 11 | file_path: data/prompts/logiqav2/react/train_4554.txt 12 | compose_keys: [ "context", "question", "option_list" ] 13 | max_data_num: -1 14 | api_based: False 15 | service_based: True 16 | service_processor: 17 | _target_: data.vllm.VLLMRequestGenerator 18 | api_url: http://0.0.0.0:${port}/v1/completions 19 | max_tokens: 2048 20 | model: ${model} 21 | stop: [ "", "\n\n\n\n" ] 22 | -------------------------------------------------------------------------------- /scripts/inference/calibra_results_reclor_sc.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo "********************* $step *********************" 6 | echo "============= Dev =============" 7 | cat /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.dev.n5.tem1.0.0shot.v1.1.metrics.json 8 | # echo "============= Test =============" 9 | # python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.json 10 | done 11 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/explore_from_inter/reject_sampling_best_of.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states 2 | 3 | best_of=1 4 | inter_best_of=1 5 | 6 | python scripts/reject_sample_best_of_filter_by_reward_v1.0.py \ 7 | --input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.*-of-20.json" \ 8 | --reward_file "experiments/llama2.7b.chat.logiqav2.70b-distil.rm.H100.w4.v1.0/train_decay0.95.diff2.6.rewards.raw_response.v1.0/test-checkpoint-400/eval_predictions_rank0.json" \ 9 | --output_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.best_of_${best_of}_${inter_best_of}.json" \ 10 | --best_of $best_of --inter_best_of $inter_best_of 11 | -------------------------------------------------------------------------------- /scripts/inference/calibra_results_reclor_v1.1.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo "********************* $step *********************" 6 | echo "============= Dev =============" 7 | python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.dev.0shot.v1.1.json 8 | echo "============= Test =============" 9 | python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.test.0shot.v1.1.json 10 | done 11 | #python vllm_inference.py -cp 
conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/inference/calibra_results.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo "********************* $step *********************" 6 | echo "============= Dev =============" 7 | python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 8 | echo "============= Test =============" 9 | python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json 10 | done 11 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/cot/rap_fix_pred.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import os 4 | import argparse 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 7 | 8 | from data.math import math_answer_cleaner 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--response_file", type=str, required=True) 12 | parser.add_argument("--output_file", type=str, required=True) 13 | args = parser.parse_args() 14 | 15 | data = json.load(open(args.response_file)) 16 | cleaner = math_answer_cleaner(separator="The answer is") 17 | 18 | for item in data: 19 | new_preds = [] 20 | for resp in item["response"]: 21 | new_preds.append(cleaner(resp)) 22 | 23 | item["pred"] = new_preds 24 | 25 | json.dump(data, open(args.output_file, "w"), indent=2) 26 | -------------------------------------------------------------------------------- /lora_share_trainer/utils/fp8.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from general_util import training_utils 3 | from general_util.dist_utils import print_rank_0 4 | from general_util.logger import get_child_logger 5 | from general_util.training_utils import get_zero_stage 6 | from general_util.transformer_engine import convert_model 7 | 8 | logger = get_child_logger(__name__) 9 | 10 | try: 11 | import transformer_engine.pytorch as transformer_engine 12 | from transformer_engine.common import recipe 13 | except ImportError: 14 | logger.info("Transformer Engine package is missing, skipping tests") 15 | 16 | 17 | def fp8_func_wrap(func: Callable, fp8_flag: bool, fp8_recipe, *args, **kwargs): 18 | if fp8_flag: 19 | with transformer_engine.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe): 20 | return func(*args, **kwargs) 21 | else: 22 | return func(*args, **kwargs) 23 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_7261_0_sim.md: -------------------------------------------------------------------------------- 1 | [Context] 2 | All Anxi people are vegetarians, while all Zhenyuan people are ascetics. Ascetics and vegetarians are like fire and water, and there is no conflict. Guo Shu is an ascetic. 
3 | 4 | [Question] 5 | Which of the followings can be inferred 6 | 7 | [Options] 8 | A. Guo Shu is from Zhenyuan. 9 | B. Guo Shu is not from Zhenyuan. 10 | C. Guo Shu is from Anxi. 11 | D. Guo Shu is not from Anxi. 12 | 13 | Here are the transformed ones in logic form: 14 | 15 | [Context] 16 | 1. isVegetarian(AnxiPeople) 17 | 2. isAscetic(ZhenyuanPeople) 18 | 3. likeFireAndWater(Ascetics, Vegetarians) AND noConflict(Ascetics, Vegetarians) 19 | 4. isAscetic(GuoShu) 20 | 21 | [Question] 22 | Which of the followings can be inferred? 23 | 24 | [Options] 25 | A. fromPlace(GuoShu, Zhenyuan) 26 | B. NOT fromPlace(GuoShu, Zhenyuan) 27 | C. fromPlace(GuoShu, Anxi) 28 | D. NOT fromPlace(GuoShu, Anxi) -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | 15 | step: 800 16 | port: 6000 17 | exp_dir: 18 | model: 19 | 20 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 21 | flush_file: ${output_file}l 22 | 23 | # Dataloader 24 | num_workers: 48 25 | prefetch_factor: 2 26 | 27 | ddp_eval: False 28 | no_cuda: False 29 | seed: 42 30 | local_rank: -1 31 | 32 | # Temporary variables 33 | n_gpu: 1 34 | device: 35 | train_batch_size: 36 | eval_batch_size: 37 | world_size: 38 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 800 16 | port: 6000 17 | exp_dir: 18 | model: 19 | 20 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json 21 | flush_file: ${output_file}l 22 | 23 | # Dataloader 24 | num_workers: 48 25 | prefetch_factor: 2 26 | 27 | ddp_eval: False 28 | no_cuda: False 29 | seed: 42 30 | local_rank: -1 31 | 32 | # Temporary variables 33 | n_gpu: 1 34 | device: 35 | train_batch_size: 36 | eval_batch_size: 37 | world_size: 38 | -------------------------------------------------------------------------------- /scripts/calculate_acc_w_clean.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import re 4 | from collections import Counter 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--input_file", type=str, required=True) 8 | parser.add_argument("--debug", default=False, action="store_true") 9 | args = parser.parse_args() 10 | 11 | data = json.load(open(args.input_file, "r")) 12 | 13 | cnt = 0 14 | tmp = 0 15 | for item in data: 16 | response = item["response"] 17 | if 
"[Context]" in response: 18 | tmp += 1 19 | if args.debug and tmp < 10: 20 | print(response) 21 | print("=========================") 22 | response = response.split("[Context]")[0] 23 | 24 | preds = re.findall(r"A|B|C|D", response) 25 | if len(preds) == 0: 26 | pred = "" 27 | else: 28 | pred = preds[-1] 29 | 30 | if pred and ord(pred) - ord("A") == item["label"]: 31 | cnt += 1 32 | 33 | print(cnt / len(data)) 34 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/react/prompts.py: -------------------------------------------------------------------------------- 1 | REACT_PROMPT_V1 = ("Solve a question answering task by having a Thought, then Finish with your answer. Thought can reason about the current situation. " 2 | "Finish[answer] returns the answer and finishes the task. You will be given context that you should use to help you answer the question.\n" 3 | "Here are some examples:") 4 | 5 | REACT_PROMPT_V2 = ("Solve a question answering task by having a Thought, then Finish with your answer. Thought can reason about the current situation. " 6 | "Finish[answer] returns the answer and finishes the task. You will be given context that you should use to help you answer the question.") 7 | 8 | prompts = { 9 | "react_v1": REACT_PROMPT_V1, 10 | "react_v2": REACT_PROMPT_V2, 11 | "react_v2_mistral": "[INST] " + REACT_PROMPT_V2 + " [/INST]", 12 | } 13 | 14 | 15 | def get_prompt(prompt_name: str) -> str: 16 | return prompts[prompt_name] 17 | -------------------------------------------------------------------------------- /test_deepspeed.py: -------------------------------------------------------------------------------- 1 | from models.llama import LlamaForCausalLMDPO 2 | from transformers.models.llama import LlamaConfig 3 | from omegaconf import DictConfig, OmegaConf 4 | import omegaconf 5 | import datetime 6 | 7 | config = LlamaConfig(vocab_size=10, num_hidden_layers=1) 8 | 9 | model = LlamaForCausalLMDPO(config) 10 | 11 | print(model.__class__.__name__) 12 | 13 | import deepspeed 14 | 15 | ds_config = OmegaConf.load("conf/deepspeed/train_hybrid_engine_zero1.yaml") 16 | ds_config.train_micro_batch_size_per_gpu = 1 17 | ds_config.gradient_accumulation_steps = 1 18 | ds_config.scheduler.params.total_num_steps = 1000 19 | ds_config.scheduler.params.warmup_num_steps = 10 20 | ds_config = OmegaConf.to_container(ds_config, resolve=True) 21 | 22 | deepspeed.init_distributed(dist_backend="nccl", timeout=datetime.timedelta(seconds=9600)) 23 | engine = deepspeed.initialize(model=model, 24 | config=ds_config) 25 | 26 | print(engine.__class__.__name__) 27 | print(engine.module.__clas__.__name__) 28 | -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_v2.0.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600 2 | #diff=3.0 3 | #diff=2.6 4 | #diff=2.1 5 | diff=3.0 6 | #decay=0.9 7 | #decay=0.8 8 | #decay=0.95 9 | #decay=0.9 10 | decay=1.0 11 | 12 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.json \ 13 | # --output_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver2.0.rs0.2.r0.6.json \ 14 | # --split_num 20 --ratio_s 0.2 --ratio 0.6 15 | 16 | python scripts/process_inter_response_v2.0.py --input_file 
"$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.inter.ver2.0.rs0.2.r0.6.split-*.sample3.json" \ 17 | --output_file "$data_dir/value-ver2.0/logiqav2-train.full.qa.react.v1.0.0shot.inter.ver2.0.rs0.2.r0.6.sample3.diff$diff.decay$decay.value.json" \ 18 | --diff $diff --decay $decay --inter_state_file "$data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver2.0.rs0.2.r0.6.*-of-20.json" 19 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 16 | port: 8000 17 | exp_dir: 18 | model: 19 | 20 | instruction: 21 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 22 | prompt_name: react_v2_mistral 23 | 24 | output_file: ${exp_dir}/logiqav2-test.qa.react.v1.1.1shot.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- /lora_merge.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from peft import PeftModel 5 | from transformers import AutoModelForCausalLM 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--base_model_path", type=str, default="gpt2") 9 | parser.add_argument("--lora_path", type=str, default="gpt2") 10 | parser.add_argument("--output_dir", type=str, default="output") 11 | args = parser.parse_args() 12 | 13 | print(f"Loading base model from {args.base_model_path}...") 14 | # model = AutoModelForCausalLM.from_pretrained(args.base_model_path, device_map={"": "cpu"}, low_cpu_mem_usage=True, torch_dtype=torch.float16) 15 | model = AutoModelForCausalLM.from_pretrained(args.base_model_path) 16 | print(f"Loading lora model from {args.lora_path}...") 17 | # model = PeftModel.from_pretrained(model, args.lora_path, device_map={"": "cpu"}, torch_dtype=torch.float16) 18 | model = PeftModel.from_pretrained(model, args.lora_path) 19 | print("Merging...") 20 | model = model.merge_and_unload() 21 | print(f"Saving to {args.output_dir}...") 22 | model.save_pretrained(args.output_dir) 23 | -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_order.sh: -------------------------------------------------------------------------------- 1 | exp_dir=$1 2 | name=$2 3 | port=$3 4 | step=$4 5 | 6 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps -cn react_test_0shot_tem_v1_0 exp_dir=$exp_dir model=$name port=$port step=$step" 7 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0 exp_dir=$exp_dir model=$name port=$port step=$step 8 | 9 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps 
-cn react_test_0shot_tem_v1_0_o1 exp_dir=$exp_dir model=$name port=$port step=$step" 10 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0_o1 exp_dir=$exp_dir model=$name port=$port step=$step 11 | 12 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps -cn react_test_0shot_tem_v1_0_o2 exp_dir=$exp_dir model=$name port=$port step=$step" 13 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0_o2 exp_dir=$exp_dir model=$name port=$port step=$step 14 | -------------------------------------------------------------------------------- /scripts/inference/calibra_results_logiqav2_sc.sh: -------------------------------------------------------------------------------- 1 | exp_name=$1 2 | 3 | for ((i=2;i<=$#;i++)); do 4 | step=${!i} 5 | echo "********************* $step *********************" 6 | echo "============= Dev =============" 7 | # python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json 8 | cat /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.metrics.json 9 | echo "============= Test =============" 10 | # python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.json 11 | cat /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.metrics.json 12 | done 13 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/reclor/train_react_1shot_sample5_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${train_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: ../pretrained-models/Llama-2-70b-chat-hf 20 | eval_sub_path: 21 | 22 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.v1.0.json 23 | flush_file: ${output_file}l 24 | 25 | # Dataloader 26 | num_workers: 32 27 | prefetch_factor: 2 28 | 29 | # Training hyper-parameters 30 | per_gpu_train_batch_size: 1 31 | per_gpu_eval_batch_size: 1 32 | 33 | ddp_eval: False 34 | no_cuda: False 35 | seed: 42 36 | local_rank: -1 37 | 38 | # Temporary variables 39 | fp16: True 40 | fp16_bfloat16: True 41 | n_gpu: 1 42 | device: 43 | train_batch_size: 44 | eval_batch_size: 45 | world_size: 46 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/train_react_1shot_sample5_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: 
sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${train_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1 20 | eval_sub_path: 21 | 22 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.v1.0.json 23 | flush_file: ${output_file}l 24 | 25 | # Dataloader 26 | num_workers: 32 27 | prefetch_factor: 2 28 | 29 | # Training hyper-parameters 30 | per_gpu_train_batch_size: 1 31 | per_gpu_eval_batch_size: 1 32 | 33 | ddp_eval: False 34 | no_cuda: False 35 | seed: 42 36 | local_rank: -1 37 | 38 | # Temporary variables 39 | fp16: True 40 | fp16_bfloat16: True 41 | n_gpu: 1 42 | device: 43 | train_batch_size: 44 | eval_batch_size: 45 | world_size: 46 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v6.0 18 | model: llama-2-7b-step-dpo-v6.0-cp1600 19 | read_tensor: 20 | read_func: 21 | # option_order: "BCAD" # o1 22 | option_order: "DCBA" # o2 23 | 24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/reward_eval.sh: -------------------------------------------------------------------------------- 1 | 2 | sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/ 3 | 4 | 5 | reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.A100.w4.v1.2.s42/sft.dev.n5.tem1.0.reclor.rewards.raw_trajectory.product.v1.1/test-checkpoint-800/eval_predictions_rank0.json" 6 | #margin=0.5 7 | index="(1,2,3)" 8 | reduction="product" 9 | python scripts/combine_reward_debug_v1.0.py \ 10 | --input_file "${sft_model_dir}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json" \ 11 | --reward_file $reward_file \ 12 | --output_file "./debug.json" --reduction ${reduction} --prob_labels ${index} 13 | 14 | 15 | #reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.orm.fix_hack.A100.40.w4.v1.2.s42/sft.dev.n5.tem1.0.rewards.raw_trajectory.product.v1.0/test-checkpoint-400/eval_predictions_rank0.json" 16 | #python scripts/combine_reward_debug_v1.0.py \ 17 | # --input_file "${sft_model_dir}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json" \ 18 | # --reward_file $reward_file \ 19 | # --output_file "./debug.json" --prob_labels "(1,)" --orm 20 | 21 | 
-------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_turbosft_test_0shot_tem_v1_0_s0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0 18 | model: llama-2-7b-sft-v2.0-cp1600 19 | read_tensor: 20 | read_func: 21 | # option_order: "BCAD" # o1 22 | option_order: "DCBA" # o2 23 | 24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json 14 | 15 | port: 8000 16 | model: llama-2-70b-chat 17 | 18 | output_file: ../pretrained-models/Llama-2-70b-chat-hf/ar-lsat.react.dev.1shot.json 19 | flush_file: ${output_file}l 20 | 21 | # Data loading 22 | read_tensor: 23 | read_func: 24 | _target_: data.ar_lsat.ARLSATReader 25 | flat_options: True 26 | 27 | # Dataloader 28 | num_workers: 32 29 | prefetch_factor: 2 30 | 31 | output_dir: 32 | 33 | 34 | # Training hyper-parameters 35 | per_gpu_train_batch_size: 1 36 | per_gpu_eval_batch_size: 1 37 | 38 | ddp_eval: False 39 | no_cuda: False 40 | seed: 42 41 | local_rank: -1 42 | 43 | # Temporary variables 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_70bdistil_dpo_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.dpo.H100.w4.v1.0/ 18 | model: llama-2-7b-70bdistil-dpo-v1.0-cp1600 19 | read_tensor: 20 | read_func: 21 | # option_order: "BCAD" # o1 22 | option_order: "DCBA" # o2 23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 19 | read_tensor: 20 | template_id: 6 21 | read_func: 22 | option_order: "BCAD" # o1 23 | # option_order: "DCBA" # o2 24 | 25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.o1.json 26 | flush_file: ${output_file}l 27 | 28 | # Dataloader 29 | num_workers: 48 30 | prefetch_factor: 2 31 | 32 | ddp_eval: False 33 | no_cuda: False 34 | seed: 42 35 | local_rank: -1 36 | 37 | # Temporary variables 38 | n_gpu: 1 39 | device: 40 | train_batch_size: 41 | eval_batch_size: 42 | world_size: 43 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 19 | read_tensor: 20 | template_id: 6 21 | read_func: 22 | # option_order: "BCAD" # o1 23 | option_order: "DCBA" # o2 24 | 25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.o2.json 26 | flush_file: ${output_file}l 27 | 28 | # Dataloader 29 | num_workers: 48 30 | prefetch_factor: 2 31 | 32 | ddp_eval: False 33 | no_cuda: False 34 | seed: 42 35 | local_rank: -1 36 | 37 | # Temporary variables 38 | n_gpu: 1 39 | device: 40 | train_batch_size: 41 | eval_batch_size: 42 | world_size: 43 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 
../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 19 | read_tensor: 20 | template_id: 6 21 | read_func: 22 | option_order: "BCAD" # o1 23 | # option_order: "DCBA" # o2 24 | 25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o1.json 26 | flush_file: ${output_file}l 27 | 28 | # Dataloader 29 | num_workers: 48 30 | prefetch_factor: 2 31 | 32 | ddp_eval: False 33 | no_cuda: False 34 | seed: 42 35 | local_rank: -1 36 | 37 | # Temporary variables 38 | n_gpu: 1 39 | device: 40 | train_batch_size: 41 | eval_batch_size: 42 | world_size: 43 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 19 | read_tensor: 20 | template_id: 6 21 | read_func: 22 | # option_order: "BCAD" # o1 23 | option_order: "DCBA" # o2 24 | 25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json 26 | flush_file: ${output_file}l 27 | 28 | # Dataloader 29 | num_workers: 48 30 | prefetch_factor: 2 31 | 32 | ddp_eval: False 33 | no_cuda: False 34 | seed: 42 35 | local_rank: -1 36 | 37 | # Temporary variables 38 | n_gpu: 1 39 | device: 40 | train_batch_size: 41 | eval_batch_size: 42 | world_size: 43 | -------------------------------------------------------------------------------- /scripts/split_pairs_according_to_ids.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import random 4 | from collections import defaultdict 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--input_file", type=str, required=True) 10 | parser.add_argument("--output_file", type=str, required=True) 11 | parser.add_argument("--ratio", type=float, required=True) 12 | parser.add_argument("--seed", type=int, default=42) 13 | args = parser.parse_args() 14 | 15 | random.seed(args.seed) 16 | 17 | data = json.load(open(args.input_file, "r")) 18 | print(len(data)) 19 | 20 | id2samples = defaultdict(list) 21 | for item in data: 22 | id2samples[item["id"]].append(item) 23 | 24 | print(len(id2samples)) 25 | 26 | sampled_data_ids = random.sample(list(id2samples.keys()), int(len(id2samples) * args.ratio)) 27 | sampled_data = [] 28 | for sample_id in sampled_data_ids: 29 | sampled_data.extend(id2samples[sample_id]) 30 | print(len(sampled_data)) 31 | 32 | json.dump(sampled_data, open(args.output_file.replace(".json", f".{args.seed}.json"), "w"), indent=2) 33 | 34 | 35 | if __name__ == "__main__": 36 | main() 37 | 
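A minimal usage sketch for the script above (hypothetical file names and record fields, assuming it is run from the repository root): the script only requires that every record carry an `id`; it keeps all records belonging to a random `--ratio` fraction of the distinct ids and writes them to the output path with the seed inserted before `.json`.

import json
import os
import subprocess
import tempfile

# Toy pair file: two records share id 0, one record has id 1 (fields other than "id" are arbitrary).
pairs = [
    {"id": 0, "chosen": "response a", "reject": "response b"},
    {"id": 0, "chosen": "response a", "reject": "response c"},
    {"id": 1, "chosen": "response d", "reject": "response e"},
]
work_dir = tempfile.mkdtemp()
input_file = os.path.join(work_dir, "pairs.json")
output_file = os.path.join(work_dir, "pairs.sub.json")
with open(input_file, "w") as f:
    json.dump(pairs, f)

# Sample 50% of the distinct ids with seed 42; the script writes pairs.sub.42.json.
subprocess.run(
    [
        "python", "scripts/split_pairs_according_to_ids.py",
        "--input_file", input_file,
        "--output_file", output_file,
        "--ratio", "0.5",
        "--seed", "42",
    ],
    check=True,
)

with open(output_file.replace(".json", ".42.json")) as f:
    print(json.load(f))  # all records belonging to the sampled id(s)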
-------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 800 16 | port: 6000 17 | #exp_dir: experiments/llama2.7b.chat.logiqav2.dpo.A40.w4.v4.1 18 | exp_dir: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1 19 | #model: llama-2-7b-dpo-4.1-cp800 20 | model: llama-2-7b-dpo-v4.0-cp800 21 | read_tensor: 22 | read_func: 23 | # option_order: "BCAD" # o1 24 | option_order: "DCBA" # o2 25 | 26 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json 27 | flush_file: ${output_file}l 28 | 29 | # Dataloader 30 | num_workers: 48 31 | prefetch_factor: 2 32 | 33 | ddp_eval: False 34 | no_cuda: False 35 | seed: 42 36 | local_rank: -1 37 | 38 | # Temporary variables 39 | n_gpu: 1 40 | device: 41 | train_batch_size: 42 | eval_batch_size: 43 | world_size: 44 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | 16 | step: 1600 17 | port: 6000 18 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/ 19 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 20 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 21 | read_tensor: 22 | template_id: 6 23 | 24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- /scripts/explore_from_inter/combine_worsen_response.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states 2 | 3 | 4 | # Worsen response to DPO pair 5 | 6 | python scripts/construct_dpo_data_via_worsen_response.py \ 7 | --input_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.?-4.?-of-4.modify_worse.1shot.mistral-7b.json" \ 8 | --original_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs0.4.r0.2.?-of-4.json" \ 9 | --is_inter_states \ 10 | --output_file 
"$data_dir/worsen/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.modify_worse.1shot.mistral-7b.dpo.json" 11 | 12 | python scripts/construct_dpo_data_via_worsen_response.py \ 13 | --input_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.?-4.?-of-4.modify_worse.1shot.mistral-7b.json" \ 14 | --original_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs0.4.r0.2.?-of-4.json" \ 15 | --is_inter_states \ 16 | --output_file "$data_dir/worsen/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.modify_worse.1shot.mistral-7b.dpo.w_wrong.json" \ 17 | --keep_wrong 18 | 19 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: 6 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | step: 1600 17 | port: 6000 18 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/ 19 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 20 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 21 | read_tensor: 22 | template_id: 6 23 | 24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json 25 | flush_file: ${output_file}l 26 | 27 | # Dataloader 28 | num_workers: 48 29 | prefetch_factor: 2 30 | 31 | ddp_eval: False 32 | no_cuda: False 33 | seed: 42 34 | local_rank: -1 35 | 36 | # Temporary variables 37 | n_gpu: 1 38 | device: 39 | train_batch_size: 40 | eval_batch_size: 41 | world_size: 42 | -------------------------------------------------------------------------------- 
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: 6 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /data/meta_math/react_prompt_1.txt: -------------------------------------------------------------------------------- 1 | ### Question: Ali has four $10 bills and six $20 bills that he saved after working for Mr. James on his farm. Ali gives her sister half of the total money he has and uses 3/5 of the remaining amount of money to buy dinner. Calculate the amount of money he has after buying the dinner. 2 | 3 | SubQuestion 1: How much money does Ali have in total? 4 | Answer 1: Ali has four $10 bills and six $20 bills. So he has 4 * 10 + 6 * 20 = 160 dollars. The answer is 160. 5 | SubQuestion 2: How much money does Ali give to his sister? 6 | Answer 2: Ali gives half of the total money he has to his sister. So he gives 160 / 2 = 80 dollars to his sister. The answer is 80. 7 | SubQuestion 3: How much money does Ali have after giving his sister the money? 8 | Answer 3: After giving his sister the money, Ali has 160 - 80 = 80 dollars left. The answer is 80. 9 | SubQuestion 4: How much money does Ali use to buy dinner? 10 | Answer 4: Ali uses 3/5 of the remaining amount of money to buy dinner. So he uses 80 * 3/5 = 48 dollars to buy dinner. The answer is 48. 11 | SubQuestion 5: Now we can answer the question: How much money does Ali have after buying the dinner? 12 | Answer 5: After buying the dinner, Ali has 80 - 48 = 32 dollars left. The answer is 32. 
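The worked arithmetic in the exemplar above can be verified in a few lines (a standalone sanity check, not part of the repository):

total = 4 * 10 + 6 * 20        # four $10 bills and six $20 bills -> 160
to_sister = total // 2         # half of the total -> 80
remaining = total - to_sister  # -> 80
dinner = remaining * 3 // 5    # 3/5 of the remaining amount -> 48
left = remaining - dinner      # -> 32
assert (total, to_sister, remaining, dinner, left) == (160, 80, 80, 48, 32)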
-------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_70bdistil_step_dpo_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | 15 | step: 1600 16 | port: 6000 17 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/ 18 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1 19 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600 20 | #read_tensor: 21 | # read_func: 22 | # option_order: "BCAD" # o1 23 | # option_order: "DCBA" # o2 24 | 25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json 26 | flush_file: ${output_file}l 27 | 28 | # Dataloader 29 | num_workers: 48 30 | prefetch_factor: 2 31 | 32 | ddp_eval: False 33 | no_cuda: False 34 | seed: 42 35 | local_rank: -1 36 | 37 | # Temporary variables 38 | n_gpu: 1 39 | device: 40 | train_batch_size: 41 | eval_batch_size: 42 | world_size: 43 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 13 | #test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 14 | test_file: ${dev_file} 15 | 16 | step: 17 | port: 6000 18 | exp_dir: 19 | model: 20 | 21 | read_tensor: 22 | service_processor: 23 | _target_: data.vllm.VLLMRequestGenerator 24 | api_url: http://0.0.0.0:${port}/v1/completions 25 | max_tokens: 3072 26 | model: ${model} 27 | stop: [ "", "\n\n\n\n", "Context:\n" ] 28 | 29 | #output_file: ${exp_dir}/logiqav2-test.qa.react.v1.0.1shot.json 30 | output_file: ${exp_dir}/logiqav2-dev.qa.react.v1.0.1shot.json 31 | flush_file: ${output_file}l 32 | 33 | # Dataloader 34 | num_workers: 48 35 | prefetch_factor: 2 36 | 37 | ddp_eval: False 38 | no_cuda: False 39 | seed: 42 40 | local_rank: -1 41 | 42 | # Temporary variables 43 | n_gpu: 1 44 | device: 45 | train_batch_size: 46 | eval_batch_size: 47 | world_size: 48 | -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/construct_dpo.sh: -------------------------------------------------------------------------------- 1 | #sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/ 2 | #fix_hack_data_dir=$sft_model_dir/fix_hack_data_dir/ 3 | # 4 | #python scripts/construct_dpo_data_from_react_response.py \ 5 | # --input_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \ 6 | # --output_file $fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json 7 | # 8 | #python scripts/split_train_dev.py \ 9 | # --input_file 
$fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json 10 | 11 | 12 | # ============================================ Iter 1 ============================================ 13 | 14 | 15 | sft_model_dir=experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.fix_hack.H100.w4.v1.0.th.s42/checkpoint-400/ 16 | 17 | python scripts/construct_dpo_data_from_react_response_v1.1.py \ 18 | --input_file "$sft_model_dir/logiqav2.react.train.0shot.sample10.tem1.0.v1.0.*-of-2.json" \ 19 | --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.dpo_pair.json 20 | 21 | #python scripts/split_train_dev.py \ 22 | # --input_file $fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 
48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_1shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/gpt4/dev_218_0.md: -------------------------------------------------------------------------------- 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms. For each logic form, the format is [predicate](entity 1, ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate. 2 | Use logical operations to derive the correct option. Common logical operators include AND, OR, NOT, and ==> (logically implies). 3 | 4 | [Context]: 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water. 6 | 7 | [Question]: 8 | Which of the followings, if true, can best support the above statement? 9 | 10 | [Options]: 11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet. 12 | B. Many of the water in interstellar space exists in gaseous form. 13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice. 14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time. 
15 | 16 | ----------------- 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /conf/api/gpt35turbo/reclor/train_react_v1_0_1shot_sample10.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | # - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | 13 | train_file: ../research.data/reclor_data/train.json 14 | dev_file: ../research.data/reclor_data/val.json 15 | test_file: ${train_file} 16 | 17 | #num_shot: 5 18 | 19 | output_file: api-outputs/gpt35turbo1106/logiqav2.train.react.1shot.gpt35turbo1106.sample10.tem${model.temperature}.json 20 | flush_file: ${output_file}l 21 | 22 | model: 23 | _target_: data.openai_api_caller.GPTTurbo 24 | model: "gpt-3.5-turbo-1106" 25 | max_tokens: 3072 26 | # temperature: 1.0 27 | temperature: 0.7 28 | api_time_interval: 1 29 | top_p: 0.8 30 | n: 10 31 | 32 | # Data loading 33 | read_tensor: 34 | api_based: True 35 | flush_file: ${flush_file} 36 | 37 | # Dataloader 38 | num_workers: 0 39 | prefetch_factor: 2 40 | 41 | output_dir: 42 | 43 | 44 | # Training hyper-parameters 45 | per_gpu_train_batch_size: 1 46 | per_gpu_eval_batch_size: 1 47 | 48 | ddp_eval: False 49 | no_cuda: False 50 | seed: 42 51 | local_rank: -1 52 | 53 | # Temporary variables 54 | n_gpu: 1 55 | device: 56 | train_batch_size: 57 | eval_batch_size: 58 | world_size: 59 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/decomposition/human/dev_218_0.md: -------------------------------------------------------------------------------- 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms. For each logic form, the format is [predicate](entity 1, ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate. 2 | Use logical operations to derive the correct option. Common logical operators include AND, OR, NOT, and ==> (logically implies). 3 | 4 | [Context]: 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water. 6 | 7 | [Question]: 8 | Which of the followings, if true, can best support the above statement? 9 | 10 | [Options]: 11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet. 12 | B. Many of the water in interstellar space exists in gaseous form. 13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice. 14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time. 
15 | 16 | ----------------- 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /conf/api/gpt4/reclor/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | # - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | 13 | train_file: ../research.data/reclor_data/train.json 14 | dev_file: ../research.data/reclor_data/val.json 15 | test_file: ../research.data/reclor_data/val.json 16 | 17 | #num_shot: 5 18 | 19 | output_file: api-outputs/gpt35turbo1106/reclor.val.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json 20 | flush_file: ${output_file}l 21 | 22 | model: 23 | _target_: data.openai_api_caller.GPTTurbo 24 | model: "gpt-4-1106-preview" 25 | max_tokens: 2048 26 | # temperature: 1.0 27 | # temperature: 0.7 28 | temperature: 0.0 29 | api_time_interval: 1 30 | # top_p: 0.8 31 | # n: 1 32 | 33 | # Data loading 34 | read_tensor: 35 | api_based: True 36 | flush_file: ${flush_file} 37 | 38 | # Dataloader 39 | num_workers: 0 40 | prefetch_factor: 2 41 | 42 | output_dir: 43 | 44 | 45 | # Training hyper-parameters 46 | per_gpu_train_batch_size: 1 47 | per_gpu_eval_batch_size: 1 48 | 49 | ddp_eval: False 50 | no_cuda: False 51 | seed: 42 52 | local_rank: -1 53 | 54 | # Temporary variables 55 | n_gpu: 1 56 | device: 57 | train_batch_size: 58 | eval_batch_size: 59 | world_size: 60 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1_sc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2.dev.react.n${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /conf/api/gpt35turbo/reclor/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | # - 
api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | 13 | train_file: ../research.data/reclor_data/train.json 14 | dev_file: ../research.data/reclor_data/val.json 15 | test_file: ../research.data/reclor_data/test.json 16 | 17 | #num_shot: 5 18 | 19 | output_file: api-outputs/gpt35turbo1106/reclor.test.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json 20 | flush_file: ${output_file}l 21 | 22 | model: 23 | _target_: data.openai_api_caller.GPTTurbo 24 | model: "gpt-3.5-turbo-1106" 25 | max_tokens: 2048 26 | # temperature: 1.0 27 | # temperature: 0.7 28 | temperature: 0.0 29 | api_time_interval: 1 30 | # top_p: 0.8 31 | # n: 1 32 | 33 | # Data loading 34 | read_tensor: 35 | api_based: True 36 | flush_file: ${flush_file} 37 | 38 | # Dataloader 39 | num_workers: 0 40 | prefetch_factor: 2 41 | 42 | output_dir: 43 | 44 | 45 | # Training hyper-parameters 46 | per_gpu_train_batch_size: 1 47 | per_gpu_eval_batch_size: 1 48 | 49 | ddp_eval: False 50 | no_cuda: False 51 | seed: 42 52 | local_rank: -1 53 | 54 | # Temporary variables 55 | n_gpu: 1 56 | device: 57 | train_batch_size: 58 | eval_batch_size: 59 | world_size: 60 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1_sc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2.test.react.n${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json 30 | flush_file: ${output_file}l 31 | 32 | # Dataloader 33 | num_workers: 48 34 | prefetch_factor: 2 35 | 36 | ddp_eval: False 37 | no_cuda: False 38 | seed: 42 39 | local_rank: -1 40 | 41 | # Temporary variables 42 | fp16: True 43 | fp16_bfloat16: True 44 | n_gpu: 1 45 | device: 46 | train_batch_size: 47 | eval_batch_size: 48 | world_size: 49 | -------------------------------------------------------------------------------- /post_processors/dist_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | from typing import List, Any, Dict 3 | import torch 4 | import numpy as np 5 | 6 | 7 | class DistGatherMixin: 8 | def gather(self): 9 | pass 10 | 11 | @staticmethod 12 | def gather_object(objects: List[Any]): 13 | output = [None for _ in range(dist.get_world_size())] 14 | dist.gather_object(objects, 15 | object_gather_list=output if dist.get_rank() == 0 else None, 16 | dst=0) 17 | 18 | if dist.get_rank() == 0: 19 | return output 20 | else: 21 | return None 22 | 23 | 24 | class 
SFTLossOnlyPostProcessor(DistGatherMixin): 25 | def __init__(self): 26 | super().__init__() 27 | self.losses = [] 28 | 29 | def __call__(self, meta_data: Dict[str, Any], batch_model_outputs: Dict[str, Any], ddp: bool = False): 30 | loss = batch_model_outputs["loss"].item() 31 | 32 | if ddp: 33 | gather_res = self.gather_object(loss) 34 | if dist.get_rank() == 0: 35 | loss = sum(gather_res) / len(gather_res) 36 | 37 | self.losses.append(loss) 38 | 39 | def get_results(self, output_dir: str): 40 | avg_loss = np.mean(self.losses).item() 41 | 42 | metrics = { 43 | "loss": avg_loss, 44 | } 45 | 46 | return metrics, [] 47 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/dev_react_1shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ../research.data/reclor_data/test.json 15 | 16 | model: 17 | port: 6000 18 | save_best: False 19 | exp_name: 20 | exp_notes: 21 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1 22 | eval_sub_path: 23 | 24 | output_file: ${output_dir}/reclor.react.test.1shot.v1.0.json 25 | flush_file: ${output_file}l 26 | 27 | read_tensor: 28 | service_based: True 29 | service_processor: 30 | _target_: data.vllm.VLLMRequestGenerator 31 | api_url: http://0.0.0.0:${port}/v1/completions 32 | max_tokens: 3072 33 | model: ${model} 34 | stop: [ "", "\n\n\n\n", "Context:\n" ] 35 | 36 | # Dataloader 37 | num_workers: 64 38 | prefetch_factor: 2 39 | 40 | # Training hyper-parameters 41 | per_gpu_train_batch_size: 1 42 | per_gpu_eval_batch_size: 1 43 | 44 | ddp_eval: False 45 | no_cuda: False 46 | seed: 42 47 | local_rank: -1 48 | 49 | # Temporary variables 50 | fp16: True 51 | fp16_bfloat16: True 52 | n_gpu: 1 53 | device: 54 | train_batch_size: 55 | eval_batch_size: 56 | world_size: 57 | -------------------------------------------------------------------------------- /conf/api/gpt35turbo/ar_lsat/dev_react_1shot_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json 14 | 15 | port: 8000 16 | model: 17 | _target_: data.openai_api_caller.GPTTurbo 18 | model: "gpt-3.5-turbo" 19 | max_tokens: 2048 20 | temperature: 0.0 21 | api_time_interval: 0 22 | 23 | output_file: api-outputs/gpt35turbo/ar-lsat.react.dev.1shot.v1.1.json 24 | flush_file: ${output_file}l 25 | 26 | # Data loading 27 | read_tensor: 28 | read_func: 29 | _target_: data.ar_lsat.ARLSATReader 30 | flat_options: True 31 | few_shot_prompt: 32 | _target_: data.logiqav2.read_single_file 33 | file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt 34 | api_based: True 35 | service_based: False 36 | 37 | # Dataloader 38 | num_workers: 0 39 | prefetch_factor: 2 40 | 41 | output_dir: 42 | 43 | 44 | # Training 
hyper-parameters 45 | per_gpu_train_batch_size: 1 46 | per_gpu_eval_batch_size: 1 47 | 48 | ddp_eval: False 49 | no_cuda: False 50 | seed: 42 51 | local_rank: -1 52 | 53 | # Temporary variables 54 | n_gpu: 1 55 | device: 56 | train_batch_size: 57 | eval_batch_size: 58 | world_size: 59 | -------------------------------------------------------------------------------- /scripts/construct_dpo_data_from_response.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import re 4 | from collections import Counter 5 | 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--input_file", type=str) 9 | parser.add_argument("--output_file", type=str) 10 | args = parser.parse_args() 11 | 12 | data = json.load(open(args.input_file)) 13 | 14 | 15 | outputs = [] 16 | 17 | for item in data: 18 | chosen = [] 19 | reject = [] 20 | for response in item["response"]: 21 | if "[Context]" in response: 22 | response = response.split("[Context]")[0] 23 | 24 | preds = re.findall(r"A|B|C|D", response) 25 | if len(preds) == 0: 26 | pred = "" 27 | else: 28 | pred = preds[-1] 29 | 30 | if pred and ord(pred) - ord("A") == item["label"]: 31 | chosen.append(response) 32 | else: 33 | reject.append(response) 34 | 35 | if len(chosen) > 0 and len(reject) > 0: 36 | outputs.append({ 37 | "input": item["text"], 38 | "chosen": chosen, 39 | "reject": reject, 40 | "id": item["id"], 41 | }) 42 | 43 | print(len(outputs)) 44 | 45 | 46 | a_cnt = Counter() 47 | b_cnt = Counter() 48 | for x in outputs: 49 | a_cnt[len(x["chosen"])] += 1 50 | b_cnt[len(x["reject"])] += 1 51 | print(a_cnt) 52 | print(b_cnt) 53 | 54 | json.dump(outputs, open(args.output_file, "w"), indent=2, ensure_ascii=False) 55 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json 14 | 15 | port: 8000 16 | model: llama-2-70b-chat 17 | 18 | output_file: ../pretrained-models/Llama-2-70b-chat-hf/ar-lsat.react.dev.1shot.v1.1.json 19 | flush_file: ${output_file}l 20 | 21 | # Data loading 22 | read_tensor: 23 | read_func: 24 | _target_: data.ar_lsat.ARLSATReader 25 | flat_options: True 26 | option_order: "ABCDE" 27 | few_shot_prompt: 28 | _target_: data.logiqav2.read_single_file 29 | file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt 30 | 31 | post_process: 32 | answer_clean: 33 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 34 | regrex: "A|B|C|D|E" 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 2 39 | 40 | output_dir: 41 | 42 | 43 | # Training hyper-parameters 44 | per_gpu_train_batch_size: 1 45 | per_gpu_eval_batch_size: 1 46 | 47 | ddp_eval: False 48 | no_cuda: False 49 | seed: 42 50 | local_rank: -1 51 | 52 | # Temporary variables 53 | n_gpu: 1 54 | device: 55 | train_batch_size: 56 | eval_batch_size: 57 | world_size: 58 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/reclor/dev_react_1shot_v1_0.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | #test_file: ${dev_file} 15 | test_file: ../research.data/reclor_data/test.json 16 | 17 | model: llama-2-70b-chat 18 | port: 6000 19 | save_best: False 20 | exp_name: 21 | exp_notes: 22 | output_dir: ../pretrained-models/Llama-2-70b-chat-hf 23 | eval_sub_path: 24 | 25 | output_file: ${output_dir}/reclor.react.test.1shot.v1.0.json 26 | flush_file: ${output_file}l 27 | 28 | read_tensor: 29 | service_based: True 30 | service_processor: 31 | _target_: data.vllm.VLLMRequestGenerator 32 | api_url: http://0.0.0.0:${port}/v1/completions 33 | max_tokens: 2048 34 | model: ${model} 35 | stop: [ "", "\n\n\n\n", "Context:\n" ] 36 | 37 | # Dataloader 38 | num_workers: 64 39 | prefetch_factor: 2 40 | 41 | # Training hyper-parameters 42 | per_gpu_train_batch_size: 1 43 | per_gpu_eval_batch_size: 1 44 | 45 | ddp_eval: False 46 | no_cuda: False 47 | seed: 42 48 | local_rank: -1 49 | 50 | # Temporary variables 51 | fp16: True 52 | fp16_bfloat16: True 53 | n_gpu: 1 54 | device: 55 | train_batch_size: 56 | eval_batch_size: 57 | world_size: 58 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | read_tensor: 25 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 26 | service_based: False 27 | service_processor: 28 | 29 | sampling_params: 30 | stop: [ "\n\n\n\n", "Context:\n", "<|end▁of▁sentence|>" ] 31 | stop_token_ids: [100001] 32 | 33 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json 34 | flush_file: ${output_file}l 35 | 36 | # Dataloader 37 | num_workers: 48 38 | prefetch_factor: 2 39 | 40 | ddp_eval: False 41 | no_cuda: False 42 | seed: 42 43 | local_rank: -1 44 | 45 | # Temporary variables 46 | fp16: True 47 | fp16_bfloat16: True 48 | n_gpu: 1 49 | device: 50 | train_batch_size: 51 | eval_batch_size: 52 | world_size: 53 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_0_vllm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: 
sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ../research.data/reclor_data/test.json 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: 6 30 | 31 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.0shot.v1.0.json 32 | flush_file: ${output_file}l 33 | 34 | # Dataloader 35 | num_workers: 32 36 | prefetch_factor: 37 | 38 | # Training hyper-parameters 39 | per_gpu_train_batch_size: 1 40 | per_gpu_eval_batch_size: 1 41 | 42 | ddp_eval: False 43 | no_cuda: False 44 | seed: 42 45 | local_rank: -1 46 | 47 | # Temporary variables 48 | fp16: True 49 | fp16_bfloat16: True 50 | n_gpu: 1 51 | device: 52 | train_batch_size: 53 | eval_batch_size: 54 | world_size: 55 | 56 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 57 | -------------------------------------------------------------------------------- /scripts/split_train_dev.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import random 4 | import os 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--input_file", type=str) 8 | parser.add_argument("--input_file2", type=str, default=None) 9 | args = parser.parse_args() 10 | 11 | data = json.load(open(args.input_file)) 12 | print("data size: {}".format(len(data))) 13 | if args.input_file2 is not None: 14 | data2 = json.load(open(args.input_file2)) 15 | print("data2 size: {}".format(len(data2))) 16 | data_ids = list(range(len(data))) 17 | # read `dev_num` from command line 18 | dev_num = int(input("dev_num: ")) 19 | dev_ids = random.sample(data_ids, dev_num) 20 | dev_ids = set(dev_ids) 21 | 22 | dev_data = [] 23 | train_data = [] 24 | for i, item in enumerate(data): 25 | if i in dev_ids: 26 | dev_data.append(item) 27 | else: 28 | train_data.append(item) 29 | 30 | print("dev size: {}".format(len(dev_data))) 31 | print("train size: {}".format(len(train_data))) 32 | 33 | if args.input_file2 is not None: 34 | output_file_name = str(input("output file name: ")) 35 | output_file = os.path.join(os.path.dirname(args.input_file), output_file_name) 36 | else: 37 | output_file = args.input_file 38 | json.dump(dev_data, open(output_file.replace(".json", f".sub_dev.{len(dev_data)}.json"), "w"), indent=2, ensure_ascii=False) 39 | json.dump(train_data, open(output_file.replace(".json", f".sub_train.{len(train_data)}.json"), "w"), indent=2, ensure_ascii=False) 40 | -------------------------------------------------------------------------------- /scripts/explore_from_inter/reclor/best_of_filter_full.sh: -------------------------------------------------------------------------------- 1 | data_dir=experiments/llama2.7b.chat.mixtral.dpo-sft.A100.40.w8.v1.0/checkpoint-1200/react-inter-states 2 | 3 | 4 | best_of=10 5 | pos_margin=0.7 6 | max_neg_num=10 7 | index="(1,2,3,4,5)" 8 | reward_file="experiments/llama2.7b.chat.reclor.mixtral-distil.prm.A100.40.w8.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-2400/eval_predictions_rank0.json" 9 | python 
scripts/best_of_filter_by_reward_v2.2.py \ 10 | --input_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.json" \ 11 | --reward_file $reward_file \ 12 | --output_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_v12_cp2400_best_of_${best_of}.neg${max_neg_num}.pos${pos_margin}.v2.2.${index}.pair.product.full_only.json" \ 13 | --best_of $best_of --max_neg_num $max_neg_num --pos_margin $pos_margin --prob_labels ${index} --reduction "product" 14 | 15 | 16 | # =============================== Debug 17 | #index="(1,2,3,4,5)" 18 | #reward_file="experiments/llama2.7b.chat.reclor.mixtral-distil.prm.A100.40.w8.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-2400/eval_predictions_rank0.json" 19 | #python scripts/combine_reward_debug_v1.0.py \ 20 | # --input_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.json" \ 21 | # --reward_file $reward_file \ 22 | # --output_file "./reward_reclor_debug_cp2400_${index}.json" --reduction product --prob_labels ${index} 23 | -------------------------------------------------------------------------------- /scripts/process_turbo.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | #python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.json \ 4 | # --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json 5 | 6 | #python scripts/sent_tf_react_step_encoding.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json \ 7 | # --model_path ../pretrained-models/bge-large-en-v1.5 \ 8 | # --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.emb.npy 9 | 10 | python scripts/react_step_union_find.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json \ 11 | --embedding_path data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.emb.npy --threshold 0.95 \ 12 | --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.json 13 | 14 | python scripts/construct_dpo_data_via_step_value_v1.py \ 15 | --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.json \ 16 | --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.len2.in4.v0.1.json \ 17 | --save_full_data 18 | 19 | python scripts/split_train_dev.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.len2.in4.v0.1.json \ 20 | --dev_num 5000 -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_vllm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${dev_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: 
checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: 6 30 | service_based: False 31 | service_processor: 32 | 33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.0shot.v1.0.json 34 | flush_file: ${output_file}l 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 39 | 40 | # Training hyper-parameters 41 | per_gpu_train_batch_size: 1 42 | per_gpu_eval_batch_size: 1 43 | 44 | ddp_eval: False 45 | no_cuda: False 46 | seed: 42 47 | local_rank: -1 48 | 49 | # Temporary variables 50 | fp16: True 51 | fp16_bfloat16: True 52 | n_gpu: 1 53 | device: 54 | train_batch_size: 55 | eval_batch_size: 56 | world_size: 57 | 58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 59 | -------------------------------------------------------------------------------- /conf/api/gpt35turbo/logiqav2/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | # - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | 13 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 14 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 16 | 17 | #num_shot: 5 18 | 19 | output_file: api-outputs/gpt35turbo1106/logiqav2.test.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json 20 | flush_file: ${output_file}l 21 | 22 | model: 23 | _target_: data.openai_api_caller.GPTTurbo 24 | model: "gpt-3.5-turbo-1106" 25 | max_tokens: 2048 26 | # temperature: 1.0 27 | # temperature: 0.7 28 | temperature: 0.0 29 | api_time_interval: 1 30 | # top_p: 0.8 31 | # n: 1 32 | 33 | # Data loading 34 | read_tensor: 35 | max_data_num: 500 36 | service_based: False 37 | service_processor: 38 | api_based: True 39 | flush_file: ${flush_file} 40 | 41 | # Dataloader 42 | num_workers: 0 43 | prefetch_factor: 2 44 | 45 | output_dir: 46 | 47 | 48 | # Training hyper-parameters 49 | per_gpu_train_batch_size: 1 50 | per_gpu_eval_batch_size: 1 51 | 52 | ddp_eval: False 53 | no_cuda: False 54 | seed: 42 55 | local_rank: -1 56 | 57 | # Temporary variables 58 | n_gpu: 1 59 | device: 60 | train_batch_size: 61 | eval_batch_size: 62 | world_size: 63 | -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - post_process: gsm8k 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | _target_: data.math.gsm8k_gold_answer_extractor 27 | template: "### Instruction:\n{query}\n\n### Response: Let's think step by step." 
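  # Note (assumption): `template`, together with the `instruction` field below, reproduces the
  # Alpaca-style prompt layout commonly used for MetaMath-tuned checkpoints, i.e. the request
  # header followed by "### Instruction:\n<query>\n\n### Response: Let's think step by step."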
28 | instruction: "Below is an instruction that describes a task. Write a response that appropriately completes the request." 29 | max_data_num: -1 30 | service_based: False 31 | api_based: False 32 | index_field: "index" 33 | 34 | sampling_params: 35 | stop: [ "", "\n\n\n\n", "### Instruction" ] 36 | 37 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.0.0shot.json 38 | flush_file: ${output_file}l 39 | 40 | # Dataloader 41 | num_workers: 48 42 | prefetch_factor: 2 43 | 44 | post_process: 45 | resume: False 46 | index_field: "index" 47 | label_field: "label" 48 | 49 | ddp_eval: False 50 | no_cuda: False 51 | seed: 42 52 | local_rank: -1 53 | 54 | # Temporary variables 55 | fp16: True 56 | fp16_bfloat16: True 57 | n_gpu: 1 58 | device: 59 | train_batch_size: 60 | eval_batch_size: 61 | world_size: 62 | -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_lr.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupLR 5 | params: 6 | warmup_max_lr: ${learning_rate} 7 | warmup_num_steps: 8 | warmup_type: linear 9 | optimizer: 10 | type: AdamW 11 | params: 12 | lr: 1e-4 13 | betas: [ 0.9, 0.999 ] 14 | eps: 1e-6 15 | weight_decay: 0.0 16 | bf16: 17 | enabled: True 18 | zero_optimization: 19 | stage: 1 20 | # offload_optimizer: 21 | # device: cpu 22 | # pin_memory: True 23 | # offload_param: 24 | # device: cpu 25 | # pin_memory: True 26 | # activation_checkpointing: 27 | # partition_activations: True 28 | # cpu_checkpointing: True 29 | # contiguous_memory_optimization: False 30 | # number_checkpoints: False 31 | # synchronize_checkpoint_boundary: False 32 | # profile: False 33 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 34 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 35 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 36 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 37 | memory_efficient_linear: False 38 | steps_per_print: 25 39 | gradient_clipping: 1.0 40 | prescale_gradients: False 41 | #wall_clock_breakdown: False 42 | #hybrid_engine: 43 | # enabled: True 44 | # max_out_tokens: max_out_tokens 45 | # inference_tp_size: inference_tp_size 46 | # release_inference_cache: release_inference_cache 47 | # pin_parameters: pin_parameters 48 | # tp_gather_partition_size: tp_gather_partition_size 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_1shot_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 13 | #test_file: ../research.data/AR-LSAT/data/AR_TestData.json 14 | test_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 15 | 16 | port: 6000 17 | model: mixtral-ins 18 | 19 | output_file: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1/ar-lsat.react.dev.1shot.v2.0.json 20 | flush_file: ${output_file}l 21 | 22 | # Data loading 23 | read_tensor: 24 | read_func: 25 | _target_: data.ar_lsat.ARLSATReader 26 | flat_options: True 27 | few_shot_prompt: 28 | _target_: 
data.logiqav2.read_single_file 29 | file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt 30 | service_processor: 31 | _target_: data.vllm.VLLMRequestGenerator 32 | api_url: http://0.0.0.0:6000/v1/completions 33 | max_tokens: 8192 34 | stop: [ "", "\n\n\n\n", "Context:\n", "Thought 42:" ] 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 2 39 | 40 | output_dir: 41 | 42 | 43 | # Training hyper-parameters 44 | per_gpu_train_batch_size: 1 45 | per_gpu_eval_batch_size: 1 46 | 47 | ddp_eval: False 48 | no_cuda: False 49 | seed: 42 50 | local_rank: -1 51 | 52 | # Temporary variables 53 | n_gpu: 1 54 | device: 55 | train_batch_size: 56 | eval_batch_size: 57 | world_size: 58 | -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload_lr.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupLR 5 | params: 6 | warmup_max_lr: ${learning_rate} 7 | warmup_num_steps: 8 | warmup_type: linear 9 | optimizer: 10 | type: AdamW 11 | params: 12 | lr: 1e-4 13 | betas: [ 0.9, 0.999 ] 14 | eps: 1e-6 15 | weight_decay: 0.0 16 | bf16: 17 | enabled: True 18 | zero_optimization: 19 | stage: 1 20 | offload_optimizer: 21 | device: cpu 22 | pin_memory: True 23 | # offload_param: 24 | # device: cpu 25 | # pin_memory: True 26 | # activation_checkpointing: 27 | # partition_activations: True 28 | # cpu_checkpointing: True 29 | # contiguous_memory_optimization: False 30 | # number_checkpoints: False 31 | # synchronize_checkpoint_boundary: False 32 | # profile: False 33 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 34 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 35 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 36 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 37 | memory_efficient_linear: False 38 | steps_per_print: 25 39 | gradient_clipping: 1.0 40 | prescale_gradients: False 41 | #wall_clock_breakdown: False 42 | #hybrid_engine: 43 | # enabled: True 44 | # max_out_tokens: max_out_tokens 45 | # inference_tp_size: inference_tp_size 46 | # release_inference_cache: release_inference_cache 47 | # pin_parameters: pin_parameters 48 | # tp_gather_partition_size: tp_gather_partition_size 49 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_train_0shot_sample_tem_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 15 | 16 | save_best: False 17 | exp_name: llama2.7b.chat.logiqav2.70b-distil.step.dpo.fix_hack.H100.w4.v1.0.th.s43 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 3072 26 | temperature: 2.0 27 | gpu_memory_utilization: 0.95 28 | 29 | read_tensor: 30 | split_size: -1 31 | split_id: 0 32 | template_id: 
"Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " 33 | service_based: False 34 | service_processor: 35 | 36 | #swap_space: 8 37 | 38 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-train.react.sample${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json 39 | flush_file: ${output_file}l 40 | 41 | # Dataloader 42 | num_workers: 48 43 | prefetch_factor: 2 44 | 45 | ddp_eval: True 46 | no_cuda: False 47 | seed: 42 48 | local_rank: -1 49 | 50 | # Temporary variables 51 | fp16: True 52 | fp16_bfloat16: True 53 | n_gpu: 1 54 | device: 55 | train_batch_size: 56 | eval_batch_size: 57 | world_size: 58 | -------------------------------------------------------------------------------- /conf/api/vllm/math/math_gemma_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - post_process: math 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | _target_: data.math.math_gold_answer_extractor 27 | kv_mapping: 28 | instruction: query 29 | template: "### Instruction:\n{query}\n\n### Response: Let's think step by step." 30 | instruction: "Below is an instruction that describes a task. Write a response that appropriately completes the request." 31 | max_data_num: -1 32 | service_based: False 33 | api_based: False 34 | index_field: "idx" 35 | 36 | sampling_params: 37 | stop: [ "", "\n\n\n\n", "### Instruction" ] 38 | 39 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.0.0shot.json 40 | flush_file: ${output_file}l 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | post_process: 47 | resume: False 48 | index_field: "idx" 49 | label_field: "label" 50 | 51 | ddp_eval: False 52 | no_cuda: False 53 | seed: 42 54 | local_rank: -1 55 | 56 | # Temporary variables 57 | fp16: True 58 | fp16_bfloat16: True 59 | n_gpu: 1 60 | device: 61 | train_batch_size: 62 | eval_batch_size: 63 | world_size: 64 | -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2_lr.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupLR 5 | params: 6 | # total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 2 21 | # offload_optimizer: 22 | # device: cpu 23 | # pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 
1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_218_0_sim.md: -------------------------------------------------------------------------------- 1 | [Context]: 2 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water. 3 | 4 | [Question]: 5 | Which of the followings, if true, can best support the above statement? 6 | 7 | [Options]: 8 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet. 9 | B. Many of the water in interstellar space exists in gaseous form. 10 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice. 11 | D. The satellite and the planets around it were formed from the same gas and dust at the same time. 12 | 13 | Here are the logic forms for context, question and options: 14 | 15 | [Context] 16 | 1. isGasGiant(Jupiter) AND isLargestInSolarSystem(Jupiter) 17 | 2. mass(Jupiter) = 2.5 * sumOfMass(otherSevenPlanetsInSolarSystem) 18 | 3. composedOfWaterIce(surroundingMoons(Jupiter)) > 70 19 | 4. containsConsiderableWater(atmosphere(Jupiter)) 20 | 21 | [Question] 22 | Which of the followings, if true, can best support the statement Context-4? 23 | 24 | [Options] 25 | A. fallOntoPlanet(satellite, planet) AND afterHundredsOfMillionsOfYears() 26 | B. existsInGaseousForm(water, interstellarSpace) 27 | C. isGasGiant(Uranus) AND containsLotsOfWaterIce(Uranus) 28 | D. 
formedFromSameGasAndDust(satellite, planet) AND atSameTime(satellite, planet) -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupDecayLR 5 | params: 6 | total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 1 21 | # offload_optimizer: 22 | # device: cpu 23 | # pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupDecayLR 5 | params: 6 | total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 2 21 | # offload_optimizer: 22 | # device: cpu 23 | # pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | 
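Note on how these DeepSpeed YAMLs are used: the blank fields (`train_micro_batch_size_per_gpu`, `gradient_accumulation_steps`, `total_num_steps`, `warmup_num_steps`) and the `${learning_rate}` interpolation are filled in from the Hydra training config at runtime before the resolved dictionary is passed to `deepspeed.initialize` (see `lora_share_trainer/utils/ds_utils.py` further down in this dump). The following is only a minimal sketch of that flow; the model name, step counts, and learning rate are placeholder assumptions, not values taken from the repo.

import deepspeed
from omegaconf import OmegaConf
from transformers import AutoModelForCausalLM

# Load the ZeRO-2 config and attach it under a root that defines `learning_rate`,
# so the `${learning_rate}` interpolation can resolve.
ds_cfg = OmegaConf.load("conf/deepspeed/train_hybrid_engine_zero2.yaml")
root = OmegaConf.create({"learning_rate": 1e-6, "ds": ds_cfg})

# Fill in the fields the trainer normally injects (placeholder values).
root.ds.train_micro_batch_size_per_gpu = 1
root.ds.gradient_accumulation_steps = 8
root.ds.scheduler.params.total_num_steps = 1000
root.ds.scheduler.params.warmup_num_steps = 100

ds_config = OmegaConf.to_container(root.ds, resolve=True)

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
engine, optimizer, _, scheduler = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config_params=ds_config,
)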
-------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero3.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupDecayLR 5 | params: 6 | total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 3 21 | # offload_optimizer: 22 | # device: cpu 23 | # pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ../research.data/reclor_data/test.json 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 30 | 31 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.0shot.v1.1.json 32 | flush_file: ${output_file}l 33 | 34 | # Dataloader 35 | num_workers: 32 36 | prefetch_factor: 37 | 38 | # Training hyper-parameters 39 | per_gpu_train_batch_size: 1 40 | per_gpu_eval_batch_size: 1 41 | 42 | ddp_eval: False 43 | no_cuda: False 44 | seed: 42 45 | local_rank: -1 46 | 47 | # Temporary variables 48 | fp16: True 49 | fp16_bfloat16: True 50 | n_gpu: 1 51 | device: 52 | train_batch_size: 53 | eval_batch_size: 54 | world_size: 55 | 56 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 57 | 
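For context, these `*_vllm.yaml` eval configs are consumed by `vllm_inference.py` (see the command in the comment at the end of the file above). The `sampling_param_greedy` group file is not included in this dump, so its exact defaults are an assumption here; judging by its name it should correspond to greedy decoding, which together with the `max_tokens: 2048` override would amount to roughly the following vLLM call. The checkpoint path and prompt below are placeholders, not paths from the repo.

from vllm import LLM, SamplingParams

# Assumed greedy defaults plus the overrides from this config.
sampling_params = SamplingParams(temperature=0.0, max_tokens=2048)

llm = LLM(
    model="experiments/llama2.7b.chat.reclor.sft/checkpoint-800",  # hypothetical ${output_dir}/${eval_sub_path}
    gpu_memory_utilization=0.95,
)
outputs = llm.generate(
    ["Context:\n...\n\nQuestion:\n...\n\nOptions:\n...\n\nThought 1: "],
    sampling_params,
)
print(outputs[0].outputs[0].text)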
-------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupDecayLR 5 | params: 6 | total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 1 21 | offload_optimizer: 22 | device: cpu 23 | pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2_optim_offload.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupDecayLR 5 | params: 6 | total_num_steps: 7 | warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 2 21 | offload_optimizer: 22 | device: cpu 23 | pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- 
/conf/api/gpt4/logiqav2/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | # - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | 13 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 14 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 15 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt 16 | 17 | #num_shot: 5 18 | 19 | output_file: api-outputs/gpt-4-1106-preview/logiqav2.dev.react.1shot.gpt4-1106-preview.sample1.tem${model.temperature}.first${read_tensor.max_data_num}.json 20 | flush_file: ${output_file}l 21 | 22 | model: 23 | _target_: data.openai_api_caller.GPTTurbo 24 | # model: "gpt-4-1106-preview" 25 | model: "gpt-4-0125-preview" 26 | max_tokens: 2048 27 | # temperature: 1.0 28 | # temperature: 0.7 29 | temperature: 0.0 30 | api_time_interval: 1 31 | # top_p: 0.8 32 | # n: 1 33 | 34 | # Data loading 35 | read_tensor: 36 | # max_data_num: 500 37 | max_data_num: 250 38 | service_based: False 39 | service_processor: 40 | api_based: True 41 | flush_file: ${flush_file} 42 | 43 | # Dataloader 44 | num_workers: 0 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | 50 | # Training hyper-parameters 51 | per_gpu_train_batch_size: 1 52 | per_gpu_eval_batch_size: 1 53 | 54 | ddp_eval: False 55 | no_cuda: False 56 | seed: 42 57 | local_rank: -1 58 | 59 | # Temporary variables 60 | n_gpu: 1 61 | device: 62 | train_batch_size: 63 | eval_batch_size: 64 | world_size: 65 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${dev_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 30 | service_based: False 31 | service_processor: 32 | 33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.0shot.v1.1.json 34 | flush_file: ${output_file}l 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 39 | 40 | # Training hyper-parameters 41 | per_gpu_train_batch_size: 1 42 | per_gpu_eval_batch_size: 1 43 | 44 | ddp_eval: False 45 | no_cuda: False 46 | seed: 42 47 | local_rank: -1 48 | 49 | # Temporary variables 50 | fp16: True 51 | fp16_bfloat16: True 52 | n_gpu: 1 53 | device: 54 | train_batch_size: 55 | eval_batch_size: 56 | world_size: 57 | 58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 59 | 
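A small note on the `flush_file: ${output_file}l` pattern that recurs in the API/vLLM configs above and below: the interpolation simply appends an `l` to the output path, so a `.json` output gets a sibling `.jsonl` file that predictions can be flushed to line by line while generation is running. Minimal illustration (the path is made up):

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "output_file": "experiments/demo/checkpoint-800/reclor.react.dev.0shot.v1.1.json",
    "flush_file": "${output_file}l",
})
print(OmegaConf.to_container(cfg, resolve=True)["flush_file"])
# experiments/demo/checkpoint-800/reclor.react.dev.0shot.v1.1.jsonl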
-------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload_cosine.yaml: -------------------------------------------------------------------------------- 1 | train_micro_batch_size_per_gpu: 2 | gradient_accumulation_steps: 3 | scheduler: 4 | type: WarmupCosineLR # requires deepspeed >= 0.12.3 5 | params: 6 | total_num_steps: 7 | # warmup_max_lr: ${learning_rate} 8 | warmup_num_steps: 9 | warmup_type: linear 10 | optimizer: 11 | type: AdamW 12 | params: 13 | lr: 1e-4 14 | betas: [ 0.9, 0.999 ] 15 | eps: 1e-6 16 | weight_decay: 0.0 17 | bf16: 18 | enabled: True 19 | zero_optimization: 20 | stage: 1 21 | offload_optimizer: 22 | device: cpu 23 | pin_memory: True 24 | # offload_param: 25 | # device: cpu 26 | # pin_memory: True 27 | # activation_checkpointing: 28 | # partition_activations: True 29 | # cpu_checkpointing: True 30 | # contiguous_memory_optimization: False 31 | # number_checkpoints: False 32 | # synchronize_checkpoint_boundary: False 33 | # profile: False 34 | # zero_quantized_nontrainable_weights: False # If `enable_mixed_precision_lora` is True, this should be True 35 | stage3_param_persistence_threshold: 1e5 # (1e4,1e6) 36 | stage3_max_live_parameters: 1e8 # (3e7, 1e9) 37 | stage3_prefetch_bucket_size: 1e8 # (3e7, 5e8) 38 | memory_efficient_linear: False 39 | steps_per_print: 25 40 | gradient_clipping: 1.0 41 | prescale_gradients: False 42 | #wall_clock_breakdown: False 43 | #hybrid_engine: 44 | # enabled: True 45 | # max_out_tokens: max_out_tokens 46 | # inference_tp_size: inference_tp_size 47 | # release_inference_cache: release_inference_cache 48 | # pin_parameters: pin_parameters 49 | # tp_gather_partition_size: tp_gather_partition_size 50 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/ar_lsat_tems/dev_react_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 13 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 14 | test_file: ${dev_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | # Data loading 25 | read_tensor: 26 | read_func: 27 | _target_: data.ar_lsat.ARLSATReader 28 | flat_options: True 29 | option_order: "ABCDE" 30 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " 31 | service_based: False 32 | service_processor: 33 | 34 | sampling_params: 35 | max_tokens: 3072 36 | 37 | output_file: ${output_dir}/${eval_sub_path}/ar-lsat.dev.react.v1.0.0shot.json 38 | flush_file: ${output_file}l 39 | 40 | post_process: 41 | answer_clean: 42 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 43 | regrex: "A|B|C|D|E" 44 | 45 | # Dataloader 46 | num_workers: 32 47 | prefetch_factor: 2 48 | 49 | 50 | # Training hyper-parameters 51 | per_gpu_train_batch_size: 1 52 | per_gpu_eval_batch_size: 1 53 | 54 | ddp_eval: False 55 | no_cuda: False 56 | seed: 42 57 | local_rank: -1 58 | 59 | # Temporary variables 60 | fp16: True 61 | fp16_bfloat16: True 62 | n_gpu: 1 63 | device: 64 | train_batch_size: 65 | 
eval_batch_size: 66 | world_size: 67 | -------------------------------------------------------------------------------- /scripts/cot/cot_step_accumulate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from multiprocessing import Pool 4 | from functools import partial 5 | 6 | from tqdm import tqdm 7 | 8 | """ 9 | The output should come from `cot_clean.py`. 10 | """ 11 | 12 | 13 | def acc_func(item, response_field="response"): 14 | responses = item[response_field] 15 | preds = item["pred"] 16 | item_id = item["id"] 17 | 18 | acc_steps = [] 19 | for resp_id, (resp, pred) in enumerate(zip(responses, preds)): 20 | acc = "" 21 | # for i, step in enumerate(resp): 22 | for i, step in enumerate(resp[:-2]): 23 | if "### The answer is" in step: 24 | break 25 | acc_resp = acc + step 26 | acc_id = f"{item_id}_{resp_id}_{i}" 27 | acc_steps.append({"id": acc_id, "response": acc_resp}) 28 | acc += step 29 | 30 | item["accumulated_response"] = acc_steps 31 | return item 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("--input_file", type=str, required=True) 37 | parser.add_argument("--response_field", type=str, default="response") 38 | parser.add_argument("--num_workers", type=int, default=16) 39 | args = parser.parse_args() 40 | 41 | data = json.load(open(args.input_file)) 42 | 43 | annotate = partial(acc_func, response_field=args.response_field) 44 | with Pool(args.num_workers) as p: 45 | data = list(tqdm(p.imap(annotate, data), total=len(data))) 46 | 47 | save_path = args.input_file.replace(".json", "_accumulated.json") 48 | json.dump(data, open(save_path, "w")) 49 | 50 | 51 | if __name__ == '__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm_sc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${dev_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 30 | service_based: False 31 | service_processor: 32 | 33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.n${sampling_params.n}.tem${sampling_params.temperature}.0shot.v1.1.json 34 | flush_file: ${output_file}l 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 39 | 40 | # Training hyper-parameters 41 | per_gpu_train_batch_size: 1 42 | per_gpu_eval_batch_size: 1 43 | 44 | ddp_eval: False 45 | no_cuda: False 46 | seed: 42 47 | local_rank: -1 48 | 49 | # Temporary variables 50 | fp16: True 51 | fp16_bfloat16: True 52 | n_gpu: 1 53 | device: 54 | train_batch_size: 55 | eval_batch_size: 56 | world_size: 57 | 58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn 
train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 59 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/ar_lsat_tems/test_react_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json 13 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json 14 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 800 22 | eval_sub_path: checkpoint-${step} 23 | 24 | # Data loading 25 | read_tensor: 26 | read_func: 27 | _target_: data.ar_lsat.ARLSATReader 28 | flat_options: True 29 | option_order: "ABCDE" 30 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " 31 | service_based: False 32 | service_processor: 33 | 34 | sampling_params: 35 | max_tokens: 3072 36 | 37 | output_file: ${output_dir}/${eval_sub_path}/ar-lsat.test.react.v1.0.0shot.json 38 | flush_file: ${output_file}l 39 | 40 | post_process: 41 | answer_clean: 42 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 43 | regrex: "A|B|C|D|E" 44 | 45 | # Dataloader 46 | num_workers: 32 47 | prefetch_factor: 2 48 | 49 | 50 | # Training hyper-parameters 51 | per_gpu_train_batch_size: 1 52 | per_gpu_eval_batch_size: 1 53 | 54 | ddp_eval: False 55 | no_cuda: False 56 | seed: 42 57 | local_rank: -1 58 | 59 | # Temporary variables 60 | fp16: True 61 | fp16_bfloat16: True 62 | n_gpu: 1 63 | device: 64 | train_batch_size: 65 | eval_batch_size: 66 | world_size: 67 | -------------------------------------------------------------------------------- /models/string_rule_reward.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import torch 4 | from torch import nn 5 | from transformers import PreTrainedTokenizer, PreTrainedModel 6 | from typing import List, Dict 7 | 8 | from models.reward_model_mixin import RewardModelMixin, RewardModelOutputs 9 | 10 | 11 | class MultipleChoiceAccuracyReward(nn.Module, RewardModelMixin): 12 | def __init__(self, base_model: PreTrainedModel, tokenizer: PreTrainedTokenizer): 13 | super().__init__() 14 | self.tokenizer = tokenizer 15 | self.option2int = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4} 16 | 17 | def forward(self, *args, **kwargs): 18 | pass 19 | 20 | def forward_value(self, seq: torch.LongTensor, attention_mask: torch.LongTensor, prompt_length: int, labels: List[int], *args, **kwargs) -> Dict: 21 | if prompt_length > 0: 22 | seq = seq[:, prompt_length:] 23 | decoded_outputs = self.tokenizer.batch_decode(seq, skip_special_tokens=True) 24 | 25 | regrex = "A|B|C|D|E" 26 | preds = [re.findall(regrex, text) for text in decoded_outputs] 27 | 28 | rewards = [] 29 | for pred, label in zip(preds, labels): 30 | if len(pred) == 0: 31 | rewards.append(-1) 32 | else: 33 | # rewards.append(int(self.option2int[pred[-1]] == label)) 34 | if self.option2int[pred[-1]] == label: 35 | rewards.append(1) 36 | else: 37 | rewards.append(-1) 38 | 39 | rewards = torch.tensor(rewards, dtype=torch.bfloat16, device=seq.device) 40 | return { 41 | "values": rewards, 42 | 
"chosen_end_scores": rewards, 43 | } 44 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm_sc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ../research.data/reclor_data/test.json 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: experiments/${exp_name} 20 | 21 | step: 22 | eval_sub_path: checkpoint-${step} 23 | 24 | sampling_params: 25 | max_tokens: 2048 26 | gpu_memory_utilization: 0.95 27 | 28 | read_tensor: 29 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 30 | service_based: False 31 | service_processor: 32 | 33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.n${sampling_params.n}.tem${sampling_params.temperature}.0shot.v1.1.json 34 | flush_file: ${output_file}l 35 | 36 | # Dataloader 37 | num_workers: 32 38 | prefetch_factor: 39 | 40 | # Training hyper-parameters 41 | per_gpu_train_batch_size: 1 42 | per_gpu_eval_batch_size: 1 43 | 44 | ddp_eval: False 45 | no_cuda: False 46 | seed: 42 47 | local_rank: -1 48 | 49 | # Temporary variables 50 | fp16: True 51 | fp16_bfloat16: True 52 | n_gpu: 1 53 | device: 54 | train_batch_size: 55 | eval_batch_size: 56 | world_size: 57 | 58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 59 | -------------------------------------------------------------------------------- /data/ar_lsat.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from data.logiqav2 import _format_option_list 4 | 5 | 6 | class ARLSATReader: 7 | rank2option = ['A', 'B', 'C', 'D', 'E'] 8 | 9 | def __init__(self, flat_options: bool = False, option_order: str = "ABCDE"): 10 | self.flat_options = flat_options 11 | self.option_order = option_order 12 | 13 | def __call__(self, file): 14 | all_context = [] 15 | all_question = [] 16 | all_option_list = [] 17 | all_label = [] 18 | print(file) 19 | data = json.load(open(file, "r")) 20 | for item in data: 21 | for q in item["questions"]: 22 | all_context.append(item["passage"]) 23 | all_question.append(q["question"]) 24 | 25 | options = [] 26 | ordered_label = -1 27 | for i, x in enumerate(self.option_order): 28 | idx = ord(x) - ord('A') 29 | options.append(q["options"][idx]) 30 | 31 | if x == q["answer"]: 32 | ordered_label = i 33 | 34 | # if "Test" not in file: 35 | assert ordered_label != -1, (q["answer"], q["options"], x) 36 | 37 | all_label.append(ordered_label) 38 | all_option_list.append(options) 39 | 40 | return [ 41 | { 42 | "context": context, 43 | "question": question, 44 | "option_list": _format_option_list(option_list, self.rank2option) if self.flat_options else option_list, 45 | "label": label, 46 | } for context, question, option_list, label in zip(all_context, all_question, all_option_list, all_label) 47 | ] 48 | 
-------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/folio_tems/react_dev_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - post_process: openai_react 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: ../research.data/FOLIO/data/v0.0/folio-train.jsonl 12 | dev_file: ../research.data/FOLIO/data/v0.0/folio-validation.jsonl 13 | test_file: ../research.data/FOLIO/data/v0.0/folio-validation.jsonl 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logiqav2.ComposePromptGenerator 25 | read_func: 26 | _target_: data.folio.FOLIO2QAReader 27 | instruction: 28 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 29 | prompt_name: react_v2 30 | few_shot_prompt: 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " # In version v2.1, we change the template to: ```xxx\n\nThought 1: ``` 35 | service_based: False 36 | service_processor: 37 | 38 | output_file: ${output_dir}/${eval_sub_path}/folio.dev.qa.react.v1.0.0shot.json 39 | flush_file: ${output_file}l 40 | 41 | post_process: 42 | answer_clean: 43 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 44 | regrex: "A|B" 45 | 46 | # Dataloader 47 | num_workers: 48 48 | prefetch_factor: 2 49 | 50 | ddp_eval: False 51 | no_cuda: False 52 | seed: 42 53 | local_rank: -1 54 | 55 | # Temporary variables 56 | fp16: True 57 | fp16_bfloat16: True 58 | n_gpu: 1 59 | device: 60 | train_batch_size: 61 | eval_batch_size: 62 | world_size: 63 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${dev_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: 20 | eval_sub_path: 21 | 22 | sampling_params: 23 | max_tokens: 2048 24 | gpu_memory_utilization: 0.95 25 | 26 | read_tensor: 27 | # split_size: -1 28 | # split_id: 0 29 | service_based: True 30 | service_processor: 31 | _target_: data.vllm.VLLMRequestGenerator 32 | api_url: http://0.0.0.0:6000/v1/completions 33 | max_tokens: ${sampling_params.max_tokens} 34 | model: llama2-7b-reclor-distil 35 | stop: [ "", "\n\n\n\n", "Context:\n" ] 36 | n: ${sampling_params.n} 37 | temperature: ${sampling_params.temperature} 38 | flush_file: ${flush_file} 39 | 40 | output_file: ${output_dir}/reclor.react.dev.0shot.v1.0.json 41 | flush_file: ${output_file}l 42 | 43 | # Dataloader 44 | num_workers: 16 45 | prefetch_factor: 46 | 47 | # Training hyper-parameters 48 | per_gpu_train_batch_size: 1 49 | per_gpu_eval_batch_size: 1 50 | 51 | ddp_eval: False 52 | no_cuda: False 53 | seed: 42 54 | local_rank: -1 55 | 56 | 
# Temporary variables 57 | fp16: True 58 | fp16_bfloat16: True 59 | n_gpu: 1 60 | device: 61 | train_batch_size: 62 | eval_batch_size: 63 | world_size: 64 | 65 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 66 | -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - post_process: math 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | _target_: data.math.gsm8k_gold_answer_extractor 27 | template: "{instruction}\n\n### Question: {query}\n\nSubQuestion 1: " 28 | instruction: "Given a question, please decompose it into sub-questions. For each sub-question, please answer it in a complete sentence, ending with \"The answer is\". When the original question is answerable, please start the sub-question with \"Now we can answer the question: \"." 29 | max_data_num: -1 30 | service_based: False 31 | api_based: False 32 | index_field: "index" 33 | 34 | sampling_params: 35 | stop: [ "", "\n\n\n\n", "### Instruction" ] 36 | 37 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.1.0shot.json 38 | flush_file: ${output_file}l 39 | 40 | # Dataloader 41 | num_workers: 48 42 | prefetch_factor: 2 43 | 44 | post_process: 45 | answer_clean: 46 | _target_: data.math.math_answer_cleaner 47 | separator: "The answer is" 48 | resume: False 49 | index_field: "index" 50 | label_field: "label" 51 | 52 | ddp_eval: False 53 | no_cuda: False 54 | seed: 42 55 | local_rank: -1 56 | 57 | # Temporary variables 58 | fp16: True 59 | fp16_bfloat16: True 60 | n_gpu: 1 61 | device: 62 | train_batch_size: 63 | eval_batch_size: 64 | world_size: 65 | -------------------------------------------------------------------------------- /scripts/sent_tf_react_step_encoding.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from FlagEmbedding import FlagModel 3 | import json 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--input_file", type=str) 11 | parser.add_argument("--model_path", type=str) 12 | parser.add_argument("--output_file", type=str) 13 | args = parser.parse_args() 14 | 15 | data = json.load(open(args.input_file, "r")) 16 | 17 | node_ids = [] 18 | node_types = [] 19 | node_rationales = [] 20 | node2offset = {} 21 | node2idx = {} 22 | for j, item in tqdm(enumerate(data)): 23 | node2offset[j] = [len(node_rationales), -1] 24 | node2idx[j] = {} 25 | for i in range(len(item["response"])): 26 | chain_nodes = item["nodes"][i] 27 | 28 | chain_node_ids = [node["id"] for node in chain_nodes] 29 | chain_node_types = [node["type"] for node in chain_nodes] 30 | chain_node_rationales = [node["content"] for node in chain_nodes] 31 | 32 | node2idx[j][i] = len(node_ids) 33 | 34 | node_ids.extend(chain_node_ids) 35 | 
node_types.extend(chain_node_types) 36 | node_rationales.extend(chain_node_rationales) 37 | 38 | assert "Finish[The answer is" in item["nodes"][i][-1]["content"] 39 | 40 | node2offset[j][1] = len(node_rationales) 41 | 42 | model = FlagModel(args.model_path, 43 | # query_instruction_for_retrieval="", 44 | use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation 45 | embeddings = model.encode(node_rationales) 46 | 47 | np.save(args.output_file, embeddings) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_rest_train_react_v1_0_0shot_sample.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/logiqav2@read_tensor: react_service_0shot_v1_0 4 | - post_process: openai_react 5 | # - api/vllm/vllm_params@sampling_params: sampling_param_greedy 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 15 | 16 | step: 2000 17 | eval_sub_path: checkpoint-${step} 18 | 19 | n: 10 20 | split_size: 4 21 | split_id: 0 22 | 23 | 24 | # Data loading 25 | read_tensor: 26 | template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: " 27 | split_size: ${split_size} 28 | split_id: ${split_id} 29 | service_based: False 30 | service_processor: 31 | 32 | sampling_params: 33 | _target_: vllm.SamplingParams 34 | n: ${n} 35 | temperature: 1.0 36 | top_p: 0.8 37 | stop: [ "", "\n\n\n\n" ] 38 | max_tokens: 2048 39 | 40 | save_best: False 41 | output_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.self-sft.A40.w8.v1.0 42 | 43 | suffix: ${n}.tem${sampling_params.temperature}.p${sampling_params.top_p}.s${split_id}-of-${split_size} 44 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-train.full.qa.react.v1.0.0shot.${suffix}.json 45 | flush_file: ${output_file}l 46 | 47 | # Dataloader 48 | num_workers: 96 49 | prefetch_factor: 2 50 | 51 | 52 | # Training hyper-parameters 53 | per_gpu_train_batch_size: 1 54 | per_gpu_eval_batch_size: 1 55 | 56 | ddp_eval: False 57 | no_cuda: False 58 | seed: 42 59 | local_rank: -1 60 | 61 | # Temporary variables 62 | fp16: True 63 | fp16_bfloat16: True 64 | n_gpu: 1 65 | device: 66 | train_batch_size: 67 | eval_batch_size: 68 | world_size: 69 | -------------------------------------------------------------------------------- /conf/api/vllm/math/math_gemma_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - post_process: math 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | _target_: data.math.math_gold_answer_extractor 27 | kv_mapping: 28 | instruction: query 29 | template: "{instruction}\n\n### Question: {query}\n\nSubQuestion 1: " 30 | instruction: "Given a question, please 
decompose it into sub-questions. For each sub-question, please answer it in a complete sentence, ending with \"The answer is\". When the original question is answerable, please start the sub-question with \"Now we can answer the question: \"." 31 | max_data_num: -1 32 | service_based: False 33 | api_based: False 34 | index_field: "idx" 35 | 36 | sampling_params: 37 | stop: [ "", "\n\n\n\n", "### Instruction" ] 38 | 39 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.1.0shot.json 40 | flush_file: ${output_file}l 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | post_process: 47 | answer_clean: 48 | _target_: data.math.math_answer_cleaner 49 | separator: "The answer is" 50 | resume: False 51 | index_field: "idx" 52 | label_field: "label" 53 | 54 | ddp_eval: False 55 | no_cuda: False 56 | seed: 42 57 | local_rank: -1 58 | 59 | # Temporary variables 60 | fp16: True 61 | fp16_bfloat16: True 62 | n_gpu: 1 63 | device: 64 | train_batch_size: 65 | eval_batch_size: 66 | world_size: 67 | -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/train_react_1shot_sample5_split_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - reader/reclor@read_tensor: react_service_1shot_v1_0 4 | - post_process: openai_react 5 | - api/vllm/vllm_params@sampling_params: sampling_param_sample 6 | - _self_ 7 | 8 | hydra: 9 | searchpath: 10 | - file://conf/ 11 | 12 | train_file: ../research.data/reclor_data/train.json 13 | dev_file: ../research.data/reclor_data/val.json 14 | test_file: ${train_file} 15 | 16 | save_best: False 17 | exp_name: 18 | exp_notes: 19 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1 20 | eval_sub_path: 21 | 22 | sampling_params: 23 | max_tokens: 4096 24 | gpu_memory_utilization: 0.95 25 | 26 | read_tensor: 27 | split_size: 4 28 | split_id: 0 29 | service_based: True 30 | service_processor: 31 | _target_: data.vllm.VLLMRequestGenerator 32 | api_url: http://0.0.0.0:6000/v1/completions 33 | max_tokens: ${sampling_params.max_tokens} 34 | model: mixtral-ins 35 | stop: [ "", "\n\n\n\n", "Context:\n" ] 36 | n: ${sampling_params.n} 37 | temperature: ${sampling_params.temperature} 38 | flush_file: ${flush_file} 39 | 40 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.${read_tensor.split_id}-${read_tensor.split_size}.v1.0.json 41 | flush_file: ${output_file}l 42 | 43 | # Dataloader 44 | num_workers: 16 45 | prefetch_factor: 46 | 47 | # Training hyper-parameters 48 | per_gpu_train_batch_size: 1 49 | per_gpu_eval_batch_size: 1 50 | 51 | ddp_eval: False 52 | no_cuda: False 53 | seed: 42 54 | local_rank: -1 55 | 56 | # Temporary variables 57 | fp16: True 58 | fp16_bfloat16: True 59 | n_gpu: 1 60 | device: 61 | train_batch_size: 62 | eval_batch_size: 63 | world_size: 64 | 65 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0 66 | -------------------------------------------------------------------------------- /data/reclor.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os.path 4 | from typing import List, Dict, Tuple, Union, Any, Callable 5 | 6 | from omegaconf.listconfig import ListConfig 7 | from torch.utils.data import Dataset 8 | from transformers import PreTrainedTokenizer 9 | 10 | from 
general_util.logger import get_child_logger 11 | from data.logiqav2 import _format_option_list 12 | 13 | logger = get_child_logger(__name__) 14 | 15 | 16 | class ReClorReader: 17 | rank2option = ['A', 'B', 'C', 'D'] 18 | 19 | def __init__(self, flat_options: bool = False, option_order: str = "ABCD"): 20 | self.flat_options = flat_options 21 | self.option_order = option_order 22 | 23 | def __call__(self, file): 24 | data = json.load(open(file, 'r')) 25 | 26 | all_context = [] 27 | all_question = [] 28 | all_option_list = [] 29 | all_label = [] 30 | for sample in data: 31 | all_context.append(sample["context"]) 32 | all_question.append(sample["question"]) 33 | 34 | options = [] 35 | ordered_label = -1 36 | for i, x in enumerate(self.option_order): 37 | idx = ord(x) - ord('A') 38 | options.append(sample["answers"][idx]) 39 | 40 | if "label" in sample and ord(x) - ord('A') == sample["label"]: 41 | ordered_label = i 42 | 43 | all_option_list.append(options) 44 | all_label.append(ordered_label) 45 | 46 | return [ 47 | { 48 | "context": context, 49 | "question": question, 50 | "option_list": _format_option_list(option_list, self.rank2option) if self.flat_options else option_list, 51 | "label": label, 52 | } for context, question, option_list, label in zip(all_context, all_question, all_option_list, all_label) 53 | ] 54 | -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.70b-chat-distil.logiqav2.dpo.A100.w3.v2.1/checkpoint-1000/logiqav2-dev.full.qa.decompose.llama2.7b.distil.dpo.v2.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 4 25 | instruction: 26 | few_shot_prompt: 27 | _target_: data.logiqav2.read_single_file 28 | file_path: data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md 29 | compose_keys: [ "context", "question", "option_list" ] 30 | max_data_num: -1 31 | api_based: False 32 | service_based: True 33 | service_processor: 34 | _target_: data.vllm.VLLMRequestGenerator 35 | api_url: http://localhost:8000/v1/completions 36 | max_tokens: 2048 37 | model: llama-2-7b-distil-dpo 38 | 39 | # Dataloader 40 | num_workers: 96 41 | prefetch_factor: 2 42 | 43 | output_dir: 44 | 45 | post_process: 46 | _target_: post_processors.openai_api_callback.OpenAICallBack 47 | output_file: ${output_file} 48 | answer_clean: 49 | _target_: post_processors.openai_api_callback.MCQAAnswerClean 50 | prompt: few-shot 51 | 52 | 53 | # Training hyper-parameters 54 | per_gpu_train_batch_size: 1 55 | per_gpu_eval_batch_size: 1 56 | 57 | ddp_eval: False 58 | no_cuda: False 59 | seed: 42 60 | local_rank: -1 61 | 62 | # Temporary variables 63 | n_gpu: 1 64 | device: 65 | train_batch_size: 66 | eval_batch_size: 67 | world_size: 68 | -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.70b-chat-distil.logiqav2.dpo.A100.w3.v2.1/checkpoint-1800/logiqav2-dev.full.qa.decompose.llama2.7b.distil.dpo.v2.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 4 25 | instruction: 26 | few_shot_prompt: 27 | _target_: data.logiqav2.read_single_file 28 | file_path: data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md 29 | compose_keys: [ "context", "question", "option_list" ] 30 | max_data_num: -1 31 | api_based: False 32 | service_based: True 33 | service_processor: 34 | _target_: data.vllm.VLLMRequestGenerator 35 | api_url: http://localhost:6001/v1/completions 36 | max_tokens: 2048 37 | model: llama-2-7b-distil-dpo-cp1800 38 | 39 | # Dataloader 40 | num_workers: 96 41 | prefetch_factor: 2 42 | 43 | output_dir: 44 | 45 | post_process: 46 | _target_: post_processors.openai_api_callback.OpenAICallBack 47 | output_file: ${output_file} 48 | answer_clean: 49 | _target_: post_processors.openai_api_callback.MCQAAnswerClean 50 | prompt: few-shot 51 | 52 | 53 | # Training hyper-parameters 54 | per_gpu_train_batch_size: 1 55 | per_gpu_eval_batch_size: 1 56 | 57 | ddp_eval: False 58 | no_cuda: False 59 | seed: 42 60 | local_rank: -1 61 | 62 | # Temporary variables 63 | n_gpu: 1 64 | device: 65 | train_batch_size: 66 | eval_batch_size: 67 | world_size: 68 | -------------------------------------------------------------------------------- /lora_share_trainer/utils/ds_utils.py: -------------------------------------------------------------------------------- 1 | from transformers import PreTrainedModel 2 | import deepspeed 3 | from fairscale.nn.model_parallel import initialize as mpu 4 | from omegaconf import DictConfig, OmegaConf 5 | from general_util import training_utils 6 | 7 | 8 | def init_ds_training_engine(model: PreTrainedModel, ds_cfg: DictConfig, global_cfg: DictConfig, ): 9 | ds_config = ds_cfg 10 | if "total_num_steps" in ds_config.scheduler.params: 11 | ds_config.scheduler.params.total_num_steps = global_cfg.max_steps 12 | ds_config.scheduler.params.warmup_num_steps = global_cfg.warmup_steps 13 | ds_config = OmegaConf.to_container(ds_config, resolve=True) 14 | ds_config["train_micro_batch_size_per_gpu"] = global_cfg.per_gpu_train_batch_size 15 | 16 | optim_params = training_utils.get_optimizer_grouped_parameters(model, global_cfg.actor_weight_decay) 17 | 18 | engine, optimizer, _, scheduler = deepspeed.initialize( 19 | model=model, 20 | model_parameters=optim_params, 21 | config_params=ds_config, 22 | mpu=mpu if mpu.model_parallel_is_initialized() else None, 23 | ) 24 | 25 | return engine, optimizer, scheduler 26 | 27 | 28 | def init_ds_eval_engine(model: PreTrainedModel, ds_cfg: DictConfig, global_cfg: DictConfig): 29 | ds_config = ds_cfg 30 | if ds_config.zero_optimization.stage != 3: 31 | ds_config.zero_optimization.stage = 0 32 | 33 | ds_config = OmegaConf.to_container(ds_config, resolve=True) 34 | ds_config["train_micro_batch_size_per_gpu"] = 
global_cfg.per_gpu_train_batch_size 35 | if "optimizer" in ds_config: 36 | ds_config.pop("optimizer") 37 | if "scheduler" in ds_config: 38 | ds_config.pop("scheduler") 39 | 40 | engine, *_ = deepspeed.initialize( 41 | model=model, 42 | config_params=ds_config, 43 | mpu=mpu if mpu.model_parallel_is_initialized() else None, 44 | ) 45 | 46 | return engine 47 | -------------------------------------------------------------------------------- /general_util/mixin.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Dict, List, Tuple 3 | 4 | import torch 5 | 6 | from general_util.average_meter import LogMetric, AverageMeter 7 | from general_util.logger import get_child_logger 8 | 9 | logger = get_child_logger("Mixin") 10 | 11 | 12 | class LogMixin: 13 | eval_metrics: LogMetric = None 14 | 15 | def init_metric(self, *metric_names): 16 | self.eval_metrics = LogMetric(*metric_names) 17 | 18 | def get_eval_log(self, reset=False, ddp=False, device='cpu'): 19 | 20 | if self.eval_metrics is None: 21 | logger.warning("The `eval_metrics` attribute hasn't been initialized.") 22 | 23 | if ddp: 24 | for metric in self.eval_metrics.metrics.values(): 25 | metric.gather(device=device) 26 | 27 | results = self.eval_metrics.get_log() 28 | 29 | _eval_metric_log = '\t'.join([f"{k}: {v}" for k, v in results.items()]) 30 | 31 | if reset: 32 | self.eval_metrics.reset() 33 | 34 | return _eval_metric_log, results 35 | 36 | 37 | class MetricMixin: 38 | # TODO: How can we use hydra to decouple the metric computation from the model? 39 | def __init__(self, metrics: List[Tuple[str, str, str, str]]): 40 | self.metrics = { 41 | name: { 42 | "key": key, 43 | "val": val, 44 | "func": func, 45 | "meter": AverageMeter() 46 | } for key, val, func, name in metrics 47 | } 48 | 49 | 50 | class PredictionMixin: 51 | tensor_dict: Dict[str, List] = defaultdict(list) 52 | 53 | def reset_predict_tensors(self): 54 | self.tensor_dict = defaultdict(list) 55 | 56 | def concat_predict_tensors(self, **tensors: torch.Tensor): 57 | for k, v in tensors.items(): 58 | self.tensor_dict[k].extend(v.detach().cpu().tolist()) 59 | 60 | def get_predict_tensors(self): 61 | return self.tensor_dict 62 | -------------------------------------------------------------------------------- /data/folio.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os.path 4 | from typing import List, Dict, Tuple, Union, Any, Callable 5 | 6 | from omegaconf.listconfig import ListConfig 7 | from torch.utils.data import Dataset 8 | from transformers import PreTrainedTokenizer 9 | 10 | from general_util.logger import get_child_logger 11 | from data.logiqav2 import _format_option_list 12 | 13 | logger = get_child_logger(__name__) 14 | 15 | 16 | class FOLIO2QAReader: 17 | rank2option = ['A', 'B'] 18 | 19 | def __init__(self,): 20 | self.context = "There is one hypothesis and a group of premises:\n\nHypothesis:\n{}\n\nPremises:\n{}" 21 | self.question = "Verify the hypothesis is true or false based on the premises." 22 | self.option = "A. True\nB. False" 23 | 24 | def __call__(self, file): 25 | all_context = [] 26 | all_option_list = [] 27 | all_label = [] 28 | with open(file) as f: 29 | for line in f.readlines(): 30 | item = json.loads(line) 31 | 32 | conclusion = item["conclusion"] 33 | premises = item["premises"] 34 | premises_str = [] 35 | for i, premise in enumerate(premises): 36 | premises_str.append("{}. 
{}".format(i + 1, premise)) 37 | premises_str = "\n".join(premises_str) 38 | label = 0 if item["label"] == "True" else 1 39 | 40 | all_context.append(self.context.format(conclusion, premises_str)) 41 | all_option_list.append(self.option) 42 | all_label.append(label) 43 | 44 | return [ 45 | { 46 | "context": context, 47 | "question": self.question, 48 | "option_list": option_list, 49 | "label": label, 50 | } for context, option_list, label in zip(all_context, all_option_list, all_label) 51 | ] 52 | 53 | 54 | -------------------------------------------------------------------------------- /scripts/split_response_train_dev_according2item_id.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import json 4 | import os.path 5 | import random 6 | from glob import glob 7 | 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--input_file', type=str, required=True) 12 | parser.add_argument("--output_file", type=str, required=True) 13 | args = parser.parse_args() 14 | 15 | item_id2responses = collections.defaultdict(list) 16 | if os.path.exists(args.input_file): 17 | data = json.load(open(args.input_file)) 18 | else: 19 | files = glob(args.input_file) 20 | print(files) 21 | data = [] 22 | for file in files: 23 | data.extend(json.load(open(file))) 24 | 25 | for item in data: 26 | item_id, state_id = item['id'].split("_") 27 | item_id2responses[item_id].append(item) 28 | 29 | print("data size: {}".format(len(item_id2responses))) 30 | print(f"Response size: {len(data)}") 31 | 32 | data_ids = list(item_id2responses.keys()) 33 | # read `dev_num` from command line 34 | dev_num = int(input("dev_num: ")) 35 | dev_ids = random.sample(data_ids, dev_num) 36 | dev_ids = set(dev_ids) 37 | 38 | dev_data = [] 39 | train_data = [] 40 | for item_id, responses in item_id2responses.items(): 41 | if item_id in dev_ids: 42 | dev_data.extend(responses) 43 | else: 44 | train_data.extend(responses) 45 | 46 | print("dev size: {}".format(len(dev_data))) 47 | print("train size: {}".format(len(train_data))) 48 | 49 | json.dump(dev_data, open(args.output_file.replace(".json", f".sub_dev_itemid.{len(dev_data)}.json"), "w"), indent=2, ensure_ascii=False) 50 | json.dump(train_data, open(args.output_file.replace(".json", f".sub_train_itemid.{len(train_data)}.json"), "w"), indent=2, ensure_ascii=False) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/split_pair.sh: -------------------------------------------------------------------------------- 1 | sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/fix_hack_data_dir/ 2 | dpo_data="logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json" 3 | step_dpo_data="logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.json" 4 | 5 | seed=43 6 | 7 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio40.json --ratio 0.4 8 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio60.s$seed.json --ratio 0.6 9 | python 
scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio80.json --ratio 0.8 10 | 11 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio40.json" --ratio 0.4 12 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio60.s$seed.json" --ratio 0.6 13 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio80.json" --ratio 0.8 -------------------------------------------------------------------------------- /conf/api/vllm/math/math_deepseek_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | # - post_process: deepseek 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | # _target_: data.math.math_gold_answer_extractor 27 | _target_: data.math.math_gold_answer_extractor_deepseek 28 | kv_mapping: 29 | instruction: question 30 | template: "User: {question}\nPlease reason step by step, and put your final answer within {instruction}.\n\nAssistant:" 31 | instruction: "\\boxed{}" # Hack here! because {} wil report error. 
32 | max_data_num: -1 33 | service_based: False 34 | api_based: False 35 | index_field: "idx" 36 | 37 | sampling_params: 38 | stop: [ "", "\n\n\n\n", "### Instruction", "<|end▁of▁sentence|>" ] 39 | 40 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.1.0shot.json 41 | flush_file: ${output_file}l 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | 48 | post_process: 49 | # _target_: post_processors.openai_api_callback.OpenAIMATHCallBack 50 | _target_: post_processors.openai_api_callback.DeepSeekMathCallBack 51 | output_file: ${output_file} 52 | # answer_clean: 53 | # _target_: data.math.math_boxed_answer_cleaner_proxy 54 | eval_fn: math 55 | answer_clean: math 56 | resume: False 57 | index_field: "idx" 58 | label_field: "label" 59 | 60 | ddp_eval: False 61 | no_cuda: False 62 | seed: 42 63 | local_rank: -1 64 | 65 | # Temporary variables 66 | fp16: True 67 | fp16_bfloat16: True 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v1.0/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 7 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554_p1.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://localhost:8000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-dpo-cp1600 40 | 41 | # Dataloader 42 | num_workers: 96 43 | prefetch_factor: 2 44 | 45 | output_dir: 46 | 47 | post_process: 48 | _target_: post_processors.openai_api_callback.OpenAICallBack 49 | output_file: ${output_file} 50 | answer_clean: 51 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 52 | # prompt: "few-shot" 53 | # separator: "Finish" 54 | # separate_idx: 1 55 | 56 | 57 | # Training hyper-parameters 58 | per_gpu_train_batch_size: 1 59 | per_gpu_eval_batch_size: 1 60 | 61 | ddp_eval: False 62 | no_cuda: False 63 | seed: 42 64 | local_rank: -1 65 | 66 | # Temporary variables 67 | n_gpu: 1 68 | device: 69 | train_batch_size: 70 | eval_batch_size: 71 | world_size: 72 | -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_react_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: 
../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w3.v1.0.fix/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 7 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554_p1.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://localhost:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-cp1600 40 | 41 | # Dataloader 42 | num_workers: 96 43 | prefetch_factor: 2 44 | 45 | output_dir: 46 | 47 | post_process: 48 | _target_: post_processors.openai_api_callback.OpenAICallBack 49 | output_file: ${output_file} 50 | answer_clean: 51 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 52 | # prompt: "few-shot" 53 | # separator: "Finish" 54 | # separate_idx: 1 55 | 56 | 57 | # Training hyper-parameters 58 | per_gpu_train_batch_size: 1 59 | per_gpu_eval_batch_size: 1 60 | 61 | ddp_eval: False 62 | no_cuda: False 63 | seed: 42 64 | local_rank: -1 65 | 66 | # Temporary variables 67 | n_gpu: 1 68 | device: 69 | train_batch_size: 70 | eval_batch_size: 71 | world_size: 72 | -------------------------------------------------------------------------------- /scripts/cot/deepseek_cot_sample_steps.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import os 4 | from glob import glob 5 | from functools import partial 6 | from multiprocessing import Pool 7 | from tqdm import tqdm 8 | 9 | 10 | def acc_func(item, response_field: str = "response", offset: int = 0): 11 | s = set() 12 | acc_steps = [] 13 | for i, (resp, p) in enumerate(zip(item[response_field], item["pred"])): 14 | steps = resp.split("\n") 15 | acc = "" 16 | if offset > 0: 17 | steps = steps[:-offset] 18 | for j, step in enumerate(steps): 19 | if j == 0: 20 | acc = step 21 | else: 22 | acc += "\n" + step 23 | 24 | if acc in s: 25 | continue 26 | 27 | s.add(acc) 28 | acc_steps.append({"id": f"{item['id']}_{i}_{j}", "response": acc}) 29 | 30 | item["accumulated_response"] = acc_steps 31 | return item 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("--input_file", type=str, required=True) 37 | parser.add_argument("--offset", type=int, default=0) 38 | parser.add_argument("--num_workers", type=int, default=16) 39 | args = parser.parse_args() 40 | 41 | if os.path.exists(args.input_file): 42 | files = [args.input_file] 43 | else: 44 | files = glob(args.input_file) 45 | data = [] 46 | for file in files: 47 | data += json.load(open(file, "r")) 48 | 49 | annotate = partial(acc_func, response_field="response", offset=args.offset) 50 | with Pool(args.num_workers) as p: 51 | data = list(tqdm(p.imap(annotate, data), total=len(data))) 52 | 53 | outputs = [item for item in data if "accumulated_response" in item 
and item["accumulated_response"]] 54 | print(f"Number of items with accumulated responses: {len(outputs)}") 55 | json.dump(outputs, open(args.input_file.replace(".json", f"_accumulated_off{args.offset}.json"), "w")) 56 | 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6001/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-dpo-v2.1-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 96 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_deepseek_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | # - post_process: deepseek 4 | - api/vllm/vllm_params@sampling_params: sampling_param_greedy 5 | - _self_ 6 | 7 | hydra: 8 | searchpath: 9 | - file://conf/ 10 | 11 | train_file: 12 | dev_file: 13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json 14 | 15 | save_best: False 16 | exp_name: 17 | exp_notes: 18 | output_dir: experiments/${exp_name} 19 | 20 | step: 800 21 | eval_sub_path: checkpoint-${step} 22 | 23 | read_tensor: 24 | _target_: data.logic_combine.ResponseAlignDataset 25 | aligner: 26 | # _target_: data.math.math_gold_answer_extractor 27 | _target_: data.math.gsm8k_gold_answer_extractor 28 | # query_field: "query" 29 | response_field: "response" 30 | # kv_mapping: 31 | # instruction: question 32 | template: "User: {query}\nPlease reason step by step, and put your final answer within 
{instruction}.\n\nAssistant:" 33 | instruction: "\\boxed{}" # Hack here! because {} wil report error. 34 | max_data_num: -1 35 | service_based: False 36 | api_based: False 37 | index_field: "index" 38 | 39 | sampling_params: 40 | stop: [ "", "\n\n\n\n", "### Instruction", "<|end▁of▁sentence|>" ] 41 | 42 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.1.0shot.json 43 | flush_file: ${output_file}l 44 | 45 | # Dataloader 46 | num_workers: 48 47 | prefetch_factor: 2 48 | 49 | 50 | post_process: 51 | # _target_: post_processors.openai_api_callback.OpenAIMATHCallBack 52 | _target_: post_processors.openai_api_callback.DeepSeekMathCallBack 53 | output_file: ${output_file} 54 | # answer_clean: 55 | # _target_: data.math.math_boxed_answer_cleaner_proxy 56 | eval_fn: gsm8k 57 | answer_clean: gsm8k 58 | resume: False 59 | index_field: "index" 60 | label_field: "label" 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | fp16: True 69 | fp16_bfloat16: True 70 | n_gpu: 1 71 | device: 72 | train_batch_size: 73 | eval_batch_size: 74 | world_size: 75 | -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_218_0.md: -------------------------------------------------------------------------------- 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms. For each logic form, the format is [predicate](entity 1, ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate. 2 | Use logical operations to derive the correct option. Common logical operators include AND, OR, NOT, and ==> (logically implies). 3 | 4 | [Context]: 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water. 6 | 7 | [Question]: 8 | Which of the followings, if true, can best support the above statement? 9 | 10 | [Options]: 11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet. 12 | B. Many of the water in interstellar space exists in gaseous form. 13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice. 14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time. 15 | 16 | Here are the logic forms for context, question and options: 17 | 18 | [Context] 19 | 1. isGasGiant(Jupiter) AND isLargestInSolarSystem(Jupiter) 20 | 2. mass(Jupiter) = 2.5 * sumOfMass(otherSevenPlanetsInSolarSystem) 21 | 3. composedOfWaterIce(surroundingMoons(Jupiter)) > 70 22 | 4. containsConsiderableWater(atmosphere(Jupiter)) 23 | 24 | [Question] 25 | Which of the followings, if true, can best support the statement Context-4? 26 | 27 | [Options] 28 | A. fallOntoPlanet(satellite, planet) AND afterHundredsOfMillionsOfYears() 29 | B. existsInGaseousForm(water, interstellarSpace) 30 | C. isGasGiant(Uranus) AND containsLotsOfWaterIce(Uranus) 31 | D. 
formedFromSameGasAndDust(satellite, planet) AND atSameTime(satellite, planet) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.0shot.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:8000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-dpo-v2.1-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 96 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | 
api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v2.1-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 96 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 1600 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v5.0-cp${step} 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: 
../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | num_shot: 5 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.0shot.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:8000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v2.1-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 96 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v5.1-cp${step} 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: 
post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 1600 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v5.0-cp${step} 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0.fix/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.1shot.json 16 | flush_file: ${output_file}l 17 | 18 | # 
Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v5.0-fix-cp${step} 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0.fix/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-step-dpo-v5.0-fix-cp${step} 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 
| local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 1600 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-sft70b-v1.0-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 1600 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.1shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: 
data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | api_url: http://0.0.0.0:6000/v1/completions 38 | max_tokens: 2048 39 | model: llama-2-7b-sft70b-v1.0-cp1600 40 | stop: [ "", "\n\n\n\n" ] 41 | 42 | # Dataloader 43 | num_workers: 48 44 | prefetch_factor: 2 45 | 46 | output_dir: 47 | 48 | post_process: 49 | _target_: post_processors.openai_api_callback.OpenAICallBack 50 | output_file: ${output_file} 51 | answer_clean: 52 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 53 | # prompt: "few-shot" 54 | # separator: "Finish" 55 | # separate_idx: 1 56 | 57 | 58 | # Training hyper-parameters 59 | per_gpu_train_batch_size: 1 60 | per_gpu_eval_batch_size: 1 61 | 62 | ddp_eval: False 63 | no_cuda: False 64 | seed: 42 65 | local_rank: -1 66 | 67 | # Temporary variables 68 | n_gpu: 1 69 | device: 70 | train_batch_size: 71 | eval_batch_size: 72 | world_size: 73 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-dpo-v4.1-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_llama_sft_v2.0.sh: -------------------------------------------------------------------------------- 1 | 
data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600 2 | #data_dir=experiments/llama2.7b.chat.logiqav2.70b-distil.dpo.H100.w4.v1.0/checkpoint-1600 3 | 4 | #ratio_s=0.2 5 | #ratio=0.3 6 | ratio_s=0.4 7 | ratio=0.2 8 | 9 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \ 10 | # --output_file $data_dir/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.6.json \ 11 | # --split_num 20 --ratio_s 0.2 --ratio 0.6 12 | 13 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \ 14 | # --output_file $data_dir/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \ 15 | # --split_num 20 --ratio_s ${ratio_s} --ratio ${ratio} 16 | 17 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.react.sample5.v1.0.0shot.json \ 18 | # --output_file $data_dir/react-inter-states/logiqav2-train.react.v1.0.0shot.sample5.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \ 19 | # --split_num 10 --ratio_s ${ratio_s} --ratio ${ratio} 20 | 21 | #python scripts/sample_react_inter_states_v2.1.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \ 22 | # --output_file $data_dir/react-inter-states/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs${ratio_s}.r${ratio}.json \ 23 | # --split_num 4 --ratio_s ${ratio_s} --ratio ${ratio} 24 | 25 | 26 | 27 | # ================================= ReClor 28 | data_dir="experiments/llama2.7b.chat.mixtral.dpo-sft.A100.40.w8.v1.0/checkpoint-1200" 29 | ratio_s=0.2 30 | ratio=0.3 31 | python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/reclor.react.train.0shot.sample10.v1.0.json \ 32 | --output_file $data_dir/react-inter-states/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \ 33 | --split_num 1 --ratio_s ${ratio_s} --ratio ${ratio} 34 | 35 | 36 | -------------------------------------------------------------------------------- /general_util/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | from torch import distributed as dist 5 | 6 | _root_name = 'FK' 7 | 8 | 9 | def get_child_logger(child_name): 10 | # _local_rank = getattr(os.environ, "LOCAL_RANK", "") 11 | # 12 | # if _root_name == "FK" and _local_rank: 13 | # return logging.getLogger(_root_name + '.' + _local_rank + '.' + child_name) 14 | 15 | return logging.getLogger(_root_name + '.' + child_name) 16 | 17 | 18 | def setting_logger(log_file: str, local_rank: int = -1): 19 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 20 | datefmt='%m/%d/%Y %H:%M:%S', 21 | level=logging.INFO if local_rank in [-1, 0] else logging.WARNING) 22 | 23 | # global _root_name 24 | # if local_rank != -1 and _root_name == "FK": 25 | # _root_name = _root_name + '.' 
+ str(local_rank) 26 | logger = logging.getLogger(_root_name) 27 | logger.setLevel(logging.INFO if local_rank in [-1, 0] else logging.WARNING) 28 | 29 | rf_handler = logging.StreamHandler(sys.stderr) 30 | rf_handler.setLevel(logging.INFO) 31 | rf_handler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 32 | datefmt='%m/%d/%Y %H:%M:%S')) 33 | 34 | output_dir = './log_dir' 35 | if local_rank not in [-1, 0]: 36 | dist.barrier() 37 | 38 | if not os.path.exists(output_dir): 39 | os.makedirs(output_dir) 40 | 41 | if local_rank == 0: 42 | dist.barrier() 43 | 44 | if log_file: 45 | model_name = "-".join(log_file.replace('/', ' ').split()[1:]) 46 | f_handler = logging.FileHandler(os.path.join( 47 | output_dir, model_name + '-output.log')) 48 | f_handler.setLevel(logging.INFO) 49 | f_handler.setFormatter(logging.Formatter(fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 50 | datefmt='%m/%d/%Y %H:%M:%S')) 51 | 52 | logger.addHandler(f_handler) 53 | 54 | return logger 55 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | # few_shot_prompt: 29 | # _target_: data.logiqav2.read_single_file 30 | # file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-dpo-v4.1-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | 
- _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 3200 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w2.v3.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-step-dpo-v3.0-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_0.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 1200 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A40.w4.v4.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-step-dpo-v4.0-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 
43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_3.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 800 14 | 15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v4.3/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-step-dpo-v4.3-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | - _self_ 4 | 5 | hydra: 6 | searchpath: 7 | - file://conf/ 8 | 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt 10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt 12 | 13 | step: 2000 14 | 15 | output_file: 
experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v4.1.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json 16 | flush_file: ${output_file}l 17 | 18 | # Data loading 19 | read_tensor: 20 | _target_: data.logiqav2.ComposePromptGenerator 21 | read_func: 22 | _target_: data.logiqav2.LogicQAReader 23 | flat_options: True 24 | template_id: 8 25 | instruction: 26 | _target_: data.prompts.logiqav2.react.prompts.get_prompt 27 | prompt_name: react_v2 28 | few_shot_prompt: 29 | _target_: data.logiqav2.read_single_file 30 | file_path: data/prompts/logiqav2/react/train_4554.txt 31 | compose_keys: [ "context", "question", "option_list" ] 32 | max_data_num: -1 33 | api_based: False 34 | service_based: True 35 | service_processor: 36 | _target_: data.vllm.VLLMRequestGenerator 37 | # api_url: http://0.0.0.0:8000/v1/completions 38 | api_url: http://0.0.0.0:6000/v1/completions 39 | max_tokens: 2048 40 | model: llama-2-7b-step-dpo-v4.1.1-cp${step} 41 | stop: [ "", "\n\n\n\n" ] 42 | 43 | # Dataloader 44 | num_workers: 48 45 | prefetch_factor: 2 46 | 47 | output_dir: 48 | 49 | post_process: 50 | _target_: post_processors.openai_api_callback.OpenAICallBack 51 | output_file: ${output_file} 52 | answer_clean: 53 | _target_: post_processors.openai_api_callback.ReActSeparatorClean 54 | # prompt: "few-shot" 55 | # separator: "Finish" 56 | # separate_idx: 1 57 | 58 | 59 | # Training hyper-parameters 60 | per_gpu_train_batch_size: 1 61 | per_gpu_eval_batch_size: 1 62 | 63 | ddp_eval: False 64 | no_cuda: False 65 | seed: 42 66 | local_rank: -1 67 | 68 | # Temporary variables 69 | n_gpu: 1 70 | device: 71 | train_batch_size: 72 | eval_batch_size: 73 | world_size: 74 | -------------------------------------------------------------------------------- /scripts/merge_response.py: -------------------------------------------------------------------------------- 1 | import json 2 | from glob import glob 3 | import os 4 | import argparse 5 | 6 | """ 7 | This script simply merges the sampled responses; use `construct_dpo_data_from_react_response_v1.1.py` afterwards to remove duplicates and calibrate the predictions.
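A typical invocation (the file paths below are illustrative only, not fixed by this repository) is: python scripts/merge_response.py --input_file "experiments/<run_name>/checkpoint-<step>/logiqav2-dev.full.qa.react.*.json" --output_file experiments/<run_name>/merged.logiqav2-dev.react.json, where --input_file may be a single path or a glob pattern (quote it so the shell does not expand it); all matched files are merged per item id.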
8 | """ 9 | 10 | 11 | def merge_response(item_a, item_b): 12 | a_responses = item_a["response"] 13 | b_responses = item_b["response"] 14 | 15 | preds_a = item_a["pred"] 16 | preds_b = item_b["pred"] 17 | 18 | new_response = a_responses + b_responses 19 | new_pred = preds_a + preds_b 20 | 21 | assert item_a["id"] == item_b["id"] 22 | assert item_a["text"] == item_b["text"] 23 | assert item_a["label"] == item_b["label"] 24 | 25 | new_item = { 26 | "id": item_a["id"], 27 | "text": item_a["text"], 28 | "label": item_a["label"], 29 | "response": new_response, 30 | "pred": new_pred, 31 | } 32 | return new_item 33 | 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument("--input_file", type=str) 38 | parser.add_argument("--output_file", type=str) 39 | args = parser.parse_args() 40 | 41 | if os.path.exists(args.input_file): 42 | files = [args.input_file] 43 | else: 44 | files = glob(args.input_file) 45 | print(files) 46 | 47 | data = [] 48 | for file in files: 49 | data.extend(json.load(open(file))) 50 | print(f"Total number of data: ", len(data)) 51 | 52 | id2data = {} 53 | for item in data: 54 | if item["id"] in id2data: 55 | id2data[item["id"]] = merge_response(id2data[item["id"]], item) 56 | else: 57 | id2data[item["id"]] = item 58 | print(f"Total number of data after merging: ", len(id2data)) 59 | 60 | avg_resp_num = 0 61 | for item in id2data.values(): 62 | avg_resp_num += len(item["response"]) 63 | avg_resp_num /= len(id2data) 64 | print(f"Average number of responses: {avg_resp_num}") 65 | 66 | data = list(id2data.values()) 67 | json.dump(data, open(args.output_file, "w")) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /scripts/deepspeed/ds_full_checkpoint2hf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from glob import glob 5 | from pathlib import Path 6 | 7 | import torch 8 | import transformers 9 | from accelerate import init_empty_weights 10 | from transformers import AutoModelForCausalLM 11 | 12 | 13 | def extract_weight(mp_states): 14 | state_dicts = torch.load(mp_states, map_location="cpu") 15 | state_dicts = state_dicts["module"] 16 | return state_dicts 17 | 18 | 19 | def write_model(input_base_path, mp_states_name, config_dir): 20 | config = transformers.AutoConfig.from_pretrained(config_dir) 21 | with init_empty_weights(): 22 | model = AutoModelForCausalLM.from_config(config) 23 | 24 | if os.path.exists(input_base_path): 25 | checkpoint_dirs = [input_base_path] 26 | else: 27 | checkpoint_dirs = glob(input_base_path, recursive=True) 28 | print(f"Found checkpoints: {checkpoint_dirs}") 29 | 30 | for checkpoint_dir in checkpoint_dirs: 31 | print(f"Writing checkpoint: {checkpoint_dir}") 32 | states_file = os.path.join(checkpoint_dir, mp_states_name) 33 | checkpoint_state_dict = extract_weight(states_file) 34 | step = checkpoint_dir.split("global_step")[-1] 35 | save_dir = os.path.join(os.path.dirname(checkpoint_dir), f"checkpoint-{step}") 36 | print(f"Saving checkpoint to {save_dir}") 37 | model.save_pretrained(save_dir, state_dict=checkpoint_state_dict, max_shard_size="3GB", safe_serialization=False) 38 | 39 | 40 | def main(): 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument( 43 | "--input_dir", 44 | help="Location of LLaMA weights, which contains tokenizer.model and model folders", 45 | ) 46 | parser.add_argument("--mp_states_name", type=str, 
default="mp_rank_00_model_states.pt") 47 | parser.add_argument( 48 | "--config_dir", 49 | ) 50 | args = parser.parse_args() 51 | write_model( 52 | input_base_path=args.input_dir, 53 | mp_states_name=args.mp_states_name, 54 | config_dir=args.config_dir, 55 | ) 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | --------------------------------------------------------------------------------