├── models
    ├── phi.py
    ├── reward_model_mixin.py
    └── string_rule_reward.py
├── general_util
    ├── __init__.py
    ├── mixin.py
    └── logger.py
├── conf
    ├── engines
    │   └── remax_defaults.yaml
    ├── hydra
    │   └── default.yaml
    ├── api
    │   ├── vllm
    │   │   ├── vllm_params
    │   │   │   ├── sampling_param_greedy.yaml
    │   │   │   └── sampling_param_sample.yaml
    │   │   ├── llama2-7b
    │   │   │   ├── logiqav2_qa_react_0shot_tem_v1_0.yaml
    │   │   │   ├── logiqav2_qa_react_test_0shot_tem_v1_0.yaml
    │   │   │   ├── logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml
    │   │   │   ├── logiqav2_qa_react_turbosft_test_0shot_tem_v1_0_s0.yaml
    │   │   │   ├── logiqav2_qa_react_70bdistil_dpo_test_0shot_tem_v1_0.yaml
    │   │   │   ├── logiqav2_tems
    │   │   │   │   ├── react_dev_0shot_tem_v1_0_o1.yaml
    │   │   │   │   ├── react_dev_0shot_tem_v1_0_o2.yaml
    │   │   │   │   ├── react_test_0shot_tem_v1_0_o1.yaml
    │   │   │   │   ├── react_test_0shot_tem_v1_0_o2.yaml
    │   │   │   │   ├── react_dev_0shot_tem_v1_0.yaml
    │   │   │   │   ├── react_dev_0shot_tem_v2_0.yaml
    │   │   │   │   ├── react_test_0shot_tem_v1_0.yaml
    │   │   │   │   ├── react_test_0shot_tem_v2_0.yaml
    │   │   │   │   ├── react_dev_0shot_tem_v2_1.yaml
    │   │   │   │   ├── react_test_0shot_tem_v2_1.yaml
    │   │   │   │   ├── react_test_1shot_tem_v2_1.yaml
    │   │   │   │   ├── react_dev_0shot_tem_v2_1_sc.yaml
    │   │   │   │   ├── react_test_0shot_tem_v2_1_sc.yaml
    │   │   │   │   └── react_train_0shot_sample_tem_v2_0.yaml
    │   │   │   ├── logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml
    │   │   │   ├── logiqav2_qa_react_70bdistil_step_dpo_test_0shot_tem_v1_0.yaml
    │   │   │   ├── reclor_tems
    │   │   │   │   ├── test_react_0shot_v1_0_vllm.yaml
    │   │   │   │   ├── dev_react_0shot_v1_0_vllm.yaml
    │   │   │   │   ├── test_react_0shot_v1_1_vllm.yaml
    │   │   │   │   ├── dev_react_0shot_v1_1_vllm.yaml
    │   │   │   │   ├── dev_react_0shot_v1_1_vllm_sc.yaml
    │   │   │   │   ├── test_react_0shot_v1_1_vllm_sc.yaml
    │   │   │   │   └── dev_react_0shot_v1_0_service.yaml
    │   │   │   ├── ar_lsat_tems
    │   │   │   │   ├── dev_react_v1_0.yaml
    │   │   │   │   └── test_react_v1_0.yaml
    │   │   │   ├── folio_tems
    │   │   │   │   └── react_dev_0shot_tem_v1_0.yaml
    │   │   │   ├── logiqav2_qa_rest_train_react_v1_0_0shot_sample.yaml
    │   │   │   ├── logiqav2_qa_dev_react_dpo_v1_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_dpo_v2_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v2_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v5_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml
    │   │   │   ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml
    │   │   │   ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_1shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_dpo_v4_1.yaml
    │   │   │   ├── logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v3_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v4_0.yaml
    │   │   │   ├── logiqav2_qa_dev_react_step_dpo_v4_3.yaml
    │   │   │   └── logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml
    │   │   ├── mistral
    │   │   │   ├── logiqav2
    │   │   │   │   └── tems
    │   │   │   │   │   ├── react_test_1shot_tem_v1_1.yaml
    │   │   │   │   │   ├── react_test_1shot_tem_v1_0.yaml
    │   │   │   │   │   └── react_test_1shot_tem_v2_0.yaml
    │   │   │   └── reclor
    │   │   │   │   ├── train_react_1shot_sample5_v1_0.yaml
    │   │   │   │   ├── dev_react_1shot_v1_0.yaml
    │   │   │   │   └── train_react_1shot_sample5_split_v1_0.yaml
    │   │   ├── llama2-70b
    │   │   │   ├── reclor
    │   │   │   │   ├── train_react_1shot_sample5_v1_0.yaml
    │   │   │   │   └── dev_react_1shot_v1_0.yaml
    │   │   │   └── ar_lsat
    │   │   │   │   ├── dev_react_v1_0.yaml
    │   │   │   │   ├── dev_react_v1_1.yaml
    │   │   │   │   └── dev_react_1shot_v2_0.yaml
    │   │   ├── math
    │   │   │   ├── gsm8k_gemma_test_0shot_tem_v1_0.yaml
    │   │   │   ├── math_gemma_test_0shot_tem_v1_0.yaml
    │   │   │   ├── gsm8k_gemma_test_0shot_tem_v1_1.yaml
    │   │   │   ├── math_gemma_test_0shot_tem_v1_1.yaml
    │   │   │   ├── math_deepseek_test_0shot_tem_v1_1.yaml
    │   │   │   └── gsm8k_deepseek_test_0shot_tem_v1_1.yaml
    │   │   ├── logiqav2_qa_dev_decompose_dpo_v2_0.yaml
    │   │   ├── logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml
    │   │   └── logiqav2_qa_dev_react_step_dpo_v1_0.yaml
    │   ├── gpt35turbo
    │   │   ├── reclor
    │   │   │   ├── train_react_v1_0_1shot_sample10.yaml
    │   │   │   └── dev_react_v1_0_1shot.yaml
    │   │   ├── ar_lsat
    │   │   │   └── dev_react_1shot_v1_0.yaml
    │   │   └── logiqav2
    │   │   │   └── dev_react_v1_0_1shot.yaml
    │   └── gpt4
    │   │   ├── reclor
    │   │       └── dev_react_v1_0_1shot.yaml
    │   │   └── logiqav2
    │   │       └── dev_react_v1_0_1shot.yaml
    ├── post_process
    │   └── openai_react.yaml
    ├── reader
    │   ├── reclor
    │   │   ├── react_service_0shot_v1_0.yaml
    │   │   └── react_service_1shot_v1_0.yaml
    │   └── logiqav2
    │   │   ├── react_service_0shot_v1_0.yaml
    │   │   └── react_service_1shot_v1_0.yaml
    └── deepspeed
    │   ├── train_hybrid_engine_zero1_lr.yaml
    │   ├── train_hybrid_engine_zero1_optim_offload_lr.yaml
    │   ├── train_hybrid_engine_zero2_lr.yaml
    │   ├── train_hybrid_engine_zero1.yaml
    │   ├── train_hybrid_engine_zero2.yaml
    │   ├── train_hybrid_engine_zero3.yaml
    │   ├── train_hybrid_engine_zero1_optim_offload.yaml
    │   ├── train_hybrid_engine_zero2_optim_offload.yaml
    │   └── train_hybrid_engine_zero1_optim_offload_cosine.yaml
├── scripts
    ├── cot
    │   ├── step_contrastive.py
    │   ├── rap_fix_pred.py
    │   ├── cot_step_accumulate.py
    │   └── deepseek_cot_sample_steps.py
    ├── inference
    │   ├── run_query_folio_vllm.sh
    │   ├── run_query_math_vllm_v1.0.sh
    │   ├── run_query_math_vllm_v1.1.sh
    │   ├── run_query_gsm8k_vllm_v1.0.sh
    │   ├── run_query_gsm8k_vllm_v1.1.sh
    │   ├── run_query_logiqav2_vllm_reclor.sh
    │   ├── run_query_logiqav2_vllm_reclor_v1.1.sh
    │   ├── run_query_reclor_vllm_sc_v1.1.sh
    │   ├── run_query_ar_lsat_vllm_v2.1.sh
    │   ├── run_query_logiqav2_vllm.sh
    │   ├── run_query_logiqav2_vllm_v2.1.sh
    │   ├── run_query_logiqav2_dev_order.sh
    │   ├── run_query_logiqav2_vllm_sc_v2.1.sh
    │   ├── calibra_results_reclor_sc.sh
    │   ├── calibra_results_reclor_v1.1.sh
    │   ├── calibra_results.sh
    │   ├── run_query_logiqav2_order.sh
    │   └── calibra_results_logiqav2_sc.sh
    ├── process_sft_inter.sh
    ├── process_llama.sh
    ├── process_inter.sh
    ├── fixed_explore_from_infer
    │   └── logiqav2
    │   │   ├── check_rewards.sh
    │   │   ├── reward_eval.sh
    │   │   ├── construct_dpo.sh
    │   │   └── split_pair.sh
    ├── explore_from_inter
    │   ├── reject_sampling_best_of.sh
    │   ├── run_v2.0.sh
    │   ├── combine_worsen_response.sh
    │   ├── reclor
    │   │   └── best_of_filter_full.sh
    │   └── run_llama_sft_v2.0.sh
    ├── calculate_acc_w_clean.py
    ├── split_pairs_according_to_ids.py
    ├── construct_dpo_data_from_response.py
    ├── split_train_dev.py
    ├── process_turbo.sh
    ├── sent_tf_react_step_encoding.py
    ├── split_response_train_dev_according2item_id.py
    ├── merge_response.py
    └── deepspeed
    │   └── ds_full_checkpoint2hf.py
├── run_step_dpo.sh
├── requirements.txt
├── data
    ├── prompts
    │   └── logiqav2
    │   │   ├── logic_form
    │   │       ├── prompt_0.md
    │   │       └── human
    │   │       │   ├── dev_7261_0_sim.md
    │   │       │   ├── dev_218_0_sim.md
    │   │       │   └── dev_218_0.md
    │   │   ├── compare_response
    │   │       ├── prompt_0.txt
    │   │       └── template_th_0.txt
    │   │   ├── react
    │   │       └── prompts.py
    │   │   ├── gpt4
    │   │       └── dev_218_0.md
    │   │   └── decomposition
    │   │       └── human
    │   │           └── dev_218_0.md
    ├── meta_math
    │   └── react_prompt_1.txt
    ├── ar_lsat.py
    ├── reclor.py
    └── folio.py
├── write_deployment_to_cache.py
├── lora_share_trainer
    └── utils
    │   ├── fp8.py
    │   └── ds_utils.py
├── test_deepspeed.py
├── lora_merge.py
└── post_processors
    └── dist_mixin.py


/models/phi.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/general_util/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conf/engines/remax_defaults.yaml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/cot/step_contrastive.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conf/hydra/default.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   chdir: False
3 | run:
4 |   dir: logs/${hydra.job.config_name}/${now:%Y-%m-%d_%H-%M-%S}
5 | 


--------------------------------------------------------------------------------
/conf/api/vllm/vllm_params/sampling_param_greedy.yaml:
--------------------------------------------------------------------------------
1 | _target_: vllm.SamplingParams
2 | n: 1
3 | temperature: 0.0
4 | stop: [ "</s>", "\n\n\n\n" ]
5 | max_tokens: 2048
6 | 


--------------------------------------------------------------------------------
/conf/api/vllm/vllm_params/sampling_param_sample.yaml:
--------------------------------------------------------------------------------
1 | _target_: vllm.SamplingParams
2 | n: 5
3 | temperature: 1.0
4 | stop: [ "</s>", "\n\n\n\n" ]
5 | max_tokens: 2048
6 | 


--------------------------------------------------------------------------------
/conf/post_process/openai_react.yaml:
--------------------------------------------------------------------------------
1 | _target_: post_processors.openai_api_callback.OpenAICallBack
2 | output_file: ${output_file}
3 | answer_clean:
4 |   _target_: post_processors.openai_api_callback.ReActSeparatorClean


--------------------------------------------------------------------------------
/scripts/inference/run_query_folio_vllm.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; inference runs once per step.
 3 | for ((i=2;i<=$#;i++)); do
 4 |     step=${!i}
 5 |     echo "$step"
 6 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/folio_tems -cn react_dev_0shot_tem_v1_0 exp_name="$exp_name" step="$step"
 7 | done
 8 | 
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_math_vllm_v1.0.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | model_name=$2
 3 | # Arguments from the third onward are steps; model_name selects the config file name.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/math -cn "math_${model_name}_test_0shot_tem_v1_0" exp_name="$exp_name" step="$step"
 8 | done
 9 | 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_math_vllm_v1.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | model_name=$2
 3 | # Arguments from the third onward are steps; model_name selects the config file name.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/math -cn "math_${model_name}_test_0shot_tem_v1_1" exp_name="$exp_name" step="$step"
 8 | done
 9 | 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_gsm8k_vllm_v1.0.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | model_name=$2
 3 | # Arguments from the third onward are steps; model_name selects the config file name.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/math -cn "gsm8k_${model_name}_test_0shot_tem_v1_0" exp_name="$exp_name" step="$step"
 8 | done
 9 | 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_gsm8k_vllm_v1.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | model_name=$2
 3 | # Arguments from the third onward are steps; model_name selects the config file name.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/math -cn "gsm8k_${model_name}_test_0shot_tem_v1_1" exp_name="$exp_name" step="$step"
 8 | done
 9 | 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/models/reward_model_mixin.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | import torch
 4 | 
 5 | 
 6 | @dataclass
 7 | class RewardModelOutputs:  # Return container declared by RewardModelMixin.forward_value.
 8 |     chosen_end_scores: torch.Tensor = None  # Stays None unless the producer assigns a tensor.
 9 | 
10 | 
11 | class RewardModelMixin:  # Mixin declaring the `forward_value` hook; this base version only raises.
12 |     def forward_value(self, *args, **kwargs) -> RewardModelOutputs:  # Accepts any arguments; no base behaviour.
13 |         raise NotImplementedError
14 | 


--------------------------------------------------------------------------------
/scripts/process_sft_inter.sh:
--------------------------------------------------------------------------------
1 | data_dir=experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600
2 | # Paths built from data_dir are quoted so they survive word splitting and globbing.
3 | python scripts/sample_react_inter_states.py --input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.json" \
4 |   --output_file "$data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver0.0.json" \
5 |   --split_num 10


--------------------------------------------------------------------------------
/run_step_dpo.sh:
--------------------------------------------------------------------------------
1 | srun -p NH100q -w node15 deepspeed --include localhost:0,1,2,3 --master_port 10005 trainer_base_ds_mul.py seed=43 -cp conf/exp/dpo/logiqav2 -cn llama2_7b_70bdistil_step_dpo_v1_1_th
2 | srun -p NH100q -w node15 deepspeed --include localhost:0,1,2,3 --master_port 10005 trainer_base_ds_mul.py seed=44 -cp conf/exp/dpo/logiqav2 -cn llama2_7b_70bdistil_step_dpo_v1_1_th


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate==0.24.1
 2 | bitsandbytes==0.41.1
 3 | deepspeed==0.12.2
 4 | einops==0.7.0
 5 | fairscale==0.4.12
 6 | flash-attn==2.3.3
 7 | hydra-core==1.2.0
 8 | nltk==3.8.1
 9 | openai==0.27.0
10 | pandas==1.5.3
11 | peft==0.6.0
12 | sentencepiece==0.1.97
13 | tokenizers==0.15.0
14 | torch==2.1.1
15 | transformers==4.36.1
16 | vllm==0.2.5
17 | wandb==0.13.10
18 | xformers==0.0.23
19 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_vllm_reclor.sh:
--------------------------------------------------------------------------------
1 | exp_name=$1
2 | # Every argument after the first is treated as a step; test then dev inference runs per step.
3 | for ((i=2;i<=$#;i++)); do
4 |     step=${!i}
5 |     echo "$step"
6 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_0_vllm exp_name="$exp_name" step="$step"
7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_0_vllm exp_name="$exp_name" step="$step"
8 | done
9 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_vllm_reclor_v1.1.sh:
--------------------------------------------------------------------------------
1 | exp_name=$1
2 | # Every argument after the first is treated as a step; dev then test inference runs per step.
3 | for ((i=2;i<=$#;i++)); do
4 |     step=${!i}
5 |     echo "$step"
6 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_1_vllm exp_name="$exp_name" step="$step"
7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_1_vllm exp_name="$exp_name" step="$step"
8 | done
9 | 


--------------------------------------------------------------------------------
/conf/reader/reclor/react_service_0shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | _target_: data.logiqav2.ComposePromptGenerator
 2 | read_func:
 3 |   _target_: data.reclor.ReClorReader
 4 |   flat_options: True
 5 | template_id: 8
 6 | instruction:
 7 |   _target_: data.prompts.logiqav2.react.prompts.get_prompt
 8 |   prompt_name: react_v2
 9 | few_shot_prompt:
10 | compose_keys: [ "context", "question", "option_list" ]
11 | max_data_num: -1
12 | api_based: False
13 | service_based: False
14 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_reclor_vllm_sc_v1.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | n=$2
 3 | # Arguments from the third onward are steps; n is forwarded as sampling_params.n.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn dev_react_0shot_v1_1_vllm_sc exp_name="$exp_name" sampling_params.n="$n" step="$step"
 8 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/reclor_tems -cn test_react_0shot_v1_1_vllm_sc exp_name="$exp_name" sampling_params.n="$n" step="$step"
 9 | done
10 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/logic_form/prompt_0.md:
--------------------------------------------------------------------------------
1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms.  For each logic form, the format is [predicate](entity 1,  ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate.   
2 | Use logical operations to derive the correct option.  Common logical operators include AND, OR, NOT, and ==> (logically implies).


--------------------------------------------------------------------------------
/scripts/process_llama.sh:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.json \
4 | #  --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json
5 | 
6 | python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.1.react.1shot.llama2.70b.chat.sample10.json \
7 |   --output_file data/trajectory/react/logiqav2-train-v1.1.react.1shot.llama2.70b.chat.sample10.clean_nodes.json


--------------------------------------------------------------------------------
/scripts/process_inter.sh:
--------------------------------------------------------------------------------
1 | python scripts/process_inter_response.py \
2 |   --input_file "experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600/logiqav2-train.full.qa.react.v1.0.0shot.inter_completion.split-*.json" \
3 |   --output_file logiqav2-train.full.qa.react.v1.0.0shot.inter_completion.pair_diff3.json \
4 |   --diff 3 \
5 |   --inter_state_file "experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver0.0.*-of-10.json"


--------------------------------------------------------------------------------
/scripts/inference/run_query_ar_lsat_vllm_v2.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; dev then test inference runs per step.
 3 | 
 4 | for ((i=2;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/ar_lsat_tems -cn dev_react_v1_0 exp_name="$exp_name" step="$step"
 8 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/ar_lsat_tems -cn test_react_v1_0 exp_name="$exp_name" step="$step"
 9 | done
10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/compare_response/prompt_0.txt:
--------------------------------------------------------------------------------
1 | Here is a logical reasoning problem, and there are two solutions describing their thinking process. Please tell me which one is better. You should consider the following criteria:
2 | 1. Reasonable: The reasoning process is reasonable, which means each conclusion should be inferred by collecting evidence, instead of making up unknown facts.
3 | 2. Concise: The reasoning process should not tell something irrelevant to the question.
4 | 3. Logically consistent: There must not be contradiction in the process itself.


--------------------------------------------------------------------------------
/conf/reader/reclor/react_service_1shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | _target_: data.logiqav2.ComposePromptGenerator
 2 | read_func:
 3 |   _target_: data.reclor.ReClorReader
 4 |   flat_options: True
 5 | template_id: 8
 6 | instruction:
 7 |   _target_: data.prompts.logiqav2.react.prompts.get_prompt
 8 |   prompt_name: react_v2
 9 | few_shot_prompt:
10 |   _target_: data.logiqav2.read_single_file
11 |   file_path: data/prompts/logiqav2/react/train_4554.txt
12 | compose_keys: [ "context", "question", "option_list" ]
13 | max_data_num: -1
14 | api_based: False
15 | service_based: False
16 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_vllm.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; test then dev inference runs per step.
 3 | 
 4 | for ((i=2;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name="$exp_name" step="$step"
 8 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_0 exp_name="$exp_name" step="$step"
 9 | done
10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_vllm_v2.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; dev then test inference runs per step.
 3 | 
 4 | for ((i=2;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_1 exp_name="$exp_name" step="$step"
 8 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_1 exp_name="$exp_name" step="$step"
 9 | done
10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_dev_order.sh:
--------------------------------------------------------------------------------
 1 | exp_dir=$1
 2 | name=$2
 3 | port=$3
 4 | step=$4
 5 | # Call the service three times with the base, _o1 and _o2 dev configs; arguments are quoted against word splitting.
 6 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0 exp_dir="$exp_dir" model="$name" port="$port" step="$step"
 7 | 
 8 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0_o1 exp_dir="$exp_dir" model="$name" port="$port" step="$step"
 9 | 
10 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v1_0_o2 exp_dir="$exp_dir" model="$name" port="$port" step="$step"
11 | 
12 | 


--------------------------------------------------------------------------------
/scripts/fixed_explore_from_infer/logiqav2/check_rewards.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states
 2 | 
 3 | rm_step=800
 4 | reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.A100.w4.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-${rm_step}/eval_predictions_rank0.json"
 5 | # Both file arguments are quoted; the input pattern therefore reaches the script unexpanded by the shell.
 6 | python scripts/check_rewards_v1.0.py \
 7 |   --input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.*-of-20.json" \
 8 |   --reward_file "$reward_file" \
 9 |   --step_cutoff 50
10 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_vllm_sc_v2.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | n=$2
 3 | # Arguments from the third onward are steps; n is forwarded as sampling_params.n.
 4 | for ((i=3;i<=$#;i++)); do
 5 |     step=${!i}
 6 |     echo "$step"
 7 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_dev_0shot_tem_v2_1_sc exp_name="$exp_name" sampling_params.n="$n" step="$step"
 8 |     python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_1_sc exp_name="$exp_name" sampling_params.n="$n" step="$step"
 9 | done
10 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/compare_response/template_th_0.txt:
--------------------------------------------------------------------------------
 1 | Here is the problem containing a context, a question, and different options:
 2 | 
 3 | Context:
 4 | {}
 5 | 
 6 | Question:
 7 | {}
 8 | 
 9 | Options:
10 | {}
11 | 
12 | Reasoning process A:
13 | Thought 1: {}
14 | 
15 | Reasoning process B:
16 | Thought 1: {}
17 | 
18 | For each aspect of the above criteria, select one winner, or judge it as a tie, following the format:
19 | Reasonable: A/B/Tie
20 | Concise: A/B/Tie
21 | Logically consistent: A/B/Tie
22 | 
23 | And finally, by considering all the criteria together, select one winner, or judge it as a tie, with the following format:
24 | Overall: A/B/Tie


--------------------------------------------------------------------------------
/write_deployment_to_cache.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | import os.path
 4 | 
 5 | parser = argparse.ArgumentParser()
 6 | parser.add_argument("--model", type=str)
 7 | parser.add_argument("--name", type=str)
 8 | parser.add_argument("--port", type=str)
 9 | args = parser.parse_args()
10 | 
11 | if os.path.exists("service.json"):
12 |     with open("service.json", "r") as f:
13 |         service = json.load(f)
14 | else:
15 |     service = {}
16 | 
17 | if args.port in service:
18 |     service[args.port] = {
19 |         "model": args.model,
20 |         "name": args.name,
21 |     }
22 | 
23 | with open("service.json", "w") as f:
24 |     json.dump(service, f, indent=4)
25 | 


--------------------------------------------------------------------------------
/conf/reader/logiqav2/react_service_0shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | _target_: data.logiqav2.ComposePromptGenerator
 2 | read_func:
 3 |   _target_: data.logiqav2.LogicQAReader
 4 |   flat_options: True
 5 |   option_order: "ABCD"
 6 | template_id: 8
 7 | instruction:
 8 |   _target_: data.prompts.logiqav2.react.prompts.get_prompt
 9 |   prompt_name: react_v2
10 | few_shot_prompt:
11 | compose_keys: [ "context", "question", "option_list" ]
12 | max_data_num: -1
13 | api_based: False
14 | service_based: True
15 | service_processor:
16 |   _target_: data.vllm.VLLMRequestGenerator
17 |   api_url: http://0.0.0.0:${port}/v1/completions
18 |   max_tokens: 2048
19 |   model: ${model}
20 |   stop: [ "</s>", "\n\n\n\n" ]
21 | 


--------------------------------------------------------------------------------
/conf/reader/logiqav2/react_service_1shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | _target_: data.logiqav2.ComposePromptGenerator
 2 | read_func:
 3 |   _target_: data.logiqav2.LogicQAReader
 4 |   flat_options: True
 5 | template_id: 8
 6 | instruction:
 7 |   _target_: data.prompts.logiqav2.react.prompts.get_prompt
 8 |   prompt_name: react_v2
 9 | few_shot_prompt:
10 |   _target_: data.logiqav2.read_single_file
11 |   file_path: data/prompts/logiqav2/react/train_4554.txt
12 | compose_keys: [ "context", "question", "option_list" ]
13 | max_data_num: -1
14 | api_based: False
15 | service_based: True
16 | service_processor:
17 |   _target_: data.vllm.VLLMRequestGenerator
18 |   api_url: http://0.0.0.0:${port}/v1/completions
19 |   max_tokens: 2048
20 |   model: ${model}
21 |   stop: [ "</s>", "\n\n\n\n" ]
22 | 


--------------------------------------------------------------------------------
/scripts/inference/calibra_results_reclor_sc.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; print the dev metrics file for each.
 3 | for ((i=2;i<=$#;i++)); do
 4 |     step=${!i}
 5 |     echo "*********************  $step *********************"
 6 |     echo "============= Dev ============="
 7 |     cat "/export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.dev.n5.tem1.0.0shot.v1.1.metrics.json"
 8 | #    echo "============= Test ============="
 9 | #    python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.json
10 | done
11 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/scripts/explore_from_inter/reject_sampling_best_of.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states
 2 | 
 3 | best_of=1
 4 | inter_best_of=1
 5 | # best_of and inter_best_of appear in the output file name and are also passed as flags; all expansions are quoted.
 6 | python scripts/reject_sample_best_of_filter_by_reward_v1.0.py \
 7 |   --input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.*-of-20.json" \
 8 |   --reward_file "experiments/llama2.7b.chat.logiqav2.70b-distil.rm.H100.w4.v1.0/train_decay0.95.diff2.6.rewards.raw_response.v1.0/test-checkpoint-400/eval_predictions_rank0.json" \
 9 |   --output_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.best_of_${best_of}_${inter_best_of}.json" \
10 |   --best_of "$best_of" --inter_best_of "$inter_best_of"
11 | 


--------------------------------------------------------------------------------
/scripts/inference/calibra_results_reclor_v1.1.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | # Every argument after the first is treated as a step; score the dev and test outputs for each.
 3 | for ((i=2;i<=$#;i++)); do
 4 |     step=${!i}
 5 |     echo "*********************  $step *********************"
 6 |     echo "============= Dev ============="
 7 |     python scripts/calculate_react_acc_w_clean.py --input_file "/export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.dev.0shot.v1.1.json"
 8 |     echo "============= Test ============="
 9 |     python scripts/calculate_react_acc_w_clean.py --input_file "/export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/reclor.react.test.0shot.v1.1.json"
10 | done
11 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/scripts/inference/calibra_results.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | 
 3 | for ((i=2;i<=$#;i++)); do
 4 |     step=${!i}
 5 |     echo "*********************  $step *********************"
 6 |     echo "============= Dev ============="
 7 |     python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
 8 |     echo "============= Test ============="
 9 |     python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json
10 | done
11 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/scripts/cot/rap_fix_pred.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | import os
 4 | import argparse
 5 | 
 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 7 | 
 8 | from data.math import math_answer_cleaner
 9 | 
# Re-extract the predictions of a response file with `math_answer_cleaner`
# and write the updated records to a new JSON file.
parser = argparse.ArgumentParser()
parser.add_argument("--response_file", type=str, required=True)
parser.add_argument("--output_file", type=str, required=True)
args = parser.parse_args()

# Context managers guarantee the handles are closed (and the output flushed)
# even if parsing or serialisation raises.
with open(args.response_file) as response_fp:
    data = json.load(response_fp)

cleaner = math_answer_cleaner(separator="The answer is")

# Overwrite each item's "pred" with the cleaner applied to every raw response.
for item in data:
    item["pred"] = [cleaner(resp) for resp in item["response"]]

with open(args.output_file, "w") as output_fp:
    json.dump(data, output_fp, indent=2)
26 | 


--------------------------------------------------------------------------------
/lora_share_trainer/utils/fp8.py:
--------------------------------------------------------------------------------
 1 | from typing import Callable
 2 | from general_util import training_utils
 3 | from general_util.dist_utils import print_rank_0
 4 | from general_util.logger import get_child_logger
 5 | from general_util.training_utils import get_zero_stage
 6 | from general_util.transformer_engine import convert_model
 7 | 
 8 | logger = get_child_logger(__name__)
 9 | 
10 | try:
11 |     import transformer_engine.pytorch as transformer_engine
12 |     from transformer_engine.common import recipe
13 | except ImportError:
14 |     logger.info("Transformer Engine package is missing, skipping tests")
15 | 
16 | 
def fp8_func_wrap(func: Callable, fp8_flag: bool, fp8_recipe, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, optionally inside an fp8 autocast context.

    Args:
        func: The callable to invoke.
        fp8_flag: When true, the call runs under
            ``transformer_engine.pytorch.fp8_autocast(enabled=True, ...)``.
        fp8_recipe: Forwarded as ``fp8_recipe`` to ``fp8_autocast``; unused
            when ``fp8_flag`` is false.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        ImportError: If ``fp8_flag`` is true but Transformer Engine is not
            installed.
    """
    if not fp8_flag:
        return func(*args, **kwargs)

    # The module-level import is optional and may have failed; importing here
    # turns a would-be NameError into an explicit, actionable ImportError.
    try:
        import transformer_engine.pytorch as te_pytorch
    except ImportError as err:
        raise ImportError(
            "fp8_flag=True requires the Transformer Engine package, which could not be imported."
        ) from err

    with te_pytorch.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe):
        return func(*args, **kwargs)
23 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/logic_form/human/dev_7261_0_sim.md:
--------------------------------------------------------------------------------
 1 | [Context]
 2 | All Anxi people are vegetarians, while all Zhenyuan people are ascetics. Ascetics and vegetarians are like fire and water, and there is no conflict. Guo Shu is an ascetic.
 3 | 
 4 | [Question]
 5 | Which of the followings can be inferred
 6 | 
 7 | [Options]
 8 | A. Guo Shu is from Zhenyuan.
 9 | B. Guo Shu is not from Zhenyuan. 
10 | C. Guo Shu is from Anxi. 
11 | D. Guo Shu is not from Anxi.
12 | 
13 | Here are the transformed ones in logic form:
14 | 
15 | [Context]
16 | 1. isVegetarian(AnxiPeople)
17 | 2. isAscetic(ZhenyuanPeople)
18 | 3. likeFireAndWater(Ascetics, Vegetarians) AND noConflict(Ascetics, Vegetarians)
19 | 4. isAscetic(GuoShu)
20 | 
21 | [Question]
22 | Which of the followings can be inferred?
23 | 
24 | [Options]
25 | A. fromPlace(GuoShu, Zhenyuan)
26 | B. NOT fromPlace(GuoShu, Zhenyuan)
27 | C. fromPlace(GuoShu, Anxi)
28 | D. NOT fromPlace(GuoShu, Anxi)


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | 
15 | step: 800
16 | port: 6000
17 | exp_dir:
18 | model:
19 | 
20 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
21 | flush_file: ${output_file}l
22 | 
23 | # Dataloader
24 | num_workers: 48
25 | prefetch_factor: 2
26 | 
27 | ddp_eval: False
28 | no_cuda: False
29 | seed: 42
30 | local_rank: -1
31 | 
32 | # Temporary variables
33 | n_gpu: 1
34 | device:
35 | train_batch_size:
36 | eval_batch_size:
37 | world_size:
38 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 800
16 | port: 6000
17 | exp_dir:
18 | model:
19 | 
20 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json
21 | flush_file: ${output_file}l
22 | 
23 | # Dataloader
24 | num_workers: 48
25 | prefetch_factor: 2
26 | 
27 | ddp_eval: False
28 | no_cuda: False
29 | seed: 42
30 | local_rank: -1
31 | 
32 | # Temporary variables
33 | n_gpu: 1
34 | device:
35 | train_batch_size:
36 | eval_batch_size:
37 | world_size:
38 | 


--------------------------------------------------------------------------------
/scripts/calculate_acc_w_clean.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | import re
 4 | from collections import Counter
 5 | 
 6 | parser = argparse.ArgumentParser()
 7 | parser.add_argument("--input_file", type=str, required=True)
 8 | parser.add_argument("--debug", default=False, action="store_true")
 9 | args = parser.parse_args()
10 | 
11 | data = json.load(open(args.input_file, "r"))
12 | 
13 | cnt = 0
14 | tmp = 0
15 | for item in data:
16 |     response = item["response"]
17 |     if "[Context]" in response:
18 |         tmp += 1
19 |         if args.debug and tmp < 10:
20 |             print(response)
21 |             print("=========================")
22 |         response = response.split("[Context]")[0]
23 | 
24 |     preds = re.findall(r"A|B|C|D", response)
25 |     if len(preds) == 0:
26 |         pred = ""
27 |     else:
28 |         pred = preds[-1]
29 | 
30 |     if pred and ord(pred) - ord("A") == item["label"]:
31 |         cnt += 1
32 | 
33 | print(cnt / len(data))
34 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/react/prompts.py:
--------------------------------------------------------------------------------
 1 | REACT_PROMPT_V1 = ("Solve a question answering task by having a Thought, then Finish with your answer. Thought can reason about the current situation. "
 2 |                    "Finish[answer] returns the answer and finishes the task. You will be given context that you should use to help you answer the question.\n"
 3 |                    "Here are some examples:")
 4 | 
 5 | REACT_PROMPT_V2 = ("Solve a question answering task by having a Thought, then Finish with your answer. Thought can reason about the current situation. "
 6 |                    "Finish[answer] returns the answer and finishes the task. You will be given context that you should use to help you answer the question.")
 7 | 
 8 | prompts = {
 9 |     "react_v1": REACT_PROMPT_V1,
10 |     "react_v2": REACT_PROMPT_V2,
11 |     "react_v2_mistral": "[INST] " + REACT_PROMPT_V2 + " [/INST]",
12 | }
13 | 
14 | 
15 | def get_prompt(prompt_name: str) -> str:
16 |     return prompts[prompt_name]
17 | 


--------------------------------------------------------------------------------
/test_deepspeed.py:
--------------------------------------------------------------------------------
 1 | from models.llama import LlamaForCausalLMDPO
 2 | from transformers.models.llama import LlamaConfig
 3 | from omegaconf import DictConfig, OmegaConf
 4 | import omegaconf
 5 | import datetime
 6 | 
 7 | config = LlamaConfig(vocab_size=10, num_hidden_layers=1)
 8 | 
 9 | model = LlamaForCausalLMDPO(config)
10 | 
11 | print(model.__class__.__name__)
12 | 
13 | import deepspeed
14 | 
15 | ds_config = OmegaConf.load("conf/deepspeed/train_hybrid_engine_zero1.yaml")
16 | ds_config.train_micro_batch_size_per_gpu = 1
17 | ds_config.gradient_accumulation_steps = 1
18 | ds_config.scheduler.params.total_num_steps = 1000
19 | ds_config.scheduler.params.warmup_num_steps = 10
20 | ds_config = OmegaConf.to_container(ds_config, resolve=True)
21 | 
22 | deepspeed.init_distributed(dist_backend="nccl", timeout=datetime.timedelta(seconds=9600))
23 | engine = deepspeed.initialize(model=model,
24 |                               config=ds_config)
25 | 
26 | print(engine.__class__.__name__)
27 | print(engine.module.__clas__.__name__)
28 | 


--------------------------------------------------------------------------------
/scripts/explore_from_inter/run_v2.0.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0/checkpoint-1600
 2 | #diff=3.0
 3 | #diff=2.6
 4 | #diff=2.1
 5 | diff=3.0
 6 | #decay=0.9
 7 | #decay=0.8
 8 | #decay=0.95
 9 | #decay=0.9
10 | decay=1.0
11 | 
12 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.json \
13 | #  --output_file $data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver2.0.rs0.2.r0.6.json \
14 | #  --split_num 20 --ratio_s 0.2 --ratio 0.6
15 | 
16 | python scripts/process_inter_response_v2.0.py --input_file "$data_dir/logiqav2-train.full.qa.react.v1.0.0shot.inter.ver2.0.rs0.2.r0.6.split-*.sample3.json" \
17 |   --output_file "$data_dir/value-ver2.0/logiqav2-train.full.qa.react.v1.0.0shot.inter.ver2.0.rs0.2.r0.6.sample3.diff$diff.decay$decay.value.json" \
18 |   --diff $diff --decay $decay --inter_state_file "$data_dir/logiqav2-train.full.qa.react.v1.0.1shot.sample20.clean_inter_ver2.0.rs0.2.r0.6.*-of-20.json"
19 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step:
16 | port: 8000
17 | exp_dir:
18 | model:
19 | 
20 | instruction:
21 |   _target_: data.prompts.logiqav2.react.prompts.get_prompt
22 |   prompt_name: react_v2_mistral
23 | 
24 | output_file: ${exp_dir}/logiqav2-test.qa.react.v1.1.1shot.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/lora_merge.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch
 3 | 
 4 | from peft import PeftModel
 5 | from transformers import AutoModelForCausalLM
 6 | 
 7 | parser = argparse.ArgumentParser()
 8 | parser.add_argument("--base_model_path", type=str, default="gpt2")
 9 | parser.add_argument("--lora_path", type=str, default="gpt2")
10 | parser.add_argument("--output_dir", type=str, default="output")
11 | args = parser.parse_args()
12 | 
13 | print(f"Loading base model from {args.base_model_path}...")
14 | # model = AutoModelForCausalLM.from_pretrained(args.base_model_path, device_map={"": "cpu"}, low_cpu_mem_usage=True, torch_dtype=torch.float16)
15 | model = AutoModelForCausalLM.from_pretrained(args.base_model_path)
16 | print(f"Loading lora model from {args.lora_path}...")
17 | # model = PeftModel.from_pretrained(model, args.lora_path, device_map={"": "cpu"}, torch_dtype=torch.float16)
18 | model = PeftModel.from_pretrained(model, args.lora_path)
19 | print("Merging...")
20 | model = model.merge_and_unload()
21 | print(f"Saving to {args.output_dir}...")
22 | model.save_pretrained(args.output_dir)
23 | 


--------------------------------------------------------------------------------
/scripts/inference/run_query_logiqav2_order.sh:
--------------------------------------------------------------------------------
 1 | exp_dir=$1
 2 | name=$2
 3 | port=$3
 4 | step=$4
 5 | 
 6 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps -cn react_test_0shot_tem_v1_0 exp_dir=$exp_dir model=$name port=$port step=$step"
 7 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0 exp_dir=$exp_dir model=$name port=$port step=$step
 8 | 
 9 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps -cn react_test_0shot_tem_v1_0_o1 exp_dir=$exp_dir model=$name port=$port step=$step"
10 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0_o1 exp_dir=$exp_dir model=$name port=$port step=$step
11 | 
12 | echo "python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_temps -cn react_test_0shot_tem_v1_0_o2 exp_dir=$exp_dir model=$name port=$port step=$step"
13 | python service_api_caller_v1.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v1_0_o2 exp_dir=$exp_dir model=$name port=$port step=$step
14 | 


--------------------------------------------------------------------------------
/scripts/inference/calibra_results_logiqav2_sc.sh:
--------------------------------------------------------------------------------
 1 | exp_name=$1
 2 | 
 3 | for ((i=2;i<=$#;i++)); do
 4 |     step=${!i}
 5 |     echo "*********************  $step *********************"
 6 |     echo "============= Dev ============="
 7 | #    python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json
 8 |     cat  /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.metrics.json
 9 |     echo "============= Test ============="
10 | #    python scripts/calculate_react_acc_w_clean.py --input_file /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.json
11 |     cat /export/home2/fangkai/rl-hybrid-engine/experiments/${exp_name}/checkpoint-${step}/logiqav2.test.react.n5.tem1.0.v1.0.0shot.metrics.json
12 | done
13 | #python vllm_inference.py -cp conf/api/vllm/llama2-7b/logiqav2_tems -cn react_test_0shot_tem_v2_0 exp_name=$exp_name step=$step
14 | 
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-70b/reclor/train_react_1shot_sample5_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${train_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: ../pretrained-models/Llama-2-70b-chat-hf
20 | eval_sub_path:
21 | 
22 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.v1.0.json
23 | flush_file: ${output_file}l
24 | 
25 | # Dataloader
26 | num_workers: 32
27 | prefetch_factor: 2
28 | 
29 | # Training hyper-parameters
30 | per_gpu_train_batch_size: 1
31 | per_gpu_eval_batch_size: 1
32 | 
33 | ddp_eval: False
34 | no_cuda: False
35 | seed: 42
36 | local_rank: -1
37 | 
38 | # Temporary variables
39 | fp16: True
40 | fp16_bfloat16: True
41 | n_gpu: 1
42 | device:
43 | train_batch_size:
44 | eval_batch_size:
45 | world_size:
46 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/reclor/train_react_1shot_sample5_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${train_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1
20 | eval_sub_path:
21 | 
22 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.v1.0.json
23 | flush_file: ${output_file}l
24 | 
25 | # Dataloader
26 | num_workers: 32
27 | prefetch_factor: 2
28 | 
29 | # Training hyper-parameters
30 | per_gpu_train_batch_size: 1
31 | per_gpu_eval_batch_size: 1
32 | 
33 | ddp_eval: False
34 | no_cuda: False
35 | seed: 42
36 | local_rank: -1
37 | 
38 | # Temporary variables
39 | fp16: True
40 | fp16_bfloat16: True
41 | n_gpu: 1
42 | device:
43 | train_batch_size:
44 | eval_batch_size:
45 | world_size:
46 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v6.0
18 | model: llama-2-7b-step-dpo-v6.0-cp1600
19 | read_tensor:
20 |   read_func:
21 | #    option_order: "BCAD"  # o1
22 |     option_order: "DCBA"  # o2
23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/scripts/fixed_explore_from_infer/logiqav2/reward_eval.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/
 3 | 
 4 | 
 5 | reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.A100.w4.v1.2.s42/sft.dev.n5.tem1.0.reclor.rewards.raw_trajectory.product.v1.1/test-checkpoint-800/eval_predictions_rank0.json"
 6 | #margin=0.5
 7 | index="(1,2,3)"
 8 | reduction="product"
 9 | python scripts/combine_reward_debug_v1.0.py \
10 |   --input_file "${sft_model_dir}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json" \
11 |   --reward_file $reward_file \
12 |   --output_file "./debug.json" --reduction ${reduction} --prob_labels ${index}
13 | 
14 | 
15 | #reward_file="experiments/llama2.7b.chat.logiqav2.70b-distil.orm.fix_hack.A100.40.w4.v1.2.s42/sft.dev.n5.tem1.0.rewards.raw_trajectory.product.v1.0/test-checkpoint-400/eval_predictions_rank0.json"
16 | #python scripts/combine_reward_debug_v1.0.py \
17 | #  --input_file "${sft_model_dir}/logiqav2.dev.react.n5.tem1.0.v1.0.0shot.json" \
18 | #  --reward_file $reward_file \
19 | #  --output_file "./debug.json"  --prob_labels "(1,)"  --orm
20 | 
21 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_turbosft_test_0shot_tem_v1_0_s0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.gpt35turbo-dpo-sft.H100.w2.v2.0
18 | model: llama-2-7b-sft-v2.0-cp1600
19 | read_tensor:
20 |   read_func:
21 | #    option_order: "BCAD"  # o1
22 |     option_order: "DCBA"  # o2
23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json
14 | 
15 | port: 8000
16 | model: llama-2-70b-chat
17 | 
18 | output_file: ../pretrained-models/Llama-2-70b-chat-hf/ar-lsat.react.dev.1shot.json
19 | flush_file: ${output_file}l
20 | 
21 | # Data loading
22 | read_tensor:
23 |   read_func:
24 |     _target_: data.ar_lsat.ARLSATReader
25 |     flat_options: True
26 | 
27 | # Dataloader
28 | num_workers: 32
29 | prefetch_factor: 2
30 | 
31 | output_dir:
32 | 
33 | 
34 | # Training hyper-parameters
35 | per_gpu_train_batch_size: 1
36 | per_gpu_eval_batch_size: 1
37 | 
38 | ddp_eval: False
39 | no_cuda: False
40 | seed: 42
41 | local_rank: -1
42 | 
43 | # Temporary variables
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_70bdistil_dpo_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.dpo.H100.w4.v1.0/
18 | model: llama-2-7b-70bdistil-dpo-v1.0-cp1600
19 | read_tensor:
20 |   read_func:
21 | #    option_order: "BCAD"  # o1
22 |     option_order: "DCBA"  # o2
23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
19 | read_tensor:
20 |   template_id: 6
21 |   read_func:
22 |     option_order: "BCAD"  # o1
23 | #    option_order: "DCBA"  # o2
24 | 
25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.o1.json
26 | flush_file: ${output_file}l
27 | 
28 | # Dataloader
29 | num_workers: 48
30 | prefetch_factor: 2
31 | 
32 | ddp_eval: False
33 | no_cuda: False
34 | seed: 42
35 | local_rank: -1
36 | 
37 | # Temporary variables
38 | n_gpu: 1
39 | device:
40 | train_batch_size:
41 | eval_batch_size:
42 | world_size:
43 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o2.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
19 | read_tensor:
20 |   template_id: 6
21 |   read_func:
22 | #    option_order: "BCAD"  # o1
23 |     option_order: "DCBA"  # o2
24 | 
25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.o2.json
26 | flush_file: ${output_file}l
27 | 
28 | # Dataloader
29 | num_workers: 48
30 | prefetch_factor: 2
31 | 
32 | ddp_eval: False
33 | no_cuda: False
34 | seed: 42
35 | local_rank: -1
36 | 
37 | # Temporary variables
38 | n_gpu: 1
39 | device:
40 | train_batch_size:
41 | eval_batch_size:
42 | world_size:
43 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
19 | read_tensor:
20 |   template_id: 6
21 |   read_func:
22 |     option_order: "BCAD"  # o1
23 | #    option_order: "DCBA"  # o2
24 | 
25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o1.json
26 | flush_file: ${output_file}l
27 | 
28 | # Dataloader
29 | num_workers: 48
30 | prefetch_factor: 2
31 | 
32 | ddp_eval: False
33 | no_cuda: False
34 | seed: 42
35 | local_rank: -1
36 | 
37 | # Temporary variables
38 | n_gpu: 1
39 | device:
40 | train_batch_size:
41 | eval_batch_size:
42 | world_size:
43 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o2.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
18 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
19 | read_tensor:
20 |   template_id: 6
21 |   read_func:
22 | #    option_order: "BCAD"  # o1
23 |     option_order: "DCBA"  # o2
24 | 
25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json
26 | flush_file: ${output_file}l
27 | 
28 | # Dataloader
29 | num_workers: 48
30 | prefetch_factor: 2
31 | 
32 | ddp_eval: False
33 | no_cuda: False
34 | seed: 42
35 | local_rank: -1
36 | 
37 | # Temporary variables
38 | n_gpu: 1
39 | device:
40 | train_batch_size:
41 | eval_batch_size:
42 | world_size:
43 | 


--------------------------------------------------------------------------------
/scripts/split_pairs_according_to_ids.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | import random
 4 | from collections import defaultdict
 5 | 
 6 | 
 7 | def main():
 8 |     parser = argparse.ArgumentParser()
 9 |     parser.add_argument("--input_file", type=str, required=True)
10 |     parser.add_argument("--output_file", type=str, required=True)
11 |     parser.add_argument("--ratio", type=float, required=True)
12 |     parser.add_argument("--seed", type=int, default=42)
13 |     args = parser.parse_args()
14 | 
15 |     random.seed(args.seed)
16 | 
17 |     data = json.load(open(args.input_file, "r"))
18 |     print(len(data))
19 | 
20 |     id2samples = defaultdict(list)
21 |     for item in data:
22 |         id2samples[item["id"]].append(item)
23 | 
24 |     print(len(id2samples))
25 | 
26 |     sampled_data_ids = random.sample(list(id2samples.keys()), int(len(id2samples) * args.ratio))
27 |     sampled_data = []
28 |     for sample_id in sampled_data_ids:
29 |         sampled_data.extend(id2samples[sample_id])
30 |     print(len(sampled_data))
31 | 
32 |     json.dump(sampled_data, open(args.output_file.replace(".json", f".{args.seed}.json"), "w"), indent=2)
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     main()
37 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 800
16 | port: 6000
17 | #exp_dir: experiments/llama2.7b.chat.logiqav2.dpo.A40.w4.v4.1
18 | exp_dir: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1
19 | #model: llama-2-7b-dpo-4.1-cp800
20 | model: llama-2-7b-dpo-v4.0-cp800
21 | read_tensor:
22 |   read_func:
23 | #    option_order: "BCAD"  # o1
24 |     option_order: "DCBA"  # o2
25 | 
26 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.o2.json
27 | flush_file: ${output_file}l
28 | 
29 | # Dataloader
30 | num_workers: 48
31 | prefetch_factor: 2
32 | 
33 | ddp_eval: False
34 | no_cuda: False
35 | seed: 42
36 | local_rank: -1
37 | 
38 | # Temporary variables
39 | n_gpu: 1
40 | device:
41 | train_batch_size:
42 | eval_batch_size:
43 | world_size:
44 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | 
16 | step: 1600
17 | port: 6000
18 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/
19 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
20 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
21 | read_tensor:
22 |   template_id: 6
23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/scripts/explore_from_inter/combine_worsen_response.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/react-inter-states
 2 | 
 3 | 
 4 | # Worsen response to DPO pair
 5 | 
 6 | python scripts/construct_dpo_data_via_worsen_response.py \
 7 |   --input_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.?-4.?-of-4.modify_worse.1shot.mistral-7b.json" \
 8 |   --original_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs0.4.r0.2.?-of-4.json" \
 9 |   --is_inter_states  \
10 |   --output_file "$data_dir/worsen/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.modify_worse.1shot.mistral-7b.dpo.json"
11 | 
12 | python scripts/construct_dpo_data_via_worsen_response.py \
13 |   --input_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.?-4.?-of-4.modify_worse.1shot.mistral-7b.json" \
14 |   --original_file "$data_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs0.4.r0.2.?-of-4.json" \
15 |   --is_inter_states  \
16 |   --output_file "$data_dir/worsen/logiqav2-train.react.v1.0.0shot.sample5.inter_ver2.1.rs0.4.r0.2.modify_worse.1shot.mistral-7b.dpo.w_wrong.json" \
17 |   --keep_wrong
18 | 
19 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: 6
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | step: 1600
17 | port: 6000
18 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/
19 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
20 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
21 | read_tensor:
22 |   template_id: 6
23 | 
24 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json
25 | flush_file: ${output_file}l
26 | 
27 | # Dataloader
28 | num_workers: 48
29 | prefetch_factor: 2
30 | 
31 | ddp_eval: False
32 | no_cuda: False
33 | seed: 42
34 | local_rank: -1
35 | 
36 | # Temporary variables
37 | n_gpu: 1
38 | device:
39 | train_batch_size:
40 | eval_batch_size:
41 | world_size:
42 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: 6
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/data/meta_math/react_prompt_1.txt:
--------------------------------------------------------------------------------
 1 | ### Question: Ali has four $10 bills and six $20 bills that he saved after working for Mr. James on his farm. Ali gives his sister half of the total money he has and uses 3/5 of the remaining amount of money to buy dinner. Calculate the amount of money he has after buying the dinner.
 2 | 
 3 | SubQuestion 1: How much money does Ali have in total?
 4 | Answer 1: Ali has four $10 bills and six $20 bills. So he has 4 * 10 + 6 * 20 = 160 dollars. The answer is 160.
 5 | SubQuestion 2: How much money does Ali give to his sister?
 6 | Answer 2: Ali gives half of the total money he has to his sister. So he gives 160 / 2 = 80 dollars to his sister. The answer is 80.
 7 | SubQuestion 3: How much money does Ali have after giving his sister the money?
 8 | Answer 3: After giving his sister the money, Ali has 160 - 80 = 80 dollars left. The answer is 80.
 9 | SubQuestion 4: How much money does Ali use to buy dinner?
10 | Answer 4: Ali uses 3/5 of the remaining amount of money to buy dinner. So he uses 80 * 3/5 = 48 dollars to buy dinner. The answer is 48.
11 | SubQuestion 5: Now we can answer the question: How much money does Ali have after buying the dinner?
12 | Answer 5: After buying the dinner, Ali has 80 - 48 = 32 dollars left. The answer is 32.


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_react_70bdistil_step_dpo_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | 
15 | step: 1600
16 | port: 6000
17 | #exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.0/
18 | exp_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.H100.w4.v1.1
19 | model: llama-2-7b-70bdistil-step-dpo-v1.1-cp1600
20 | #read_tensor:
21 | #  read_func:
22 | #    option_order: "BCAD"  # o1
23 | #    option_order: "DCBA"  # o2
24 | 
25 | output_file: ${exp_dir}/checkpoint-${step}/logiqav2-test.full.qa.react.v1.0.0shot.json
26 | flush_file: ${output_file}l
27 | 
28 | # Dataloader
29 | num_workers: 48
30 | prefetch_factor: 2
31 | 
32 | ddp_eval: False
33 | no_cuda: False
34 | seed: 42
35 | local_rank: -1
36 | 
37 | # Temporary variables
38 | n_gpu: 1
39 | device:
40 | train_batch_size:
41 | eval_batch_size:
42 | world_size:
43 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
12 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
13 | #test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
14 | test_file: ${dev_file}
15 | 
16 | step:
17 | port: 6000
18 | exp_dir:
19 | model:
20 | 
21 | read_tensor:
22 |   service_processor:
23 |     _target_: data.vllm.VLLMRequestGenerator
24 |     api_url: http://0.0.0.0:${port}/v1/completions
25 |     max_tokens: 3072
26 |     model: ${model}
27 |     stop: [ "</s>", "\n\n\n\n", "Context:\n" ]
28 | 
29 | #output_file: ${exp_dir}/logiqav2-test.qa.react.v1.0.1shot.json
30 | output_file: ${exp_dir}/logiqav2-dev.qa.react.v1.0.1shot.json
31 | flush_file: ${output_file}l
32 | 
33 | # Dataloader
34 | num_workers: 48
35 | prefetch_factor: 2
36 | 
37 | ddp_eval: False
38 | no_cuda: False
39 | seed: 42
40 | local_rank: -1
41 | 
42 | # Temporary variables
43 | n_gpu: 1
44 | device:
45 | train_batch_size:
46 | eval_batch_size:
47 | world_size:
48 | 


--------------------------------------------------------------------------------
/scripts/fixed_explore_from_infer/logiqav2/construct_dpo.sh:
--------------------------------------------------------------------------------
 1 | #sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/
 2 | #fix_hack_data_dir=$sft_model_dir/fix_hack_data_dir/
 3 | #
 4 | #python scripts/construct_dpo_data_from_react_response.py \
 5 | #  --input_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \
 6 | #  --output_file $fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json
 7 | #
 8 | #python scripts/split_train_dev.py \
 9 | #  --input_file $fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json
10 | 
11 | 
12 | # ============================================  Iter 1 ============================================
13 | 
14 | 
# Checkpoint directory that holds the sampled responses and receives the output file.
sft_model_dir=experiments/llama2.7b.chat.logiqav2.70b-distil.step.dpo.fix_hack.H100.w4.v1.0.th.s42/checkpoint-400/

# The "*" in --input_file is quoted on purpose so the shell passes the pattern
# through unexpanded. --output_file is quoted as well so the path is never
# subject to word splitting or globbing.
python scripts/construct_dpo_data_from_react_response_v1.1.py \
  --input_file "$sft_model_dir/logiqav2.react.train.0shot.sample10.tem1.0.v1.0.*-of-2.json" \
  --output_file "$sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.dpo_pair.json"
20 | 
21 | #python scripts/split_train_dev.py \
22 | #  --input_file $fix_hack_data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_1shot_tem_v2_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
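29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-test.full.qa.react.v1.0.0shot.json  # NOTE(review): this config loads the 1-shot reader but writes to the same "0shot" file name as react_test_0shot_tem_v2_1.yaml - confirm this is intended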
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/gpt4/dev_218_0.md:
--------------------------------------------------------------------------------
 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms.  For each logic form, the format is [predicate](entity 1,  ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate.   
 2 | Use logical operations to derive the correct option.  Common logical operators include AND, OR, NOT,   and  ==> (logically implies). 
 3 | 
 4 | [Context]:
 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water.
 6 | 
 7 | [Question]:
 8 | Which of the followings, if true, can best support the above statement?
 9 | 
10 | [Options]:
11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet.
12 | B. Many of the water in interstellar space exists in gaseous form.
13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice.
14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time.
15 | 
16 | -----------------
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/conf/api/gpt35turbo/reclor/train_react_v1_0_1shot_sample10.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | 
13 | train_file: ../research.data/reclor_data/train.json
14 | dev_file: ../research.data/reclor_data/val.json
15 | test_file: ${train_file}
16 | 
17 | #num_shot: 5
18 | 
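19 | output_file: api-outputs/gpt35turbo1106/logiqav2.train.react.1shot.gpt35turbo1106.sample10.tem${model.temperature}.json  # NOTE(review): the reader and data files are ReClor but the file name says "logiqav2" - confirm this is intended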
20 | flush_file: ${output_file}l
21 | 
22 | model:
23 |   _target_: data.openai_api_caller.GPTTurbo
24 |   model: "gpt-3.5-turbo-1106"
25 |   max_tokens: 3072
26 | #  temperature: 1.0
27 |   temperature: 0.7
28 |   api_time_interval: 1
29 |   top_p: 0.8
30 |   n: 10
31 | 
32 | # Data loading
33 | read_tensor:
34 |   api_based: True
35 |   flush_file: ${flush_file}
36 | 
37 | # Dataloader
38 | num_workers: 0
39 | prefetch_factor: 2
40 | 
41 | output_dir:
42 | 
43 | 
44 | # Training hyper-parameters
45 | per_gpu_train_batch_size: 1
46 | per_gpu_eval_batch_size: 1
47 | 
48 | ddp_eval: False
49 | no_cuda: False
50 | seed: 42
51 | local_rank: -1
52 | 
53 | # Temporary variables
54 | n_gpu: 1
55 | device:
56 | train_batch_size:
57 | eval_batch_size:
58 | world_size:
59 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/decomposition/human/dev_218_0.md:
--------------------------------------------------------------------------------
 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms.  For each logic form, the format is [predicate](entity 1,  ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate.   
 2 | Use logical operations to derive the correct option.  Common logical operators include AND, OR, NOT,   and  ==> (logically implies). 
 3 | 
 4 | [Context]:
 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water.
 6 | 
 7 | [Question]:
 8 | Which of the followings, if true, can best support the above statement?
 9 | 
10 | [Options]:
11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet.
12 | B. Many of the water in interstellar space exists in gaseous form.
13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice.
14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time.
15 | 
16 | -----------------
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/conf/api/gpt4/reclor/dev_react_v1_0_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | 
13 | train_file: ../research.data/reclor_data/train.json
14 | dev_file: ../research.data/reclor_data/val.json
15 | test_file: ../research.data/reclor_data/val.json
16 | 
17 | #num_shot: 5
18 | 
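19 | output_file: api-outputs/gpt35turbo1106/reclor.val.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json  # NOTE(review): the model below is gpt-4-1106-preview but the path says "gpt35turbo1106" - confirm this is intended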
20 | flush_file: ${output_file}l
21 | 
22 | model:
23 |   _target_: data.openai_api_caller.GPTTurbo
24 |   model: "gpt-4-1106-preview"
25 |   max_tokens: 2048
26 | #  temperature: 1.0
27 | #  temperature: 0.7
28 |   temperature: 0.0
29 |   api_time_interval: 1
30 | #  top_p: 0.8
31 | #  n: 1
32 | 
33 | # Data loading
34 | read_tensor:
35 |   api_based: True
36 |   flush_file: ${flush_file}
37 | 
38 | # Dataloader
39 | num_workers: 0
40 | prefetch_factor: 2
41 | 
42 | output_dir:
43 | 
44 | 
45 | # Training hyper-parameters
46 | per_gpu_train_batch_size: 1
47 | per_gpu_eval_batch_size: 1
48 | 
49 | ddp_eval: False
50 | no_cuda: False
51 | seed: 42
52 | local_rank: -1
53 | 
54 | # Temporary variables
55 | n_gpu: 1
56 | device:
57 | train_batch_size:
58 | eval_batch_size:
59 | world_size:
60 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1_sc.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2.dev.react.n${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/conf/api/gpt35turbo/reclor/dev_react_v1_0_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | 
13 | train_file: ../research.data/reclor_data/train.json
14 | dev_file: ../research.data/reclor_data/val.json
15 | test_file: ../research.data/reclor_data/test.json
16 | 
17 | #num_shot: 5
18 | 
19 | output_file: api-outputs/gpt35turbo1106/reclor.test.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json
20 | flush_file: ${output_file}l
21 | 
22 | model:
23 |   _target_: data.openai_api_caller.GPTTurbo
24 |   model: "gpt-3.5-turbo-1106"
25 |   max_tokens: 2048
26 | #  temperature: 1.0
27 | #  temperature: 0.7
28 |   temperature: 0.0
29 |   api_time_interval: 1
30 | #  top_p: 0.8
31 | #  n: 1
32 | 
33 | # Data loading
34 | read_tensor:
35 |   api_based: True
36 |   flush_file: ${flush_file}
37 | 
38 | # Dataloader
39 | num_workers: 0
40 | prefetch_factor: 2
41 | 
42 | output_dir:
43 | 
44 | 
45 | # Training hyper-parameters
46 | per_gpu_train_batch_size: 1
47 | per_gpu_eval_batch_size: 1
48 | 
49 | ddp_eval: False
50 | no_cuda: False
51 | seed: 42
52 | local_rank: -1
53 | 
54 | # Temporary variables
55 | n_gpu: 1
56 | device:
57 | train_batch_size:
58 | eval_batch_size:
59 | world_size:
60 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1_sc.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
29 | output_file: ${output_dir}/${eval_sub_path}/logiqav2.test.react.n${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json
30 | flush_file: ${output_file}l
31 | 
32 | # Dataloader
33 | num_workers: 48
34 | prefetch_factor: 2
35 | 
36 | ddp_eval: False
37 | no_cuda: False
38 | seed: 42
39 | local_rank: -1
40 | 
41 | # Temporary variables
42 | fp16: True
43 | fp16_bfloat16: True
44 | n_gpu: 1
45 | device:
46 | train_batch_size:
47 | eval_batch_size:
48 | world_size:
49 | 


--------------------------------------------------------------------------------
/post_processors/dist_mixin.py:
--------------------------------------------------------------------------------
 1 | import torch.distributed as dist
 2 | from typing import List, Any, Dict
 3 | import torch
 4 | import numpy as np
 5 | 
 6 | 
 7 | class DistGatherMixin:
 8 |     def gather(self):
 9 |         pass
10 | 
11 |     @staticmethod
12 |     def gather_object(objects: List[Any]):
13 |         output = [None for _ in range(dist.get_world_size())]
14 |         dist.gather_object(objects,
15 |                            object_gather_list=output if dist.get_rank() == 0 else None,
16 |                            dst=0)
17 | 
18 |         if dist.get_rank() == 0:
19 |             return output
20 |         else:
21 |             return None
22 | 
23 | 
class SFTLossOnlyPostProcessor(DistGatherMixin):
    """Post-processor that records one loss value per batch and reports their mean."""

    def __init__(self):
        super().__init__()
        # One float per processed batch, in call order.
        self.losses = []

    def __call__(self, meta_data: Dict[str, Any], batch_model_outputs: Dict[str, Any], ddp: bool = False):
        """Record the loss of one batch.

        Args:
            meta_data: Accepted but not read by this post-processor.
            batch_model_outputs: Mapping whose ``"loss"`` entry exposes ``.item()``.
            ddp: When true, the per-process losses are gathered on rank 0.

        With ``ddp`` enabled, rank 0 stores the mean of the gathered losses while
        every other rank stores its own local loss.
        """
        batch_loss = batch_model_outputs["loss"].item()

        if ddp:
            per_rank_losses = self.gather_object(batch_loss)
            # The gathered list only exists on rank 0; other ranks receive None.
            if dist.get_rank() == 0:
                batch_loss = sum(per_rank_losses) / len(per_rank_losses)

        self.losses.append(batch_loss)

    def get_results(self, output_dir: str):
        """Return ``({"loss": mean of recorded losses}, [])``.

        ``output_dir`` is accepted but not used; the second element is always an
        empty list.
        """
        return {"loss": np.mean(self.losses).item()}, []
47 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/reclor/dev_react_1shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ../research.data/reclor_data/test.json
15 | 
16 | model:
17 | port: 6000
18 | save_best: False
19 | exp_name:
20 | exp_notes:
21 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1
22 | eval_sub_path:
23 | 
24 | output_file: ${output_dir}/reclor.react.test.1shot.v1.0.json
25 | flush_file: ${output_file}l
26 | 
27 | read_tensor:
28 |   service_based: True
29 |   service_processor:
30 |     _target_: data.vllm.VLLMRequestGenerator
31 |     api_url: http://0.0.0.0:${port}/v1/completions
32 |     max_tokens: 3072
33 |     model: ${model}
34 |     stop: [ "</s>", "\n\n\n\n", "Context:\n" ]
35 | 
36 | # Dataloader
37 | num_workers: 64
38 | prefetch_factor: 2
39 | 
40 | # Training hyper-parameters
41 | per_gpu_train_batch_size: 1
42 | per_gpu_eval_batch_size: 1
43 | 
44 | ddp_eval: False
45 | no_cuda: False
46 | seed: 42
47 | local_rank: -1
48 | 
49 | # Temporary variables
50 | fp16: True
51 | fp16_bfloat16: True
52 | n_gpu: 1
53 | device:
54 | train_batch_size:
55 | eval_batch_size:
56 | world_size:
57 | 


--------------------------------------------------------------------------------
/conf/api/gpt35turbo/ar_lsat/dev_react_1shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json
14 | 
15 | port: 8000
16 | model:
17 |   _target_: data.openai_api_caller.GPTTurbo
18 |   model: "gpt-3.5-turbo"
19 |   max_tokens: 2048
20 |   temperature: 0.0
21 |   api_time_interval: 0
22 | 
23 | output_file: api-outputs/gpt35turbo/ar-lsat.react.dev.1shot.v1.1.json
24 | flush_file: ${output_file}l
25 | 
26 | # Data loading
27 | read_tensor:
28 |   read_func:
29 |     _target_: data.ar_lsat.ARLSATReader
30 |     flat_options: True
31 |   few_shot_prompt:
32 |     _target_: data.logiqav2.read_single_file
33 |     file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt
34 |   api_based: True
35 |   service_based: False
36 | 
37 | # Dataloader
38 | num_workers: 0
39 | prefetch_factor: 2
40 | 
41 | output_dir:
42 | 
43 | 
44 | # Training hyper-parameters
45 | per_gpu_train_batch_size: 1
46 | per_gpu_eval_batch_size: 1
47 | 
48 | ddp_eval: False
49 | no_cuda: False
50 | seed: 42
51 | local_rank: -1
52 | 
53 | # Temporary variables
54 | n_gpu: 1
55 | device:
56 | train_batch_size:
57 | eval_batch_size:
58 | world_size:
59 | 


--------------------------------------------------------------------------------
/scripts/construct_dpo_data_from_response.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | import re
 4 | from collections import Counter
 5 | 
 6 | 
 7 | parser = argparse.ArgumentParser()
 8 | parser.add_argument("--input_file", type=str)
 9 | parser.add_argument("--output_file", type=str)
10 | args = parser.parse_args()
11 | 
12 | data = json.load(open(args.input_file))
13 | 
14 | 
15 | outputs = []
16 | 
17 | for item in data:
18 |     chosen = []
19 |     reject = []
20 |     for response in item["response"]:
21 |         if "[Context]" in response:
22 |             response = response.split("[Context]")[0]
23 | 
24 |         preds = re.findall(r"A|B|C|D", response)
25 |         if len(preds) == 0:
26 |             pred = ""
27 |         else:
28 |             pred = preds[-1]
29 | 
30 |         if pred and ord(pred) - ord("A") == item["label"]:
31 |             chosen.append(response)
32 |         else:
33 |             reject.append(response)
34 | 
35 |     if len(chosen) > 0 and len(reject) > 0:
36 |         outputs.append({
37 |             "input": item["text"],
38 |             "chosen": chosen,
39 |             "reject": reject,
40 |             "id": item["id"],
41 |         })
42 | 
43 | print(len(outputs))
44 | 
45 | 
46 | a_cnt = Counter()
47 | b_cnt = Counter()
48 | for x in outputs:
49 |     a_cnt[len(x["chosen"])] += 1
50 |     b_cnt[len(x["reject"])] += 1
51 | print(a_cnt)
52 | print(b_cnt)
53 | 
54 | json.dump(outputs, open(args.output_file, "w"), indent=2, ensure_ascii=False)
55 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
13 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json
14 | 
15 | port: 8000
16 | model: llama-2-70b-chat
17 | 
18 | output_file: ../pretrained-models/Llama-2-70b-chat-hf/ar-lsat.react.dev.1shot.v1.1.json
19 | flush_file: ${output_file}l
20 | 
21 | # Data loading
22 | read_tensor:
23 |   read_func:
24 |     _target_: data.ar_lsat.ARLSATReader
25 |     flat_options: True
26 |     option_order: "ABCDE"
27 |   few_shot_prompt:
28 |     _target_: data.logiqav2.read_single_file
29 |     file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt
30 | 
31 | post_process:
32 |   answer_clean:
33 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
34 |     regrex: "A|B|C|D|E"
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor: 2
39 | 
40 | output_dir:
41 | 
42 | 
43 | # Training hyper-parameters
44 | per_gpu_train_batch_size: 1
45 | per_gpu_eval_batch_size: 1
46 | 
47 | ddp_eval: False
48 | no_cuda: False
49 | seed: 42
50 | local_rank: -1
51 | 
52 | # Temporary variables
53 | n_gpu: 1
54 | device:
55 | train_batch_size:
56 | eval_batch_size:
57 | world_size:
58 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-70b/reclor/dev_react_1shot_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | #test_file: ${dev_file}
15 | test_file: ../research.data/reclor_data/test.json
16 | 
17 | model: llama-2-70b-chat
18 | port: 6000
19 | save_best: False
20 | exp_name:
21 | exp_notes:
22 | output_dir: ../pretrained-models/Llama-2-70b-chat-hf
23 | eval_sub_path:
24 | 
25 | output_file: ${output_dir}/reclor.react.test.1shot.v1.0.json
26 | flush_file: ${output_file}l
27 | 
28 | read_tensor:
29 |   service_based: True
30 |   service_processor:
31 |     _target_: data.vllm.VLLMRequestGenerator
32 |     api_url: http://0.0.0.0:${port}/v1/completions
33 |     max_tokens: 2048
34 |     model: ${model}
35 |     stop: [ "</s>", "\n\n\n\n", "Context:\n" ]
36 | 
37 | # Dataloader
38 | num_workers: 64
39 | prefetch_factor: 2
40 | 
41 | # Training hyper-parameters
42 | per_gpu_train_batch_size: 1
43 | per_gpu_eval_batch_size: 1
44 | 
45 | ddp_eval: False
46 | no_cuda: False
47 | seed: 42
48 | local_rank: -1
49 | 
50 | # Temporary variables
51 | fp16: True
52 | fp16_bfloat16: True
53 | n_gpu: 1
54 | device:
55 | train_batch_size:
56 | eval_batch_size:
57 | world_size:
58 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | read_tensor:
25 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
26 |   service_based: False
27 |   service_processor:
28 | 
29 | sampling_params:
30 |   stop: [ "\n\n\n\n", "Context:\n", "<｜end▁of▁sentence｜>" ]
31 |   stop_token_ids: [100001]
32 | 
33 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-dev.full.qa.react.v1.0.0shot.json
34 | flush_file: ${output_file}l
35 | 
36 | # Dataloader
37 | num_workers: 48
38 | prefetch_factor: 2
39 | 
40 | ddp_eval: False
41 | no_cuda: False
42 | seed: 42
43 | local_rank: -1
44 | 
45 | # Temporary variables
46 | fp16: True
47 | fp16_bfloat16: True
48 | n_gpu: 1
49 | device:
50 | train_batch_size:
51 | eval_batch_size:
52 | world_size:
53 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_0_vllm.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ../research.data/reclor_data/test.json
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: 6
30 | 
31 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.0shot.v1.0.json
32 | flush_file: ${output_file}l
33 | 
34 | # Dataloader
35 | num_workers: 32
36 | prefetch_factor:
37 | 
38 | # Training hyper-parameters
39 | per_gpu_train_batch_size: 1
40 | per_gpu_eval_batch_size: 1
41 | 
42 | ddp_eval: False
43 | no_cuda: False
44 | seed: 42
45 | local_rank: -1
46 | 
47 | # Temporary variables
48 | fp16: True
49 | fp16_bfloat16: True
50 | n_gpu: 1
51 | device:
52 | train_batch_size:
53 | eval_batch_size:
54 | world_size:
55 | 
56 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
57 | 


--------------------------------------------------------------------------------
/scripts/split_train_dev.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | import random
 4 | import os
 5 | 
 6 | parser = argparse.ArgumentParser()
 7 | parser.add_argument("--input_file", type=str)
 8 | parser.add_argument("--input_file2", type=str, default=None)
 9 | args = parser.parse_args()
10 | 
11 | data = json.load(open(args.input_file))
12 | print("data size: {}".format(len(data)))
13 | if args.input_file2 is not None:
14 |     data2 = json.load(open(args.input_file2))
15 |     print("data2 size: {}".format(len(data2)))
16 | data_ids = list(range(len(data)))
17 | # read `dev_num` from command line
18 | dev_num = int(input("dev_num: "))
19 | dev_ids = random.sample(data_ids, dev_num)
20 | dev_ids = set(dev_ids)
21 | 
22 | dev_data = []
23 | train_data = []
24 | for i, item in enumerate(data):
25 |     if i in dev_ids:
26 |         dev_data.append(item)
27 |     else:
28 |         train_data.append(item)
29 | 
30 | print("dev size: {}".format(len(dev_data)))
31 | print("train size: {}".format(len(train_data)))
32 | 
33 | if args.input_file2 is not None:
34 |     output_file_name = str(input("output file name: "))
35 |     output_file = os.path.join(os.path.dirname(args.input_file), output_file_name)
36 | else:
37 |     output_file = args.input_file
38 | json.dump(dev_data, open(output_file.replace(".json", f".sub_dev.{len(dev_data)}.json"), "w"), indent=2, ensure_ascii=False)
39 | json.dump(train_data, open(output_file.replace(".json", f".sub_train.{len(train_data)}.json"), "w"), indent=2, ensure_ascii=False)
40 | 


--------------------------------------------------------------------------------
/scripts/explore_from_inter/reclor/best_of_filter_full.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.mixtral.dpo-sft.A100.40.w8.v1.0/checkpoint-1200/react-inter-states
 2 | 
 3 | 
 4 | best_of=10
 5 | pos_margin=0.7
 6 | max_neg_num=10
 7 | index="(1,2,3,4,5)"
 8 | reward_file="experiments/llama2.7b.chat.reclor.mixtral-distil.prm.A100.40.w8.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-2400/eval_predictions_rank0.json"
 9 | python scripts/best_of_filter_by_reward_v2.2.py \
10 |   --input_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.json" \
11 |   --reward_file $reward_file \
12 |   --output_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_v12_cp2400_best_of_${best_of}.neg${max_neg_num}.pos${pos_margin}.v2.2.${index}.pair.product.full_only.json" \
13 |   --best_of $best_of --max_neg_num $max_neg_num --pos_margin $pos_margin --prob_labels ${index} --reduction "product"
14 | 
15 | 
16 | # =============================== Debug
17 | #index="(1,2,3,4,5)"
18 | #reward_file="experiments/llama2.7b.chat.reclor.mixtral-distil.prm.A100.40.w8.v1.2.s42/train.rewards.raw_trajectory.product.v1.0/test-checkpoint-2400/eval_predictions_rank0.json"
19 | #python scripts/combine_reward_debug_v1.0.py \
20 | #  --input_file "$data_dir/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.json" \
21 | #  --reward_file $reward_file \
22 | #  --output_file "./reward_reclor_debug_cp2400_${index}.json" --reduction product --prob_labels ${index}
23 | 


--------------------------------------------------------------------------------
/scripts/process_turbo.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #python scripts/process_react_nodes.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.json \
 4 | #  --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json
 5 | 
 6 | #python scripts/sent_tf_react_step_encoding.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json \
 7 | #  --model_path ../pretrained-models/bge-large-en-v1.5 \
 8 | #  --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.emb.npy
 9 | 
10 | python scripts/react_step_union_find.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.json \
11 |   --embedding_path data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.emb.npy --threshold 0.95 \
12 |   --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.json
13 | 
14 | python scripts/construct_dpo_data_via_step_value_v1.py \
15 |   --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.json \
16 |   --output_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.len2.in4.v0.1.json \
17 |   --save_full_data
18 | 
19 | python scripts/split_train_dev.py --input_file data/trajectory/react/logiqav2-train-v1.0.react.1shot.turbo.sample5.clean_nodes.cluster.t0.95.TO.len2.in4.v0.1.json \
20 |   --dev_num 5000


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_vllm.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${dev_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: 6
30 |   service_based: False
31 |   service_processor:
32 | 
33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.0shot.v1.0.json
34 | flush_file: ${output_file}l
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor:
39 | 
40 | # Training hyper-parameters
41 | per_gpu_train_batch_size: 1
42 | per_gpu_eval_batch_size: 1
43 | 
44 | ddp_eval: False
45 | no_cuda: False
46 | seed: 42
47 | local_rank: -1
48 | 
49 | # Temporary variables
50 | fp16: True
51 | fp16_bfloat16: True
52 | n_gpu: 1
53 | device:
54 | train_batch_size:
55 | eval_batch_size:
56 | world_size:
57 | 
58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
59 | 


--------------------------------------------------------------------------------
/conf/api/gpt35turbo/logiqav2/dev_react_v1_0_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | 
13 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
14 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
16 | 
17 | #num_shot: 5
18 | 
19 | output_file: api-outputs/gpt35turbo1106/logiqav2.test.react.1shot.gpt35turbo1106.sample1.tem${model.temperature}.json
20 | flush_file: ${output_file}l
21 | 
22 | model:
23 |   _target_: data.openai_api_caller.GPTTurbo
24 |   model: "gpt-3.5-turbo-1106"
25 |   max_tokens: 2048
26 | #  temperature: 1.0
27 | #  temperature: 0.7
28 |   temperature: 0.0
29 |   api_time_interval: 1
30 | #  top_p: 0.8
31 | #  n: 1
32 | 
33 | # Data loading
34 | read_tensor:
35 |   max_data_num: 500
36 |   service_based: False
37 |   service_processor:
38 |   api_based: True
39 |   flush_file: ${flush_file}
40 | 
41 | # Dataloader
42 | num_workers: 0
43 | prefetch_factor: 2
44 | 
45 | output_dir:
46 | 
47 | 
48 | # Training hyper-parameters
49 | per_gpu_train_batch_size: 1
50 | per_gpu_eval_batch_size: 1
51 | 
52 | ddp_eval: False
53 | no_cuda: False
54 | seed: 42
55 | local_rank: -1
56 | 
57 | # Temporary variables
58 | n_gpu: 1
59 | device:
60 | train_batch_size:
61 | eval_batch_size:
62 | world_size:
63 | 


--------------------------------------------------------------------------------
/conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - post_process: gsm8k
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 |     _target_: data.math.gsm8k_gold_answer_extractor
27 |   template: "### Instruction:\n{query}\n\n### Response: Let's think step by step."
28 |   instruction: "Below is an instruction that describes a task. Write a response that appropriately completes the request."
29 |   max_data_num: -1
30 |   service_based: False
31 |   api_based: False
32 |   index_field: "index"
33 | 
34 | sampling_params:
35 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction" ]
36 | 
37 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.0.0shot.json
38 | flush_file: ${output_file}l
39 | 
40 | # Dataloader
41 | num_workers: 48
42 | prefetch_factor: 2
43 | 
44 | post_process:
45 |   resume: False
46 |   index_field: "index"
47 |   label_field: "label"
48 | 
49 | ddp_eval: False
50 | no_cuda: False
51 | seed: 42
52 | local_rank: -1
53 | 
54 | # Temporary variables
55 | fp16: True
56 | fp16_bfloat16: True
57 | n_gpu: 1
58 | device:
59 | train_batch_size:
60 | eval_batch_size:
61 | world_size:
62 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero1_lr.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupLR
 5 |   params:
 6 |     warmup_max_lr: ${learning_rate}
 7 |     warmup_num_steps:
 8 |     warmup_type: linear
 9 | optimizer:
10 |   type: AdamW
11 |   params:
12 |     lr: 1e-4
13 |     betas: [ 0.9, 0.999 ]
14 |     eps: 1e-6
15 |     weight_decay: 0.0
16 | bf16:
17 |   enabled: True
18 | zero_optimization:
19 |   stage: 1
20 | #  offload_optimizer:
21 | #    device: cpu
22 | #    pin_memory: True
23 | #  offload_param:
24 | #    device: cpu
25 | #    pin_memory: True
26 |   #  activation_checkpointing:
27 |   #    partition_activations: True
28 |   #    cpu_checkpointing: True
29 |   #    contiguous_memory_optimization: False
30 |   #    number_checkpoints: False
31 |   #    synchronize_checkpoint_boundary: False
32 |   #    profile: False
33 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
34 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
35 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
36 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
37 |   memory_efficient_linear: False
38 | steps_per_print: 25
39 | gradient_clipping: 1.0
40 | prescale_gradients: False
41 | #wall_clock_breakdown: False
42 | #hybrid_engine:
43 | #  enabled: True
44 | #  max_out_tokens: max_out_tokens
45 | #  inference_tp_size: inference_tp_size
46 | #  release_inference_cache: release_inference_cache
47 | #  pin_parameters: pin_parameters
48 | #  tp_gather_partition_size: tp_gather_partition_size
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-70b/ar_lsat/dev_react_1shot_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
12 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
13 | #test_file: ../research.data/AR-LSAT/data/AR_TestData.json
14 | test_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
15 | 
16 | port: 6000
17 | model: mixtral-ins
18 | 
19 | output_file: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1/ar-lsat.react.dev.1shot.v2.0.json
20 | flush_file: ${output_file}l
21 | 
22 | # Data loading
23 | read_tensor:
24 |   read_func:
25 |     _target_: data.ar_lsat.ARLSATReader
26 |     flat_options: True
27 |   few_shot_prompt:
28 |     _target_: data.logiqav2.read_single_file
29 |     file_path: data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt
30 |   service_processor:
31 |     _target_: data.vllm.VLLMRequestGenerator
32 |     api_url: http://0.0.0.0:6000/v1/completions
33 |     max_tokens: 8192
34 |     stop: [ "</s>", "\n\n\n\n", "Context:\n", "Thought 42:" ]
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor: 2
39 | 
40 | output_dir:
41 | 
42 | 
43 | # Training hyper-parameters
44 | per_gpu_train_batch_size: 1
45 | per_gpu_eval_batch_size: 1
46 | 
47 | ddp_eval: False
48 | no_cuda: False
49 | seed: 42
50 | local_rank: -1
51 | 
52 | # Temporary variables
53 | n_gpu: 1
54 | device:
55 | train_batch_size:
56 | eval_batch_size:
57 | world_size:
58 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero1_optim_offload_lr.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupLR
 5 |   params:
 6 |     warmup_max_lr: ${learning_rate}
 7 |     warmup_num_steps:
 8 |     warmup_type: linear
 9 | optimizer:
10 |   type: AdamW
11 |   params:
12 |     lr: 1e-4
13 |     betas: [ 0.9, 0.999 ]
14 |     eps: 1e-6
15 |     weight_decay: 0.0
16 | bf16:
17 |   enabled: True
18 | zero_optimization:
19 |   stage: 1
20 |   offload_optimizer:
21 |     device: cpu
22 |     pin_memory: True
23 | #  offload_param:
24 | #    device: cpu
25 | #    pin_memory: True
26 |   #  activation_checkpointing:
27 |   #    partition_activations: True
28 |   #    cpu_checkpointing: True
29 |   #    contiguous_memory_optimization: False
30 |   #    number_checkpoints: False
31 |   #    synchronize_checkpoint_boundary: False
32 |   #    profile: False
33 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
34 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
35 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
36 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
37 |   memory_efficient_linear: False
38 | steps_per_print: 25
39 | gradient_clipping: 1.0
40 | prescale_gradients: False
41 | #wall_clock_breakdown: False
42 | #hybrid_engine:
43 | #  enabled: True
44 | #  max_out_tokens: max_out_tokens
45 | #  inference_tp_size: inference_tp_size
46 | #  release_inference_cache: release_inference_cache
47 | #  pin_parameters: pin_parameters
48 | #  tp_gather_partition_size: tp_gather_partition_size
49 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_tems/react_train_0shot_sample_tem_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
15 | 
16 | save_best: False
17 | exp_name: llama2.7b.chat.logiqav2.70b-distil.step.dpo.fix_hack.H100.w4.v1.0.th.s43
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 3072
26 |   temperature: 2.0
27 | gpu_memory_utilization: 0.95
28 | 
29 | read_tensor:
30 |   split_size: -1
31 |   split_id: 0
32 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "
33 |   service_based: False
34 |   service_processor:
35 | 
36 | #swap_space: 8
37 | 
38 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-train.react.sample${sampling_params.n}.tem${sampling_params.temperature}.v1.0.0shot.json
39 | flush_file: ${output_file}l
40 | 
41 | # Dataloader
42 | num_workers: 48
43 | prefetch_factor: 2
44 | 
45 | ddp_eval: True
46 | no_cuda: False
47 | seed: 42
48 | local_rank: -1
49 | 
50 | # Temporary variables
51 | fp16: True
52 | fp16_bfloat16: True
53 | n_gpu: 1
54 | device:
55 | train_batch_size:
56 | eval_batch_size:
57 | world_size:
58 | 


--------------------------------------------------------------------------------
/conf/api/vllm/math/math_gemma_test_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - post_process: math
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 |     _target_: data.math.math_gold_answer_extractor
27 |     kv_mapping:
28 |       instruction: query
29 |   template: "### Instruction:\n{query}\n\n### Response: Let's think step by step."
30 |   instruction: "Below is an instruction that describes a task. Write a response that appropriately completes the request."
31 |   max_data_num: -1
32 |   service_based: False
33 |   api_based: False
34 |   index_field: "idx"
35 | 
36 | sampling_params:
37 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction" ]
38 | 
39 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.0.0shot.json
40 | flush_file: ${output_file}l
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | post_process:
47 |   resume: False
48 |   index_field: "idx"
49 |   label_field: "label"
50 | 
51 | ddp_eval: False
52 | no_cuda: False
53 | seed: 42
54 | local_rank: -1
55 | 
56 | # Temporary variables
57 | fp16: True
58 | fp16_bfloat16: True
59 | n_gpu: 1
60 | device:
61 | train_batch_size:
62 | eval_batch_size:
63 | world_size:
64 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero2_lr.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupLR
 5 |   params:
 6 | #    total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 2
21 | #  offload_optimizer:
22 | #    device: cpu
23 | #    pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/logic_form/human/dev_218_0_sim.md:
--------------------------------------------------------------------------------
 1 | [Context]:
 2 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water.
 3 | 
 4 | [Question]:
 5 | Which of the followings, if true, can best support the above statement?
 6 | 
 7 | [Options]:
 8 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet.
 9 | B. Many of the water in interstellar space exists in gaseous form.
10 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice.
11 | D. The satellite and the planets around it were formed from the same gas and dust at the same time.
12 | 
13 | Here are the logic forms for context, question and options:
14 | 
15 | [Context]
16 | 1. isGasGiant(Jupiter) AND isLargestInSolarSystem(Jupiter)
17 | 2. mass(Jupiter) = 2.5 * sumOfMass(otherSevenPlanetsInSolarSystem)
18 | 3. composedOfWaterIce(surroundingMoons(Jupiter)) > 70
19 | 4. containsConsiderableWater(atmosphere(Jupiter))
20 | 
21 | [Question]
22 | Which of the followings, if true, can best support the statement Context-4?
23 | 
24 | [Options]
25 | A. fallOntoPlanet(satellite, planet) AND afterHundredsOfMillionsOfYears()
26 | B. existsInGaseousForm(water, interstellarSpace)
27 | C. isGasGiant(Uranus) AND containsLotsOfWaterIce(Uranus)
28 | D. formedFromSameGasAndDust(satellite, planet) AND atSameTime(satellite, planet)


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero1.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupDecayLR
 5 |   params:
 6 |     total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 1
21 | #  offload_optimizer:
22 | #    device: cpu
23 | #    pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero2.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupDecayLR
 5 |   params:
 6 |     total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 2
21 | #  offload_optimizer:
22 | #    device: cpu
23 | #    pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero3.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupDecayLR
 5 |   params:
 6 |     total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 3
21 | #  offload_optimizer:
22 | #    device: cpu
23 | #    pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ../research.data/reclor_data/test.json
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
30 | 
31 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.0shot.v1.1.json
32 | flush_file: ${output_file}l
33 | 
34 | # Dataloader
35 | num_workers: 32
36 | prefetch_factor:
37 | 
38 | # Training hyper-parameters
39 | per_gpu_train_batch_size: 1
40 | per_gpu_eval_batch_size: 1
41 | 
42 | ddp_eval: False
43 | no_cuda: False
44 | seed: 42
45 | local_rank: -1
46 | 
47 | # Temporary variables
48 | fp16: True
49 | fp16_bfloat16: True
50 | n_gpu: 1
51 | device:
52 | train_batch_size:
53 | eval_batch_size:
54 | world_size:
55 | 
56 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
57 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero1_optim_offload.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupDecayLR
 5 |   params:
 6 |     total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 1
21 |   offload_optimizer:
22 |     device: cpu
23 |     pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero2_optim_offload.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupDecayLR
 5 |   params:
 6 |     total_num_steps:
 7 |     warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 2
21 |   offload_optimizer:
22 |     device: cpu
23 |     pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/api/gpt4/logiqav2/dev_react_v1_0_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | 
13 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
14 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
15 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/test.txt
16 | 
17 | #num_shot: 5
18 | 
19 | output_file: api-outputs/gpt-4-1106-preview/logiqav2.dev.react.1shot.gpt4-1106-preview.sample1.tem${model.temperature}.first${read_tensor.max_data_num}.json
20 | flush_file: ${output_file}l
21 | 
22 | model:
23 |   _target_: data.openai_api_caller.GPTTurbo
24 | #  model: "gpt-4-1106-preview"
25 |   model: "gpt-4-0125-preview"
26 |   max_tokens: 2048
27 | #  temperature: 1.0
28 | #  temperature: 0.7
29 |   temperature: 0.0
30 |   api_time_interval: 1
31 | #  top_p: 0.8
32 | #  n: 1
33 | 
34 | # Data loading
35 | read_tensor:
36 | #  max_data_num: 500
37 |   max_data_num: 250
38 |   service_based: False
39 |   service_processor:
40 |   api_based: True
41 |   flush_file: ${flush_file}
42 | 
43 | # Dataloader
44 | num_workers: 0
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | 
50 | # Training hyper-parameters
51 | per_gpu_train_batch_size: 1
52 | per_gpu_eval_batch_size: 1
53 | 
54 | ddp_eval: False
55 | no_cuda: False
56 | seed: 42
57 | local_rank: -1
58 | 
59 | # Temporary variables
60 | n_gpu: 1
61 | device:
62 | train_batch_size:
63 | eval_batch_size:
64 | world_size:
65 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${dev_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
30 |   service_based: False
31 |   service_processor:
32 | 
33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.0shot.v1.1.json
34 | flush_file: ${output_file}l
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor:
39 | 
40 | # Training hyper-parameters
41 | per_gpu_train_batch_size: 1
42 | per_gpu_eval_batch_size: 1
43 | 
44 | ddp_eval: False
45 | no_cuda: False
46 | seed: 42
47 | local_rank: -1
48 | 
49 | # Temporary variables
50 | fp16: True
51 | fp16_bfloat16: True
52 | n_gpu: 1
53 | device:
54 | train_batch_size:
55 | eval_batch_size:
56 | world_size:
57 | 
58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
59 | 


--------------------------------------------------------------------------------
/conf/deepspeed/train_hybrid_engine_zero1_optim_offload_cosine.yaml:
--------------------------------------------------------------------------------
 1 | train_micro_batch_size_per_gpu:
 2 | gradient_accumulation_steps:
 3 | scheduler:
 4 |   type: WarmupCosineLR  # requires deepspeed >= 0.12.3
 5 |   params:
 6 |     total_num_steps:
 7 | #    warmup_max_lr: ${learning_rate}
 8 |     warmup_num_steps:
 9 |     warmup_type: linear
10 | optimizer:
11 |   type: AdamW
12 |   params:
13 |     lr: 1e-4
14 |     betas: [ 0.9, 0.999 ]
15 |     eps: 1e-6
16 |     weight_decay: 0.0
17 | bf16:
18 |   enabled: True
19 | zero_optimization:
20 |   stage: 1
21 |   offload_optimizer:
22 |     device: cpu
23 |     pin_memory: True
24 | #  offload_param:
25 | #    device: cpu
26 | #    pin_memory: True
27 |   #  activation_checkpointing:
28 |   #    partition_activations: True
29 |   #    cpu_checkpointing: True
30 |   #    contiguous_memory_optimization: False
31 |   #    number_checkpoints: False
32 |   #    synchronize_checkpoint_boundary: False
33 |   #    profile: False
34 |   #  zero_quantized_nontrainable_weights: False  # If `enable_mixed_precision_lora` is True, this should be True
35 |   stage3_param_persistence_threshold: 1e5  # (1e4,1e6)
36 |   stage3_max_live_parameters: 1e8  # (3e7, 1e9)
37 |   stage3_prefetch_bucket_size: 1e8  # (3e7, 5e8)
38 |   memory_efficient_linear: False
39 | steps_per_print: 25
40 | gradient_clipping: 1.0
41 | prescale_gradients: False
42 | #wall_clock_breakdown: False
43 | #hybrid_engine:
44 | #  enabled: True
45 | #  max_out_tokens: max_out_tokens
46 | #  inference_tp_size: inference_tp_size
47 | #  release_inference_cache: release_inference_cache
48 | #  pin_parameters: pin_parameters
49 | #  tp_gather_partition_size: tp_gather_partition_size
50 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/ar_lsat_tems/dev_react_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
13 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
14 | test_file: ${dev_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | # Data loading
25 | read_tensor:
26 |   read_func:
27 |     _target_: data.ar_lsat.ARLSATReader
28 |     flat_options: True
29 |     option_order: "ABCDE"
30 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "
31 |   service_based: False
32 |   service_processor:
33 | 
34 | sampling_params:
35 |   max_tokens: 3072
36 | 
37 | output_file: ${output_dir}/${eval_sub_path}/ar-lsat.dev.react.v1.0.0shot.json
38 | flush_file: ${output_file}l
39 | 
40 | post_process:
41 |   answer_clean:
42 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
43 |     regrex: "A|B|C|D|E"
44 | 
45 | # Dataloader
46 | num_workers: 32
47 | prefetch_factor: 2
48 | 
49 | 
50 | # Training hyper-parameters
51 | per_gpu_train_batch_size: 1
52 | per_gpu_eval_batch_size: 1
53 | 
54 | ddp_eval: False
55 | no_cuda: False
56 | seed: 42
57 | local_rank: -1
58 | 
59 | # Temporary variables
60 | fp16: True
61 | fp16_bfloat16: True
62 | n_gpu: 1
63 | device:
64 | train_batch_size:
65 | eval_batch_size:
66 | world_size:
67 | 


--------------------------------------------------------------------------------
/scripts/cot/cot_step_accumulate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | from multiprocessing import Pool
 4 | from functools import partial
 5 | 
 6 | from tqdm import tqdm
 7 | 
 8 | """
 9 | The output should come from `cot_clean.py`.
10 | """
11 | 
12 | 
def acc_func(item, response_field="response"):
    """Attach cumulative step prefixes of every response to ``item``.

    For each response (a sequence of step strings) paired with an entry of
    ``item["pred"]``, the steps are concatenated one at a time and every
    intermediate prefix is recorded under the id
    ``"{item id}_{response index}_{step index}"``.

    The last two steps of a response are never visited, and scanning of a
    response stops at the first step containing ``"### The answer is"``.

    Args:
        item: Sample dict providing ``response_field``, ``"pred"`` and ``"id"``.
        response_field: Key of ``item`` that holds the list of responses.

    Returns:
        The same ``item`` object, with the collected prefixes stored under
        ``"accumulated_response"``.
    """
    all_responses = item[response_field]
    predictions = item["pred"]
    sample_id = item["id"]

    prefixes = []
    # Predictions are only zipped in so that iteration stops at the shorter
    # of the two lists; their values are not used.
    for resp_idx, (steps, _) in enumerate(zip(all_responses, predictions)):
        running = ""
        for step_idx, step in enumerate(steps[:-2]):
            if "### The answer is" in step:
                break
            running = running + step
            prefixes.append(
                {"id": f"{sample_id}_{resp_idx}_{step_idx}", "response": running}
            )

    item["accumulated_response"] = prefixes
    return item
32 | 
33 | 
def main():
    """Accumulate reasoning-step prefixes for every item of a JSON file.

    Command-line arguments:
        --input_file: Path to the JSON file to process (required).
        --response_field: Key holding the responses of each item
            (default ``"response"``).
        --num_workers: Number of worker processes (default 16).

    The result is written next to the input with ``_accumulated`` inserted
    before the file extension, e.g. ``foo.json`` -> ``foo_accumulated.json``.
    """
    import os  # local import: only needed to derive the output path

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--response_field", type=str, default="response")
    parser.add_argument("--num_workers", type=int, default=16)
    args = parser.parse_args()

    # Use context managers so the file handles are closed deterministically.
    with open(args.input_file) as f:
        data = json.load(f)

    annotate = partial(acc_func, response_field=args.response_field)
    with Pool(args.num_workers) as p:
        data = list(tqdm(p.imap(annotate, data), total=len(data)))

    # Split off the extension instead of using str.replace(".json", ...):
    # replace() leaves the path unchanged when ".json" does not occur (which
    # would overwrite the input file) and also rewrites ".json" inside
    # directory names.
    root, ext = os.path.splitext(args.input_file)
    save_path = f"{root}_accumulated{ext or '.json'}"
    with open(save_path, "w") as f:
        json.dump(data, f)
49 | 
50 | 
51 | if __name__ == '__main__':
52 |     main()
53 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm_sc.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${dev_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
30 |   service_based: False
31 |   service_processor:
32 | 
33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.dev.n${sampling_params.n}.tem${sampling_params.temperature}.0shot.v1.1.json
34 | flush_file: ${output_file}l
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor:
39 | 
40 | # Training hyper-parameters
41 | per_gpu_train_batch_size: 1
42 | per_gpu_eval_batch_size: 1
43 | 
44 | ddp_eval: False
45 | no_cuda: False
46 | seed: 42
47 | local_rank: -1
48 | 
49 | # Temporary variables
50 | fp16: True
51 | fp16_bfloat16: True
52 | n_gpu: 1
53 | device:
54 | train_batch_size:
55 | eval_batch_size:
56 | world_size:
57 | 
58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
59 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/ar_lsat_tems/test_react_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/AR-LSAT/data/AR_TrainingData.json
13 | dev_file: ../research.data/AR-LSAT/data/AR_DevelopmentData.json
14 | test_file: ../research.data/AR-LSAT/data/AR_TestData.json
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step: 800
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | # Data loading
25 | read_tensor:
26 |   read_func:
27 |     _target_: data.ar_lsat.ARLSATReader
28 |     flat_options: True
29 |     option_order: "ABCDE"
30 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "
31 |   service_based: False
32 |   service_processor:
33 | 
34 | sampling_params:
35 |   max_tokens: 3072
36 | 
37 | output_file: ${output_dir}/${eval_sub_path}/ar-lsat.test.react.v1.0.0shot.json
38 | flush_file: ${output_file}l
39 | 
40 | post_process:
41 |   answer_clean:
42 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
43 |     regrex: "A|B|C|D|E"
44 | 
45 | # Dataloader
46 | num_workers: 32
47 | prefetch_factor: 2
48 | 
49 | 
50 | # Training hyper-parameters
51 | per_gpu_train_batch_size: 1
52 | per_gpu_eval_batch_size: 1
53 | 
54 | ddp_eval: False
55 | no_cuda: False
56 | seed: 42
57 | local_rank: -1
58 | 
59 | # Temporary variables
60 | fp16: True
61 | fp16_bfloat16: True
62 | n_gpu: 1
63 | device:
64 | train_batch_size:
65 | eval_batch_size:
66 | world_size:
67 | 


--------------------------------------------------------------------------------
/models/string_rule_reward.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | import torch
 4 | from torch import nn
 5 | from transformers import PreTrainedTokenizer, PreTrainedModel
 6 | from typing import List, Dict
 7 | 
 8 | from models.reward_model_mixin import RewardModelMixin, RewardModelOutputs
 9 | 
10 | 
class MultipleChoiceAccuracyReward(nn.Module, RewardModelMixin):
    """Rule-based reward that scores generations by their final option letter.

    No learned parameters are involved: the generated tokens are decoded to
    text, the last occurrence of one of the characters ``A``-``E`` is taken
    as the prediction, and it is compared against the gold label index.
    """

    def __init__(self, base_model: PreTrainedModel, tokenizer: PreTrainedTokenizer):
        """Store the tokenizer and the letter-to-index mapping.

        ``base_model`` is accepted but not used by this class.
        """
        super().__init__()
        self.tokenizer = tokenizer
        self.option2int = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4}

    def forward(self, *args, **kwargs):
        """No-op; always returns ``None``."""
        pass

    def forward_value(self, seq: torch.LongTensor, attention_mask: torch.LongTensor, prompt_length: int, labels: List[int], *args, **kwargs) -> Dict:
        """Compute a +1 / -1 reward for every sequence in the batch.

        Args:
            seq: Token ids; indexed as ``seq[:, prompt_length:]`` to drop the
                prompt when ``prompt_length`` is positive.
            attention_mask: Not used by this method.
            prompt_length: Number of leading positions to strip from ``seq``.
            labels: Gold option indices, compared against ``option2int`` values.

        Returns:
            A dict whose ``"values"`` and ``"chosen_end_scores"`` entries are
            the same bfloat16 tensor on ``seq.device``: ``1`` where the last
            matched letter maps to the label, ``-1`` otherwise (including
            when no letter is found).
        """
        if prompt_length > 0:
            seq = seq[:, prompt_length:]
        texts = self.tokenizer.batch_decode(seq, skip_special_tokens=True)

        # The pattern matches any single character A-E anywhere in the text,
        # not only stand-alone option letters; the last match is the prediction.
        pattern = "A|B|C|D|E"
        matches_per_text = [re.findall(pattern, text) for text in texts]

        scores = []
        for letters, label in zip(matches_per_text, labels):
            is_correct = len(letters) != 0 and self.option2int[letters[-1]] == label
            scores.append(1 if is_correct else -1)

        reward_tensor = torch.tensor(scores, dtype=torch.bfloat16, device=seq.device)
        return {"values": reward_tensor, "chosen_end_scores": reward_tensor}
44 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm_sc.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ../research.data/reclor_data/test.json
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: experiments/${exp_name}
20 | 
21 | step:
22 | eval_sub_path: checkpoint-${step}
23 | 
24 | sampling_params:
25 |   max_tokens: 2048
26 | gpu_memory_utilization: 0.95
27 | 
28 | read_tensor:
29 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
30 |   service_based: False
31 |   service_processor:
32 | 
33 | output_file: ${output_dir}/${eval_sub_path}/reclor.react.test.n${sampling_params.n}.tem${sampling_params.temperature}.0shot.v1.1.json
34 | flush_file: ${output_file}l
35 | 
36 | # Dataloader
37 | num_workers: 32
38 | prefetch_factor:
39 | 
40 | # Training hyper-parameters
41 | per_gpu_train_batch_size: 1
42 | per_gpu_eval_batch_size: 1
43 | 
44 | ddp_eval: False
45 | no_cuda: False
46 | seed: 42
47 | local_rank: -1
48 | 
49 | # Temporary variables
50 | fp16: True
51 | fp16_bfloat16: True
52 | n_gpu: 1
53 | device:
54 | train_batch_size:
55 | eval_batch_size:
56 | world_size:
57 | 
58 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
59 | 


--------------------------------------------------------------------------------
/data/ar_lsat.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from data.logiqav2 import _format_option_list
 4 | 
 5 | 
 6 | class ARLSATReader:
 7 |     rank2option = ['A', 'B', 'C', 'D', 'E']
 8 | 
 9 |     def __init__(self, flat_options: bool = False, option_order: str = "ABCDE"):
10 |         self.flat_options = flat_options
11 |         self.option_order = option_order
12 | 
13 |     def __call__(self, file):
14 |         all_context = []
15 |         all_question = []
16 |         all_option_list = []
17 |         all_label = []
18 |         print(file)
19 |         data = json.load(open(file, "r"))
20 |         for item in data:
21 |             for q in item["questions"]:
22 |                 all_context.append(item["passage"])
23 |                 all_question.append(q["question"])
24 | 
25 |                 options = []
26 |                 ordered_label = -1
27 |                 for i, x in enumerate(self.option_order):
28 |                     idx = ord(x) - ord('A')
29 |                     options.append(q["options"][idx])
30 | 
31 |                     if x == q["answer"]:
32 |                         ordered_label = i
33 | 
34 |                 # if "Test" not in file:
35 |                 assert ordered_label != -1, (q["answer"], q["options"], x)
36 | 
37 |                 all_label.append(ordered_label)
38 |                 all_option_list.append(options)
39 | 
40 |         return [
41 |             {
42 |                 "context": context,
43 |                 "question": question,
44 |                 "option_list": _format_option_list(option_list, self.rank2option) if self.flat_options else option_list,
45 |                 "label": label,
46 |             } for context, question, option_list, label in zip(all_context, all_question, all_option_list, all_label)
47 |         ]
48 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/folio_tems/react_dev_0shot_tem_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - post_process: openai_react
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file: ../research.data/FOLIO/data/v0.0/folio-train.jsonl
12 | dev_file: ../research.data/FOLIO/data/v0.0/folio-validation.jsonl
13 | test_file: ../research.data/FOLIO/data/v0.0/folio-validation.jsonl
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logiqav2.ComposePromptGenerator
25 |   read_func:
26 |     _target_: data.folio.FOLIO2QAReader
27 |   instruction:
28 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
29 |     prompt_name: react_v2
30 |   few_shot_prompt:
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "  # In version v2.1, we change the template to: ```xxx\n\nThought 1: ```
35 |   service_based: False
36 |   service_processor:
37 | 
38 | output_file: ${output_dir}/${eval_sub_path}/folio.dev.qa.react.v1.0.0shot.json
39 | flush_file: ${output_file}l
40 | 
41 | post_process:
42 |   answer_clean:
43 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
44 |     regrex: "A|B"
45 | 
46 | # Dataloader
47 | num_workers: 48
48 | prefetch_factor: 2
49 | 
50 | ddp_eval: False
51 | no_cuda: False
52 | seed: 42
53 | local_rank: -1
54 | 
55 | # Temporary variables
56 | fp16: True
57 | fp16_bfloat16: True
58 | n_gpu: 1
59 | device:
60 | train_batch_size:
61 | eval_batch_size:
62 | world_size:
63 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_service.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${dev_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir:
20 | eval_sub_path:
21 | 
22 | sampling_params:
23 |   max_tokens: 2048
24 | gpu_memory_utilization: 0.95
25 | 
26 | read_tensor:
27 | #  split_size: -1
28 | #  split_id: 0
29 |   service_based: True
30 |   service_processor:
31 |     _target_: data.vllm.VLLMRequestGenerator
32 |     api_url: http://0.0.0.0:6000/v1/completions
33 |     max_tokens: ${sampling_params.max_tokens}
34 |     model: llama2-7b-reclor-distil
35 |     stop: [ "</s>", "\n\n\n\n", "Context:\n" ]
36 |     n: ${sampling_params.n}
37 |     temperature: ${sampling_params.temperature}
38 |   flush_file: ${flush_file}
39 | 
40 | output_file: ${output_dir}/reclor.react.dev.0shot.v1.0.json
41 | flush_file: ${output_file}l
42 | 
43 | # Dataloader
44 | num_workers: 16
45 | prefetch_factor:
46 | 
47 | # Training hyper-parameters
48 | per_gpu_train_batch_size: 1
49 | per_gpu_eval_batch_size: 1
50 | 
51 | ddp_eval: False
52 | no_cuda: False
53 | seed: 42
54 | local_rank: -1
55 | 
56 | # Temporary variables
57 | fp16: True
58 | fp16_bfloat16: True
59 | n_gpu: 1
60 | device:
61 | train_batch_size:
62 | eval_batch_size:
63 | world_size:
64 | 
65 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
66 | 


--------------------------------------------------------------------------------
/conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - post_process: math
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 |     _target_: data.math.gsm8k_gold_answer_extractor
27 |   template: "{instruction}\n\n### Question: {query}\n\nSubQuestion 1: "
28 |   instruction: "Given a question, please decompose it into sub-questions. For each sub-question, please answer it in a complete sentence, ending with \"The answer is\". When the original question is answerable, please start the sub-question with \"Now we can answer the question: \"."
29 |   max_data_num: -1
30 |   service_based: False
31 |   api_based: False
32 |   index_field: "index"
33 | 
34 | sampling_params:
35 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction" ]
36 | 
37 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.1.0shot.json
38 | flush_file: ${output_file}l
39 | 
40 | # Dataloader
41 | num_workers: 48
42 | prefetch_factor: 2
43 | 
44 | post_process:
45 |   answer_clean:
46 |     _target_: data.math.math_answer_cleaner
47 |     separator: "The answer is"
48 |   resume: False
49 |   index_field: "index"
50 |   label_field: "label"
51 | 
52 | ddp_eval: False
53 | no_cuda: False
54 | seed: 42
55 | local_rank: -1
56 | 
57 | # Temporary variables
58 | fp16: True
59 | fp16_bfloat16: True
60 | n_gpu: 1
61 | device:
62 | train_batch_size:
63 | eval_batch_size:
64 | world_size:
65 | 


--------------------------------------------------------------------------------
/scripts/sent_tf_react_step_encoding.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from FlagEmbedding import FlagModel
 3 | import json
 4 | from tqdm import tqdm
 5 | import numpy as np
 6 | 
 7 | 
 8 | def main():
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("--input_file", type=str)
11 |     parser.add_argument("--model_path", type=str)
12 |     parser.add_argument("--output_file", type=str)
13 |     args = parser.parse_args()
14 | 
15 |     data = json.load(open(args.input_file, "r"))
16 | 
17 |     node_ids = []
18 |     node_types = []
19 |     node_rationales = []
20 |     node2offset = {}
21 |     node2idx = {}
22 |     for j, item in tqdm(enumerate(data)):
23 |         node2offset[j] = [len(node_rationales), -1]
24 |         node2idx[j] = {}
25 |         for i in range(len(item["response"])):
26 |             chain_nodes = item["nodes"][i]
27 | 
28 |             chain_node_ids = [node["id"] for node in chain_nodes]
29 |             chain_node_types = [node["type"] for node in chain_nodes]
30 |             chain_node_rationales = [node["content"] for node in chain_nodes]
31 | 
32 |             node2idx[j][i] = len(node_ids)
33 | 
34 |             node_ids.extend(chain_node_ids)
35 |             node_types.extend(chain_node_types)
36 |             node_rationales.extend(chain_node_rationales)
37 | 
38 |             assert "Finish[The answer is" in item["nodes"][i][-1]["content"]
39 | 
40 |         node2offset[j][1] = len(node_rationales)
41 | 
42 |     model = FlagModel(args.model_path,
43 |                       # query_instruction_for_retrieval="",
44 |                       use_fp16=True)  # Setting use_fp16 to True speeds up computation with a slight performance degradation
45 |     embeddings = model.encode(node_rationales)
46 | 
47 |     np.save(args.output_file, embeddings)
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     main()
52 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_rest_train_react_v1_0_0shot_sample.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/logiqav2@read_tensor: react_service_0shot_v1_0
 4 |   - post_process: openai_react
 5 | #  - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
13 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
14 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
15 | 
16 | step: 2000
17 | eval_sub_path: checkpoint-${step}
18 | 
19 | n: 10
20 | split_size: 4
21 | split_id: 0
22 | 
23 | 
24 | # Data loading
25 | read_tensor:
26 |   template_id: "Context:\n{}\n\nQuestion:\n{}\n\nOptions:\n{}\n\nThought 1: "
27 |   split_size: ${split_size}
28 |   split_id: ${split_id}
29 |   service_based: False
30 |   service_processor:
31 | 
32 | sampling_params:
33 |   _target_: vllm.SamplingParams
34 |   n: ${n}
35 |   temperature: 1.0
36 |   top_p: 0.8
37 |   stop: [ "</s>", "\n\n\n\n" ]
38 |   max_tokens: 2048
39 | 
40 | save_best: False
41 | output_dir: experiments/llama2.7b.chat.logiqav2.70b-distil.self-sft.A40.w8.v1.0
42 | 
43 | suffix: ${n}.tem${sampling_params.temperature}.p${sampling_params.top_p}.s${split_id}-of-${split_size}
44 | output_file: ${output_dir}/${eval_sub_path}/logiqav2-train.full.qa.react.v1.0.0shot.${suffix}.json
45 | flush_file: ${output_file}l
46 | 
47 | # Dataloader
48 | num_workers: 96
49 | prefetch_factor: 2
50 | 
51 | 
52 | # Training hyper-parameters
53 | per_gpu_train_batch_size: 1
54 | per_gpu_eval_batch_size: 1
55 | 
56 | ddp_eval: False
57 | no_cuda: False
58 | seed: 42
59 | local_rank: -1
60 | 
61 | # Temporary variables
62 | fp16: True
63 | fp16_bfloat16: True
64 | n_gpu: 1
65 | device:
66 | train_batch_size:
67 | eval_batch_size:
68 | world_size:
69 | 


--------------------------------------------------------------------------------
/conf/api/vllm/math/math_gemma_test_0shot_tem_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - post_process: math
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 |     _target_: data.math.math_gold_answer_extractor
27 |     kv_mapping:
28 |       instruction: query
29 |   template: "{instruction}\n\n### Question: {query}\n\nSubQuestion 1: "
30 |   instruction: "Given a question, please decompose it into sub-questions. For each sub-question, please answer it in a complete sentence, ending with \"The answer is\". When the original question is answerable, please start the sub-question with \"Now we can answer the question: \"."
31 |   max_data_num: -1
32 |   service_based: False
33 |   api_based: False
34 |   index_field: "idx"
35 | 
36 | sampling_params:
37 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction" ]
38 | 
39 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.1.0shot.json
40 | flush_file: ${output_file}l
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | post_process:
47 |   answer_clean:
48 |     _target_: data.math.math_answer_cleaner
49 |     separator: "The answer is"
50 |   resume: False
51 |   index_field: "idx"
52 |   label_field: "label"
53 | 
54 | ddp_eval: False
55 | no_cuda: False
56 | seed: 42
57 | local_rank: -1
58 | 
59 | # Temporary variables
60 | fp16: True
61 | fp16_bfloat16: True
62 | n_gpu: 1
63 | device:
64 | train_batch_size:
65 | eval_batch_size:
66 | world_size:
67 | 


--------------------------------------------------------------------------------
/conf/api/vllm/mistral/reclor/train_react_1shot_sample5_split_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - reader/reclor@read_tensor: react_service_1shot_v1_0
 4 |   - post_process: openai_react
 5 |   - api/vllm/vllm_params@sampling_params: sampling_param_sample
 6 |   - _self_
 7 | 
 8 | hydra:
 9 |   searchpath:
10 |     - file://conf/
11 | 
12 | train_file: ../research.data/reclor_data/train.json
13 | dev_file: ../research.data/reclor_data/val.json
14 | test_file: ${train_file}
15 | 
16 | save_best: False
17 | exp_name:
18 | exp_notes:
19 | output_dir: ../pretrained-models/Mixtral-8x7B-Instruct-v0.1
20 | eval_sub_path:
21 | 
22 | sampling_params:
23 |   max_tokens: 4096
24 | gpu_memory_utilization: 0.95
25 | 
26 | read_tensor:
27 |   split_size: 4
28 |   split_id: 0
29 |   service_based: True
30 |   service_processor:
31 |     _target_: data.vllm.VLLMRequestGenerator
32 |     api_url: http://0.0.0.0:6000/v1/completions
33 |     max_tokens: ${sampling_params.max_tokens}
34 |     model: mixtral-ins
35 |     stop: [ "</s>", "\n\n\n\n", "Context:\n" ]
36 |     n: ${sampling_params.n}
37 |     temperature: ${sampling_params.temperature}
38 |   flush_file: ${flush_file}
39 | 
40 | output_file: ${output_dir}/reclor.react.train.1shot.sample5.${read_tensor.split_id}-${read_tensor.split_size}.v1.0.json
41 | flush_file: ${output_file}l
42 | 
43 | # Dataloader
44 | num_workers: 16
45 | prefetch_factor:
46 | 
47 | # Training hyper-parameters
48 | per_gpu_train_batch_size: 1
49 | per_gpu_eval_batch_size: 1
50 | 
51 | ddp_eval: False
52 | no_cuda: False
53 | seed: 42
54 | local_rank: -1
55 | 
56 | # Temporary variables
57 | fp16: True
58 | fp16_bfloat16: True
59 | n_gpu: 1
60 | device:
61 | train_batch_size:
62 | eval_batch_size:
63 | world_size:
64 | 
65 | # CUDA_VISIBLE_DEVICES=0 run15 python vllm_inference.py  -cp conf/api/vllm/mistral/reclor/ -cn train_react_1shot_sample5_split_v1_0 read_tensor.split_size=4 read_tensor.split_id=0
66 | 


--------------------------------------------------------------------------------
/data/reclor.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import json
 3 | import os.path
 4 | from typing import List, Dict, Tuple, Union, Any, Callable
 5 | 
 6 | from omegaconf.listconfig import ListConfig
 7 | from torch.utils.data import Dataset
 8 | from transformers import PreTrainedTokenizer
 9 | 
10 | from general_util.logger import get_child_logger
11 | from data.logiqav2 import _format_option_list
12 | 
13 | logger = get_child_logger(__name__)
14 | 
15 | 
class ReClorReader:
    """Read a JSON list of multiple-choice samples into a uniform dict format.

    Each input sample provides ``"context"``, ``"question"``, a list of
    ``"answers"`` and optionally an integer ``"label"``.

    Args:
        flat_options: When true, each option list is passed through
            ``_format_option_list`` together with ``rank2option``; otherwise
            the raw list of option strings is returned.
        option_order: Letters giving the order in which the original options
            are emitted.
    """

    rank2option = ['A', 'B', 'C', 'D']

    def __init__(self, flat_options: bool = False, option_order: str = "ABCD"):
        self.flat_options = flat_options
        self.option_order = option_order

    def __call__(self, file):
        """Read ``file`` and return a list of sample dicts.

        Each dict has the keys ``"context"``, ``"question"``,
        ``"option_list"`` and ``"label"``. The label is the position of the
        gold option after re-ordering, or ``-1`` when the sample has no
        ``"label"`` field (or its label is not covered by ``option_order``).
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(file, 'r') as f:
            data = json.load(f)

        samples = []
        for sample in data:
            options = []
            ordered_label = -1
            for i, x in enumerate(self.option_order):
                idx = ord(x) - ord('A')
                options.append(sample["answers"][idx])

                if "label" in sample and idx == sample["label"]:
                    ordered_label = i

            samples.append({
                "context": sample["context"],
                "question": sample["question"],
                "option_list": _format_option_list(options, self.rank2option) if self.flat_options else options,
                "label": ordered_label,
            })

        return samples
54 | 


--------------------------------------------------------------------------------
/conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.70b-chat-distil.logiqav2.dpo.A100.w3.v2.1/checkpoint-1000/logiqav2-dev.full.qa.decompose.llama2.7b.distil.dpo.v2.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 4
25 |   instruction:
26 |   few_shot_prompt:
27 |     _target_: data.logiqav2.read_single_file
28 |     file_path: data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md
29 |   compose_keys: [ "context", "question", "option_list" ]
30 |   max_data_num: -1
31 |   api_based: False
32 |   service_based: True
33 |   service_processor:
34 |     _target_: data.vllm.VLLMRequestGenerator
35 |     api_url: http://localhost:8000/v1/completions
36 |     max_tokens: 2048
37 |     model: llama-2-7b-distil-dpo
38 | 
39 | # Dataloader
40 | num_workers: 96
41 | prefetch_factor: 2
42 | 
43 | output_dir:
44 | 
45 | post_process:
46 |   _target_: post_processors.openai_api_callback.OpenAICallBack
47 |   output_file: ${output_file}
48 |   answer_clean:
49 |     _target_: post_processors.openai_api_callback.MCQAAnswerClean
50 |     prompt: few-shot
51 | 
52 | 
53 | # Training hyper-parameters
54 | per_gpu_train_batch_size: 1
55 | per_gpu_eval_batch_size: 1
56 | 
57 | ddp_eval: False
58 | no_cuda: False
59 | seed: 42
60 | local_rank: -1
61 | 
62 | # Temporary variables
63 | n_gpu: 1
64 | device:
65 | train_batch_size:
66 | eval_batch_size:
67 | world_size:
68 | 


--------------------------------------------------------------------------------
/conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.70b-chat-distil.logiqav2.dpo.A100.w3.v2.1/checkpoint-1800/logiqav2-dev.full.qa.decompose.llama2.7b.distil.dpo.v2.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 4
25 |   instruction:
26 |   few_shot_prompt:
27 |     _target_: data.logiqav2.read_single_file
28 |     file_path: data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md
29 |   compose_keys: [ "context", "question", "option_list" ]
30 |   max_data_num: -1
31 |   api_based: False
32 |   service_based: True
33 |   service_processor:
34 |     _target_: data.vllm.VLLMRequestGenerator
35 |     api_url: http://localhost:6001/v1/completions
36 |     max_tokens: 2048
37 |     model: llama-2-7b-distil-dpo-cp1800
38 | 
39 | # Dataloader
40 | num_workers: 96
41 | prefetch_factor: 2
42 | 
43 | output_dir:
44 | 
45 | post_process:
46 |   _target_: post_processors.openai_api_callback.OpenAICallBack
47 |   output_file: ${output_file}
48 |   answer_clean:
49 |     _target_: post_processors.openai_api_callback.MCQAAnswerClean
50 |     prompt: few-shot
51 | 
52 | 
53 | # Training hyper-parameters
54 | per_gpu_train_batch_size: 1
55 | per_gpu_eval_batch_size: 1
56 | 
57 | ddp_eval: False
58 | no_cuda: False
59 | seed: 42
60 | local_rank: -1
61 | 
62 | # Temporary variables
63 | n_gpu: 1
64 | device:
65 | train_batch_size:
66 | eval_batch_size:
67 | world_size:
68 | 


--------------------------------------------------------------------------------
/lora_share_trainer/utils/ds_utils.py:
--------------------------------------------------------------------------------
 1 | from transformers import PreTrainedModel
 2 | import deepspeed
 3 | from fairscale.nn.model_parallel import initialize as mpu
 4 | from omegaconf import DictConfig, OmegaConf
 5 | from general_util import training_utils
 6 | 
 7 | 
 8 | def init_ds_training_engine(model: PreTrainedModel, ds_cfg: DictConfig, global_cfg: DictConfig, ):
 9 |     ds_config = ds_cfg
10 |     if "total_num_steps" in ds_config.scheduler.params:
11 |         ds_config.scheduler.params.total_num_steps = global_cfg.max_steps
12 |     ds_config.scheduler.params.warmup_num_steps = global_cfg.warmup_steps
13 |     ds_config = OmegaConf.to_container(ds_config, resolve=True)
14 |     ds_config["train_mirco_batch_size_per_gpu"] = global_cfg.per_gpu_train_batch_size
15 | 
16 |     optim_params = training_utils.get_optimizer_grouped_parameters(model, global_cfg.actor_weight_decay)
17 | 
18 |     engine, optimizer, _, scheduler = deepspeed.initialize(
19 |         model=model,
20 |         model_parameters=optim_params,
21 |         config_params=ds_config,
22 |         mpu=mpu if mpu.model_parallel_is_initialized() else None,
23 |     )
24 | 
25 |     return engine, optimizer, scheduler
26 | 
27 | 
def init_ds_eval_engine(model: PreTrainedModel, ds_cfg: DictConfig, global_cfg: DictConfig):
    """Wrap ``model`` in a DeepSpeed engine without optimizer or scheduler.

    Args:
        model: The model to wrap.
        ds_cfg: DeepSpeed configuration. NOTE: ``zero_optimization.stage`` is
            reset to 0 in place unless it is 3.
        global_cfg: Global run configuration; ``per_gpu_train_batch_size`` is read.

    Returns:
        The engine returned by ``deepspeed.initialize``.
    """
    ds_config = ds_cfg
    # Any ZeRO stage other than 3 is switched off for this engine.
    if ds_config.zero_optimization.stage != 3:
        ds_config.zero_optimization.stage = 0

    ds_config = OmegaConf.to_container(ds_config, resolve=True)
    # Fixed key spelling: DeepSpeed reads `train_micro_batch_size_per_gpu`;
    # the previous `train_mirco_...` spelling was never picked up.
    ds_config["train_micro_batch_size_per_gpu"] = global_cfg.per_gpu_train_batch_size
    # No optimizer or scheduler section is passed on for this engine.
    ds_config.pop("optimizer", None)
    ds_config.pop("scheduler", None)

    engine, *_ = deepspeed.initialize(
        model=model,
        config_params=ds_config,
        # Hand over the model-parallel unit only when it has been set up.
        mpu=mpu if mpu.model_parallel_is_initialized() else None,
    )

    return engine
47 | 


--------------------------------------------------------------------------------
/general_util/mixin.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | from typing import Dict, List, Tuple
 3 | 
 4 | import torch
 5 | 
 6 | from general_util.average_meter import LogMetric, AverageMeter
 7 | from general_util.logger import get_child_logger
 8 | 
 9 | logger = get_child_logger("Mixin")
10 | 
11 | 
class LogMixin:
    """Mixin that owns a ``LogMetric`` and renders it as an evaluation log."""

    # Holds the metric tracker; stays ``None`` until `init_metric` is called.
    eval_metrics: LogMetric = None

    def init_metric(self, *metric_names):
        """Create the metric tracker for the given metric names."""
        self.eval_metrics = LogMetric(*metric_names)

    def get_eval_log(self, reset=False, ddp=False, device='cpu'):
        """Return the current metrics as ``(log_string, results_dict)``.

        Args:
            reset: When true, reset the tracker after reading it.
            ddp: When true, call ``gather(device=device)`` on every metric
                before reading the values.
            device: Device forwarded to each metric's ``gather``.

        Returns:
            A tab-separated ``"name: value"`` string and the results mapping it
            was built from. If the tracker was never initialised, a warning is
            logged and ``("", {})`` is returned.
        """
        if self.eval_metrics is None:
            logger.warning("The `eval_metrics` attribute hasn't been initialized.")
            # Nothing to report; bail out instead of dereferencing ``None`` below.
            return "", {}

        if ddp:
            for metric in self.eval_metrics.metrics.values():
                metric.gather(device=device)

        results = self.eval_metrics.get_log()

        _eval_metric_log = '\t'.join(f"{k}: {v}" for k, v in results.items())

        if reset:
            self.eval_metrics.reset()

        return _eval_metric_log, results
35 | 
36 | 
class MetricMixin:
    # TODO: how can hydra be used to decouple the way metrics are computed from the model?
    def __init__(self, metrics: List[Tuple[str, str, str, str]]):
        """Build the ``self.metrics`` registry.

        Args:
            metrics: Tuples of ``(key, val, func, name)``. Each tuple becomes an
                entry stored under ``name`` holding ``key``, ``val``, ``func``
                and a fresh ``AverageMeter`` under ``"meter"``.
        """
        registry = {}
        for key, val, func, name in metrics:
            entry = {
                "key": key,
                "val": val,
                "func": func,
                "meter": AverageMeter(),
            }
            registry[name] = entry
        self.metrics = registry
48 | 
49 | 
class PredictionMixin:
    """Mixin that accumulates prediction tensors as plain Python lists."""

    # Class-level default kept for backward compatibility. It is never written
    # to: every instance gets its own dict on first use, so predictions cannot
    # leak between instances.
    tensor_dict: Dict[str, List] = defaultdict(list)

    def _own_tensor_dict(self):
        """Return this instance's accumulator, creating it on first use."""
        if "tensor_dict" not in self.__dict__:
            self.tensor_dict = defaultdict(list)
        return self.tensor_dict

    def reset_predict_tensors(self):
        """Discard everything accumulated so far."""
        self.tensor_dict = defaultdict(list)

    def concat_predict_tensors(self, **tensors: torch.Tensor):
        """Append each tensor's values to the list stored under its keyword name.

        Tensors are detached, moved to the CPU and converted with ``tolist()``
        before their elements are appended.
        """
        store = self._own_tensor_dict()
        for k, v in tensors.items():
            store[k].extend(v.detach().cpu().tolist())

    def get_predict_tensors(self):
        """Return the mapping from name to the accumulated list."""
        return self._own_tensor_dict()
62 | 


--------------------------------------------------------------------------------
/data/folio.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import json
 3 | import os.path
 4 | from typing import List, Dict, Tuple, Union, Any, Callable
 5 | 
 6 | from omegaconf.listconfig import ListConfig
 7 | from torch.utils.data import Dataset
 8 | from transformers import PreTrainedTokenizer
 9 | 
10 | from general_util.logger import get_child_logger
11 | from data.logiqav2 import _format_option_list
12 | 
13 | logger = get_child_logger(__name__)
14 | 
15 | 
class FOLIO2QAReader:
    """Turn a JSONL file of hypothesis/premises records into two-way QA samples.

    Each input line is a JSON object with ``"conclusion"``, ``"premises"`` and
    ``"label"``. The conclusion is presented as the hypothesis, the premises
    are numbered from 1, and the options are fixed to ``A. True`` /
    ``B. False``.
    """

    rank2option = ['A', 'B']

    def __init__(self,):
        self.context = "There is one hypothesis and a group of premises:\n\nHypothesis:\n{}\n\nPremises:\n{}"
        self.question = "Verify the hypothesis is true or false based on the premises."
        self.option = "A. True\nB. False"

    def __call__(self, file):
        """Read ``file`` and return a list of sample dicts.

        Each dict has the keys ``"context"``, ``"question"``,
        ``"option_list"`` and ``"label"``. The label is ``0`` when the record's
        label is the string ``"True"`` and ``1`` for any other value. Lines
        containing only whitespace are skipped.
        """
        samples = []
        with open(file) as f:
            # Stream the file line by line instead of materialising it.
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                item = json.loads(line)

                conclusion = item["conclusion"]
                premises_str = "\n".join(
                    "{}. {}".format(i, premise) for i, premise in enumerate(item["premises"], start=1)
                )
                label = 0 if item["label"] == "True" else 1

                samples.append({
                    "context": self.context.format(conclusion, premises_str),
                    "question": self.question,
                    "option_list": self.option,
                    "label": label,
                })

        return samples
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/scripts/split_response_train_dev_according2item_id.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import collections
 3 | import json
 4 | import os.path
 5 | import random
 6 | from glob import glob
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument('--input_file', type=str, required=True)
12 |     parser.add_argument("--output_file", type=str, required=True)
13 |     args = parser.parse_args()
14 | 
15 |     item_id2responses = collections.defaultdict(list)
16 |     if os.path.exists(args.input_file):
17 |         data = json.load(open(args.input_file))
18 |     else:
19 |         files = glob(args.input_file)
20 |         print(files)
21 |         data = []
22 |         for file in files:
23 |             data.extend(json.load(open(file)))
24 | 
25 |     for item in data:
26 |         item_id, state_id = item['id'].split("_")
27 |         item_id2responses[item_id].append(item)
28 | 
29 |     print("data size: {}".format(len(item_id2responses)))
30 |     print(f"Response size: {len(data)}")
31 | 
32 |     data_ids = list(item_id2responses.keys())
33 |     # read `dev_num` from command line
34 |     dev_num = int(input("dev_num: "))
35 |     dev_ids = random.sample(data_ids, dev_num)
36 |     dev_ids = set(dev_ids)
37 | 
38 |     dev_data = []
39 |     train_data = []
40 |     for item_id, responses in item_id2responses.items():
41 |         if item_id in dev_ids:
42 |             dev_data.extend(responses)
43 |         else:
44 |             train_data.extend(responses)
45 | 
46 |     print("dev size: {}".format(len(dev_data)))
47 |     print("train size: {}".format(len(train_data)))
48 | 
49 |     json.dump(dev_data, open(args.output_file.replace(".json", f".sub_dev_itemid.{len(dev_data)}.json"), "w"), indent=2, ensure_ascii=False)
50 |     json.dump(train_data, open(args.output_file.replace(".json", f".sub_train_itemid.{len(train_data)}.json"), "w"), indent=2, ensure_ascii=False)
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     main()
55 | 


--------------------------------------------------------------------------------
/scripts/fixed_explore_from_infer/logiqav2/split_pair.sh:
--------------------------------------------------------------------------------
 1 | sft_model_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/fix_hack_data_dir/
 2 | dpo_data="logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.json"
 3 | step_dpo_data="logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.json"
 4 | 
 5 | seed=43
 6 | 
 7 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio40.json --ratio 0.4
 8 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio60.s$seed.json --ratio 0.6
 9 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$dpo_data --output_file $sft_model_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_dpo_pair.ratio80.json --ratio 0.8
10 | 
11 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio40.json" --ratio 0.4
12 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio60.s$seed.json" --ratio 0.6
13 | python scripts/split_pairs_according_to_ids.py --input_file $sft_model_dir/$step_dpo_data --output_file "$sft_model_dir/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.prm_hack_fix_v10_cp800_best_of_10.neg10.pos0.5.v2.2.(2,3).pair.product.(2,3).full_only.ratio80.json" --ratio 0.8


--------------------------------------------------------------------------------
/conf/api/vllm/math/math_deepseek_test_0shot_tem_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 | #  - post_process: deepseek
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/MATH_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 | #    _target_: data.math.math_gold_answer_extractor
27 |     _target_: data.math.math_gold_answer_extractor_deepseek
28 |     kv_mapping:
29 |       instruction: question
30 |   template: "User: {question}\nPlease reason step by step, and put your final answer within {instruction}.\n\nAssistant:"
31 |   instruction: "\\boxed{}"  # Hack here! because {} will report error.
32 |   max_data_num: -1
33 |   service_based: False
34 |   api_based: False
35 |   index_field: "idx"
36 | 
37 | sampling_params:
38 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction", "<｜end▁of▁sentence｜>" ]
39 | 
40 | output_file: ${output_dir}/${eval_sub_path}/math.test.v1.1.0shot.json
41 | flush_file: ${output_file}l
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | 
48 | post_process:
49 | #  _target_: post_processors.openai_api_callback.OpenAIMATHCallBack
50 |   _target_: post_processors.openai_api_callback.DeepSeekMathCallBack
51 |   output_file: ${output_file}
52 | #  answer_clean:
53 | #    _target_: data.math.math_boxed_answer_cleaner_proxy
54 |   eval_fn: math
55 |   answer_clean: math
56 |   resume: False
57 |   index_field: "idx"
58 |   label_field: "label"
59 | 
60 | ddp_eval: False
61 | no_cuda: False
62 | seed: 42
63 | local_rank: -1
64 | 
65 | # Temporary variables
66 | fp16: True
67 | fp16_bfloat16: True
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v1.0/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 7
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554_p1.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://localhost:8000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-dpo-cp1600
40 | 
41 | # Dataloader
42 | num_workers: 96
43 | prefetch_factor: 2
44 | 
45 | output_dir:
46 | 
47 | post_process:
48 |   _target_: post_processors.openai_api_callback.OpenAICallBack
49 |   output_file: ${output_file}
50 |   answer_clean:
51 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
52 | #    prompt: "few-shot"
53 | #    separator: "Finish"
54 | #    separate_idx: 1
55 | 
56 | 
57 | # Training hyper-parameters
58 | per_gpu_train_batch_size: 1
59 | per_gpu_eval_batch_size: 1
60 | 
61 | ddp_eval: False
62 | no_cuda: False
63 | seed: 42
64 | local_rank: -1
65 | 
66 | # Temporary variables
67 | n_gpu: 1
68 | device:
69 | train_batch_size:
70 | eval_batch_size:
71 | world_size:
72 | 


--------------------------------------------------------------------------------
/conf/api/vllm/logiqav2_qa_dev_react_step_dpo_v1_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w3.v1.0.fix/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 7
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554_p1.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://localhost:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-cp1600
40 | 
41 | # Dataloader
42 | num_workers: 96
43 | prefetch_factor: 2
44 | 
45 | output_dir:
46 | 
47 | post_process:
48 |   _target_: post_processors.openai_api_callback.OpenAICallBack
49 |   output_file: ${output_file}
50 |   answer_clean:
51 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
52 | #    prompt: "few-shot"
53 | #    separator: "Finish"
54 | #    separate_idx: 1
55 | 
56 | 
57 | # Training hyper-parameters
58 | per_gpu_train_batch_size: 1
59 | per_gpu_eval_batch_size: 1
60 | 
61 | ddp_eval: False
62 | no_cuda: False
63 | seed: 42
64 | local_rank: -1
65 | 
66 | # Temporary variables
67 | n_gpu: 1
68 | device:
69 | train_batch_size:
70 | eval_batch_size:
71 | world_size:
72 | 


--------------------------------------------------------------------------------
/scripts/cot/deepseek_cot_sample_steps.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import argparse
 3 | import os
 4 | from glob import glob
 5 | from functools import partial
 6 | from multiprocessing import Pool
 7 | from tqdm import tqdm
 8 | 
 9 | 
def acc_func(item, response_field: str = "response", offset: int = 0):
    """Attach every distinct step-prefix of the item's responses to ``item``.

    Each response in ``item[response_field]`` is split on newlines into
    steps; when ``offset`` is positive the last ``offset`` steps are dropped.
    For every remaining step index ``j`` the prefix made of steps ``0..j``
    (re-joined with newlines) is recorded once, the first time it is seen
    across all responses of this item, under the id
    ``"<item id>_<response index>_<step index>"``.

    Responses are paired with ``item["pred"]``, so only as many responses as
    there are predictions are processed.

    The list is stored in ``item["accumulated_response"]`` and the same
    (mutated) ``item`` is returned.
    """
    seen_prefixes = set()
    collected = []
    paired = zip(item[response_field], item["pred"])
    for resp_idx, (text, _pred) in enumerate(paired):
        lines = text.split("\n")
        if offset > 0:
            # Ignore the trailing `offset` steps of the response.
            lines = lines[:-offset]

        for step_idx in range(len(lines)):
            prefix = "\n".join(lines[: step_idx + 1])
            if prefix not in seen_prefixes:
                seen_prefixes.add(prefix)
                collected.append(
                    {"id": f"{item['id']}_{resp_idx}_{step_idx}", "response": prefix}
                )

    item["accumulated_response"] = collected
    return item
32 | 
33 | 
def main():
    """Build accumulated step-prefixes for every item in the input file(s).

    Command-line arguments:
        --input_file: path to a JSON file containing a list of items, or a
            glob pattern matching several such files (used when the literal
            path does not exist).
        --offset: number of trailing steps to drop from each response
            before accumulating (forwarded to ``acc_func``).
        --num_workers: size of the multiprocessing pool.

    Items whose ``accumulated_response`` ends up empty are dropped. The
    result is written next to the input, with the first ``.json`` in
    ``--input_file`` replaced by ``_accumulated_off<offset>.json``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--offset", type=int, default=0)
    parser.add_argument("--num_workers", type=int, default=16)
    args = parser.parse_args()

    if os.path.exists(args.input_file):
        files = [args.input_file]
    else:
        files = glob(args.input_file)

    data = []
    for file in files:
        # Context manager so each input handle is closed promptly.
        with open(file, "r", encoding="utf-8") as f:
            data.extend(json.load(f))

    annotate = partial(acc_func, response_field="response", offset=args.offset)
    with Pool(args.num_workers) as p:
        # `imap` preserves input order while letting tqdm show progress.
        data = list(tqdm(p.imap(annotate, data), total=len(data)))

    outputs = [item for item in data if item.get("accumulated_response")]
    print(f"Number of items with accumulated responses: {len(outputs)}")

    # NOTE(review): the output name is derived from the raw --input_file
    # value, so when a glob pattern was given the wildcard characters end up
    # in the output file name — confirm this is intended.
    output_path = args.input_file.replace(".json", f"_accumulated_off{args.offset}.json")
    with open(output_path, "w") as f:
        json.dump(outputs, f)
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     main()
60 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6001/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-dpo-v2.1-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 96
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/math/gsm8k_deepseek_test_0shot_tem_v1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 | #  - post_process: deepseek
 4 |   - api/vllm/vllm_params@sampling_params: sampling_param_greedy
 5 |   - _self_
 6 | 
 7 | hydra:
 8 |   searchpath:
 9 |     - file://conf/
10 | 
11 | train_file:
12 | dev_file:
13 | test_file: ../research.data/MetaMathQA/test/GSM8K_test.json
14 | 
15 | save_best: False
16 | exp_name:
17 | exp_notes:
18 | output_dir: experiments/${exp_name}
19 | 
20 | step: 800
21 | eval_sub_path: checkpoint-${step}
22 | 
23 | read_tensor:
24 |   _target_: data.logic_combine.ResponseAlignDataset
25 |   aligner:
26 | #    _target_: data.math.math_gold_answer_extractor
27 |     _target_: data.math.gsm8k_gold_answer_extractor
28 | #    query_field: "query"
29 |     response_field: "response"
30 | #    kv_mapping:
31 | #      instruction: question
32 |   template: "User: {query}\nPlease reason step by step, and put your final answer within {instruction}.\n\nAssistant:"
33 |   instruction: "\\boxed{}"  # Hack here! because {} will report error.
34 |   max_data_num: -1
35 |   service_based: False
36 |   api_based: False
37 |   index_field: "index"
38 | 
39 | sampling_params:
40 |   stop: [ "<eos>", "\n\n\n\n", "### Instruction", "<｜end▁of▁sentence｜>" ]
41 | 
42 | output_file: ${output_dir}/${eval_sub_path}/gsm8k.test.v1.1.0shot.json
43 | flush_file: ${output_file}l
44 | 
45 | # Dataloader
46 | num_workers: 48
47 | prefetch_factor: 2
48 | 
49 | 
50 | post_process:
51 | #  _target_: post_processors.openai_api_callback.OpenAIMATHCallBack
52 |   _target_: post_processors.openai_api_callback.DeepSeekMathCallBack
53 |   output_file: ${output_file}
54 | #  answer_clean:
55 | #    _target_: data.math.math_boxed_answer_cleaner_proxy
56 |   eval_fn: gsm8k
57 |   answer_clean: gsm8k
58 |   resume: False
59 |   index_field: "index"
60 |   label_field: "label"
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | fp16: True
69 | fp16_bfloat16: True
70 | n_gpu: 1
71 | device:
72 | train_batch_size:
73 | eval_batch_size:
74 | world_size:
75 | 


--------------------------------------------------------------------------------
/data/prompts/logiqav2/logic_form/human/dev_218_0.md:
--------------------------------------------------------------------------------
 1 | Convert the input texts following the keywords [Context], [Question] and each [Option] into logic forms.  For each logic form, the format is [predicate](entity 1,  ..., entity n). There is a predicate which indicates the relations among at most n entities and those entities are the arguments of the predicate.   
 2 | Use logical operations to derive the correct option.  Common logical operators include AND, OR, NOT,   and  ==> (logically implies). 
 3 | 
 4 | [Context]:
 5 | Jupiter is a gas giant planet and the largest planet in the solar system. Its mass is 2.5 times the total mass of the other seven planets in the solar system. Observations have found that most of the more than 70 moons surrounding Jupiter are composed of water ice. Therefore, Jupiter's atmosphere should contain a considerable amount of water.
 6 | 
 7 | [Question]:
 8 | Which of the followings, if true, can best support the above statement?
 9 | 
10 | [Options]:
11 | A. After hundreds of millions of years, the satellite may slowly fall onto the planet.
12 | B. Many of the water in interstellar space exists in gaseous form.
13 | C. Uranus is also a gas giant planet, and it has been confirmed that it contains a lot of water ice.
14 | D. The satellite and the planets around it were formed from the same gas and dust at the same time.
15 | 
16 | Here are the logic forms for context, question and options:
17 | 
18 | [Context]
19 | 1. isGasGiant(Jupiter) AND isLargestInSolarSystem(Jupiter)
20 | 2. mass(Jupiter) = 2.5 * sumOfMass(otherSevenPlanetsInSolarSystem)
21 | 3. composedOfWaterIce(surroundingMoons(Jupiter)) > 70
22 | 4. containsConsiderableWater(atmosphere(Jupiter))
23 | 
24 | [Question]
25 | Which of the followings, if true, can best support the statement Context-4?
26 | 
27 | [Options]
28 | A. fallOntoPlanet(satellite, planet) AND afterHundredsOfMillionsOfYears()
29 | B. existsInGaseousForm(water, interstellarSpace)
30 | C. isGasGiant(Uranus) AND containsLotsOfWaterIce(Uranus)
31 | D. formedFromSameGasAndDust(satellite, planet) AND atSameTime(satellite, planet)
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.0shot.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:8000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-dpo-v2.1-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 96
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v2.1-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 96
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 1600
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v5.0-cp${step}
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | num_shot: 5
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A100.w2.v2.1/checkpoint-1600/logiqav2-dev.full.qa.react.0shot.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:8000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v2.1-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 96
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v5.1-cp${step}
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 1600
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v5.0-cp${step}
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0.fix/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.1shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v5.0-fix-cp${step}
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v5.0.fix/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-step-dpo-v5.0-fix-cp${step}
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 1600
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-sft70b-v1.0-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_1shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 1600
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.1shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 |     api_url: http://0.0.0.0:6000/v1/completions
38 |     max_tokens: 2048
39 |     model: llama-2-7b-sft70b-v1.0-cp1600
40 |     stop: [ "</s>", "\n\n\n\n" ]
41 | 
42 | # Dataloader
43 | num_workers: 48
44 | prefetch_factor: 2
45 | 
46 | output_dir:
47 | 
48 | post_process:
49 |   _target_: post_processors.openai_api_callback.OpenAICallBack
50 |   output_file: ${output_file}
51 |   answer_clean:
52 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
53 | #    prompt: "few-shot"
54 | #    separator: "Finish"
55 | #    separate_idx: 1
56 | 
57 | 
58 | # Training hyper-parameters
59 | per_gpu_train_batch_size: 1
60 | per_gpu_eval_batch_size: 1
61 | 
62 | ddp_eval: False
63 | no_cuda: False
64 | seed: 42
65 | local_rank: -1
66 | 
67 | # Temporary variables
68 | n_gpu: 1
69 | device:
70 | train_batch_size:
71 | eval_batch_size:
72 | world_size:
73 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-dpo-v4.1-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/scripts/explore_from_inter/run_llama_sft_v2.0.sh:
--------------------------------------------------------------------------------
 1 | data_dir=experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600
 2 | #data_dir=experiments/llama2.7b.chat.logiqav2.70b-distil.dpo.H100.w4.v1.0/checkpoint-1600
 3 | 
 4 | #ratio_s=0.2
 5 | #ratio=0.3
 6 | ratio_s=0.4
 7 | ratio=0.2
 8 | 
 9 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \
10 | #  --output_file $data_dir/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.6.json \
11 | #  --split_num 20 --ratio_s 0.2 --ratio 0.6
12 | 
13 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \
14 | #  --output_file $data_dir/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \
15 | #  --split_num 20 --ratio_s ${ratio_s} --ratio ${ratio}
16 | 
17 | #python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/logiqav2-train.react.sample5.v1.0.0shot.json \
18 | #  --output_file $data_dir/react-inter-states/logiqav2-train.react.v1.0.0shot.sample5.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \
19 | #  --split_num 10 --ratio_s ${ratio_s} --ratio ${ratio}
20 | 
21 | #python scripts/sample_react_inter_states_v2.1.py --input_file $data_dir/logiqav2-train.full.qa.react.v1.0.0shot.sample10.json \
22 | #  --output_file $data_dir/react-inter-states/logiqav2-train.react.v1.0.0shot.sample10.clean_inter_ver2.1.rs${ratio_s}.r${ratio}.json \
23 | #  --split_num 4 --ratio_s ${ratio_s} --ratio ${ratio}
24 | 
25 | 
26 | 
27 | # ================================= ReClor
28 | data_dir="experiments/llama2.7b.chat.mixtral.dpo-sft.A100.40.w8.v1.0/checkpoint-1200"
29 | ratio_s=0.2
30 | ratio=0.3
31 | python scripts/sample_react_inter_states_v2.0.py --input_file $data_dir/reclor.react.train.0shot.sample10.v1.0.json \
32 |   --output_file $data_dir/react-inter-states/reclor.train.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs${ratio_s}.r${ratio}.json \
33 |   --split_num 1 --ratio_s ${ratio_s} --ratio ${ratio}
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/general_util/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import sys
 4 | from torch import distributed as dist
 5 | 
 6 | _root_name = 'FK'
 7 | 
 8 | 
 9 | def get_child_logger(child_name):
10 |     # _local_rank = getattr(os.environ, "LOCAL_RANK", "")
11 |     #
12 |     # if _root_name == "FK" and _local_rank:
13 |     #     return logging.getLogger(_root_name + '.' + _local_rank + '.' + child_name)
14 | 
15 |     return logging.getLogger(_root_name + '.' + child_name)
16 | 
17 | 
def setting_logger(log_file: str, local_rank: int = -1):
    """Configure logging for this process and return the ``_root_name`` logger.

    ``logging.basicConfig`` is called for console output and the ``_root_name``
    logger level is set to INFO when ``local_rank`` is -1 or 0, WARNING
    otherwise. ``./log_dir`` is created if needed, and when ``log_file`` is
    non-empty a ``FileHandler`` writing to ``./log_dir/<name>-output.log`` is
    attached to the returned logger.

    Args:
        log_file: Path-like string used to derive the log file name: ``/`` is
            replaced by whitespace, the string is split, the first component
            is dropped and the rest are joined with ``-``. A falsy value
            disables the file handler.
        local_rank: Rank of the process; ``-1`` means no barrier is used.
            Any other value makes the function call ``dist.barrier()``.

    Returns:
        The logger named ``_root_name``.
    """
    level = logging.INFO if local_rank in [-1, 0] else logging.WARNING
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=level)

    logger = logging.getLogger(_root_name)
    logger.setLevel(level)

    output_dir = './log_dir'
    # Ranks other than -1/0 wait here until rank 0 has been through the
    # directory creation below and reaches its own barrier call.
    if local_rank not in [-1, 0]:
        dist.barrier()

    # exist_ok avoids the check-then-create race when several processes
    # (e.g. rank 0 of different nodes on a shared file system) get here together.
    os.makedirs(output_dir, exist_ok=True)

    if local_rank == 0:
        dist.barrier()

    if log_file:
        model_name = "-".join(log_file.replace('/', ' ').split()[1:])
        f_handler = logging.FileHandler(os.path.join(
            output_dir, model_name + '-output.log'))
        f_handler.setLevel(logging.INFO)
        f_handler.setFormatter(logging.Formatter(fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
                                                 datefmt='%m/%d/%Y %H:%M:%S'))

        logger.addHandler(f_handler)

    return logger
55 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.dpo.A100.w4.v4.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.0shot.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 | #  few_shot_prompt:
29 | #    _target_: data.logiqav2.read_single_file
30 | #    file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-dpo-v4.1-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v3_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 3200
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w2.v3.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-step-dpo-v3.0-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_0.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 1200
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.A40.w4.v4.0/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-step-dpo-v4.0-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_3.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 800
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v4.3/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-step-dpo-v4.3-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - hydra: default
 3 |   - _self_
 4 | 
 5 | hydra:
 6 |   searchpath:
 7 |     - file://conf/
 8 | 
 9 | train_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
10 | dev_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
11 | test_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/dev.txt
12 | 
13 | step: 2000
14 | 
15 | output_file: experiments/llama2.7b.chat.logiqav2.step.dpo.H100.w4.v4.1.1/checkpoint-${step}/logiqav2-dev.full.qa.react.v1.0.json
16 | flush_file: ${output_file}l
17 | 
18 | # Data loading
19 | read_tensor:
20 |   _target_: data.logiqav2.ComposePromptGenerator
21 |   read_func:
22 |     _target_: data.logiqav2.LogicQAReader
23 |     flat_options: True
24 |   template_id: 8
25 |   instruction:
26 |     _target_: data.prompts.logiqav2.react.prompts.get_prompt
27 |     prompt_name: react_v2
28 |   few_shot_prompt:
29 |     _target_: data.logiqav2.read_single_file
30 |     file_path: data/prompts/logiqav2/react/train_4554.txt
31 |   compose_keys: [ "context", "question", "option_list" ]
32 |   max_data_num: -1
33 |   api_based: False
34 |   service_based: True
35 |   service_processor:
36 |     _target_: data.vllm.VLLMRequestGenerator
37 | #    api_url: http://0.0.0.0:8000/v1/completions
38 |     api_url: http://0.0.0.0:6000/v1/completions
39 |     max_tokens: 2048
40 |     model: llama-2-7b-step-dpo-v4.1.1-cp${step}
41 |     stop: [ "</s>", "\n\n\n\n" ]
42 | 
43 | # Dataloader
44 | num_workers: 48
45 | prefetch_factor: 2
46 | 
47 | output_dir:
48 | 
49 | post_process:
50 |   _target_: post_processors.openai_api_callback.OpenAICallBack
51 |   output_file: ${output_file}
52 |   answer_clean:
53 |     _target_: post_processors.openai_api_callback.ReActSeparatorClean
54 | #    prompt: "few-shot"
55 | #    separator: "Finish"
56 | #    separate_idx: 1
57 | 
58 | 
59 | # Training hyper-parameters
60 | per_gpu_train_batch_size: 1
61 | per_gpu_eval_batch_size: 1
62 | 
63 | ddp_eval: False
64 | no_cuda: False
65 | seed: 42
66 | local_rank: -1
67 | 
68 | # Temporary variables
69 | n_gpu: 1
70 | device:
71 | train_batch_size:
72 | eval_batch_size:
73 | world_size:
74 | 


--------------------------------------------------------------------------------
/scripts/merge_response.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from glob import glob
 3 | import os
 4 | import argparse
 5 | 
 6 | """
 7 | In this script, we simply merge the responses, and use `construct_dpo_data_from_react_response_v1.1.py` to remove duplicates and calibrate the predictions.
 8 | """
 9 | 
10 | 
def merge_response(item_a, item_b):
    """Merge two records that describe the same example.

    The ``"response"`` and ``"pred"`` sequences of ``item_b`` are appended to
    those of ``item_a``; ``"id"``, ``"text"`` and ``"label"`` must be equal in
    both records and are copied from ``item_a``.

    Args:
        item_a: First record; its responses and predictions come first.
        item_b: Second record.

    Returns:
        A new dict with keys ``id``, ``text``, ``label``, ``response``, ``pred``.

    Raises:
        AssertionError: If ``id``, ``text`` or ``label`` differ between the records.
    """
    shared_fields = ("id", "text", "label")

    # Look everything up first (response before pred, a before b), then concatenate.
    resp_a, resp_b, pred_a, pred_b = (
        record[field] for field in ("response", "pred") for record in (item_a, item_b)
    )
    combined_responses = resp_a + resp_b
    combined_preds = pred_a + pred_b

    for field in shared_fields:
        assert item_a[field] == item_b[field]

    merged = {field: item_a[field] for field in shared_fields}
    merged["response"] = combined_responses
    merged["pred"] = combined_preds
    return merged
33 | 
34 | 
def main():
    """Merge the records of one or more JSON files by ``id`` and write the result.

    ``--input_file`` is used as-is when it names an existing path; otherwise it
    is treated as a glob pattern. Every input file must contain a JSON list of
    records. Records sharing an ``id`` are combined with ``merge_response``;
    the merged list is written as JSON to ``--output_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--output_file", type=str)
    args = parser.parse_args()

    if os.path.exists(args.input_file):
        files = [args.input_file]
    else:
        # Sorted so the order of merged responses does not depend on the
        # file-system-dependent order returned by glob.
        files = sorted(glob(args.input_file))
    print(files)

    data = []
    for file in files:
        # Context manager so each input handle is closed right after reading.
        with open(file, encoding="utf-8") as f:
            data.extend(json.load(f))
    print(f"Total number of data: {len(data)}")

    id2data = {}
    for item in data:
        item_id = item["id"]
        if item_id in id2data:
            id2data[item_id] = merge_response(id2data[item_id], item)
        else:
            id2data[item_id] = item
    print(f"Total number of data after merging: {len(id2data)}")

    total_resp_num = sum(len(item["response"]) for item in id2data.values())
    # Guard against an empty input (no matching files or empty lists).
    avg_resp_num = total_resp_num / len(id2data) if id2data else 0.0
    print(f"Average number of responses: {avg_resp_num}")

    # Context manager guarantees the output is flushed and closed.
    with open(args.output_file, "w", encoding="utf-8") as f:
        json.dump(list(id2data.values()), f)
68 | 
69 | 
70 | if __name__ == "__main__":
71 |     main()
72 | 


--------------------------------------------------------------------------------
/scripts/deepspeed/ds_full_checkpoint2hf.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | import os
 4 | from glob import glob
 5 | from pathlib import Path
 6 | 
 7 | import torch
 8 | import transformers
 9 | from accelerate import init_empty_weights
10 | from transformers import AutoModelForCausalLM
11 | 
12 | 
def extract_weight(mp_states):
    """Load a checkpoint file onto the CPU and return its ``"module"`` entry.

    Args:
        mp_states: Path (or file-like object) handed to ``torch.load``.

    Returns:
        Whatever the loaded checkpoint stores under the ``"module"`` key.
    """
    checkpoint = torch.load(mp_states, map_location="cpu")
    return checkpoint["module"]
17 | 
18 | 
def write_model(input_base_path, mp_states_name, config_dir):
    """Re-save model-states checkpoints through ``save_pretrained``.

    Args:
        input_base_path: A checkpoint directory, or a glob pattern (``**`` is
            honoured) matching several checkpoint directories.
        mp_states_name: Name of the model-states file inside each checkpoint
            directory.
        config_dir: Location handed to ``AutoConfig.from_pretrained``.

    For a directory ``<parent>/global_step<N>`` the output goes to the sibling
    directory ``<parent>/checkpoint-<N>``.
    """
    config = transformers.AutoConfig.from_pretrained(config_dir)
    # Build the model skeleton under init_empty_weights; the tensors that get
    # saved are the ones passed via ``state_dict`` below.
    with init_empty_weights():
        model = AutoModelForCausalLM.from_config(config)

    if os.path.exists(input_base_path):
        checkpoint_dirs = [input_base_path]
    else:
        checkpoint_dirs = glob(input_base_path, recursive=True)
    print(f"Found checkpoints: {checkpoint_dirs}")

    for checkpoint_dir in checkpoint_dirs:
        print(f"Writing checkpoint: {checkpoint_dir}")
        states_file = os.path.join(checkpoint_dir, mp_states_name)
        checkpoint_state_dict = extract_weight(states_file)
        # Drop any trailing separator first: with "…/global_step100/" the step
        # would otherwise be "100/" and dirname() would return the checkpoint
        # directory itself, nesting the output inside the input.
        normalized_dir = os.path.normpath(checkpoint_dir)
        step = normalized_dir.split("global_step")[-1]
        save_dir = os.path.join(os.path.dirname(normalized_dir), f"checkpoint-{step}")
        print(f"Saving checkpoint to {save_dir}")
        model.save_pretrained(save_dir, state_dict=checkpoint_state_dict, max_shard_size="3GB", safe_serialization=False)
38 | 
39 | 
def main():
    """Parse the command line and hand the options to ``write_model``."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input_dir",
        required=True,
        help="Checkpoint directory holding the model states file (e.g. .../global_step100), "
             "or a glob pattern matching several such directories",
    )
    parser.add_argument(
        "--mp_states_name",
        type=str,
        default="mp_rank_00_model_states.pt",
        help="Name of the model states file inside each checkpoint directory",
    )
    parser.add_argument(
        "--config_dir",
        required=True,
        help="Model name or directory passed to AutoConfig.from_pretrained",
    )
    args = parser.parse_args()
    write_model(
        input_base_path=args.input_dir,
        mp_states_name=args.mp_states_name,
        config_dir=args.config_dir,
    )


if __name__ == "__main__":
    main()
60 | 


--------------------------------------------------------------------------------