├── src ├── exp │ ├── __init__.py │ ├── cal_ttest.py │ ├── analysis_sgm.py │ ├── run_exp_api.py │ ├── analysis_mmlt.py │ ├── run_exp.py │ ├── cal_mlt_scores.py │ ├── cal_elm_rmse.py │ └── cal_level_scores.py ├── data_process │ ├── __init__.py │ ├── raw_openhermes_process.py │ ├── build_arena_dataset.py │ ├── build_tlg_dataset.py │ └── build_training_dataset.py ├── finetuning │ ├── callback.py │ ├── dataset.py │ └── finetune.py └── utils │ ├── __init__.py │ ├── count.py │ ├── json_file.py │ ├── config.py │ └── templates.py ├── images ├── TLG.png ├── mmlt.png ├── sgm.png ├── method.png ├── TLG_ruler.png └── overall_performance.png ├── requirements.txt ├── scripts ├── Yi-1.5-6B │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh ├── gemma-7b │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh ├── Qwen1.5-7B │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh ├── Meta-Llama-3-8B │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh ├── Mistral-7B-v0.3 │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh ├── deepseek-llm-7b-base │ ├── run_mmlt.sh │ ├── run_self_generated_mlt.sh │ ├── run_tlg.sh │ ├── ruler.sh │ ├── vanilla.sh │ ├── ruler_lm_eval.sh │ └── vanilla_lm_eval.sh └── download.sh ├── configs ├── ds_config_zero2.json ├── ds_config_zero3.json └── ds_config_zero3_cpu_offload.json ├── LICENSE ├── .gitignore └── README.md /src/exp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_process/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/finetuning/callback.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .count import * 2 | from .json_file import * -------------------------------------------------------------------------------- /images/TLG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/TLG.png -------------------------------------------------------------------------------- /images/mmlt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/mmlt.png -------------------------------------------------------------------------------- /images/sgm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/sgm.png -------------------------------------------------------------------------------- /images/method.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/method.png -------------------------------------------------------------------------------- /images/TLG_ruler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/TLG_ruler.png -------------------------------------------------------------------------------- /images/overall_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/overall_performance.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | deepspeed==0.13.1 2 | nlp==0.4.0 3 | nltk==3.8.1 4 | openai==1.50.0 5 | pandas==2.2.3 6 | python-dotenv==1.0.1 7 | rich==13.8.1 8 | scikit_learn==1.5.2 9 | scipy==1.14.1 10 | shortuuid==1.0.13 11 | tiktoken==0.7.0 12 | torch==2.4.0 13 | tqdm==4.66.4 14 | transformers==4.44.2 15 | vllm==0.5.5 16 | -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Yi-1.5-6B.jsonl 7 | -------------------------------------------------------------------------------- /scripts/gemma-7b/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_gemma-7b.jsonl 7 | 8 | -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Qwen1.5-7B.jsonl 7 | 8 | -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Meta-Llama-3-8B.jsonl 7 | -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Mistral-7B-v0.3.jsonl 7 | 
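The per-model run_mmlt.sh scripts differ only in the checkpoint directory and the output file name. A minimal parameterized sketch of the same call, assuming the model name is passed as the first argument (this generalized script is illustrative and is not a file in the repository):

#!/bin/bash
# Illustrative generalization of the per-model run_mmlt.sh scripts.
# MODEL is an assumed command-line argument, e.g. "Yi-1.5-6B" or "Mistral-7B-v0.3".
MODEL=$1
python exp/run_exp.py \
    --dataset_path ../datasets/multi_mlt.jsonl \
    --gpus 1 \
    --template custom \
    --model_name_or_path ../outputs/checkpoints/ruler_${MODEL}_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 \
    --output_path ../outputs/multi_mlt/mmlt_ruler_${MODEL}.jsonl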
-------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Yi-1.5-6B.jsonl 7 | -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/run_mmlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/multi_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/multi_mlt/mmlt_ruler_deepseek-llm-7b-base.jsonl 7 | -------------------------------------------------------------------------------- /scripts/gemma-7b/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_gemma-7b.jsonl 7 | 8 | -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Qwen1.5-7B.jsonl 7 | 8 | -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Meta-Llama-3-8B.jsonl 7 | -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Mistral-7B-v0.3.jsonl 7 | -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/run_self_generated_mlt.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/self_generated_mlt.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path 
../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_deepseek-llm-7b-base.jsonl 7 | -------------------------------------------------------------------------------- /src/utils/count.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | def count_words(text): 4 | """Counts the number of words.""" 5 | tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+") 6 | tokens = tokenizer.tokenize(text) 7 | num_words = len(tokens) 8 | # print(tokens) 9 | return num_words 10 | 11 | 12 | def count_tokens(tokenizer, text): 13 | inputs = tokenizer.encode(text, return_tensors="pt") 14 | return inputs.shape[1] 15 | 16 | if __name__ == "__main__": 17 | pass -------------------------------------------------------------------------------- /src/utils/json_file.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def load_json(path): 5 | with open(path, "r") as file: 6 | return json.load(file) 7 | 8 | 9 | def load_jsonl(path): 10 | data = [] 11 | with open(path, "r") as file: 12 | for line in file: 13 | json_data = json.loads(line) 14 | data.append(json_data) 15 | return data 16 | 17 | 18 | def save_jsonl(path, data): 19 | with open(path, "w", encoding="utf-8") as file: 20 | for item in data: 21 | json_string = json.dumps(item, ensure_ascii=False) 22 | file.write(json_string + "\n") 23 | -------------------------------------------------------------------------------- /scripts/gemma-7b/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_gemma-7b.jsonl 7 | 8 | python exp/run_exp.py\ 9 | --dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_gemma-7b.jsonl 14 | -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_Yi-1.5-6B.jsonl 7 | 8 | python exp/run_exp.py\ 9 | --dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Yi-1.5-6B.jsonl 14 | -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_Qwen1.5-7B.jsonl 7 | 8 | python exp/run_exp.py\ 9 | 
--dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Qwen1.5-7B.jsonl 14 | -------------------------------------------------------------------------------- /scripts/download.sh: -------------------------------------------------------------------------------- 1 | # datasets 2 | mkdir -p datasets 3 | mkdir -p datasets/LongForm 4 | mkdir -p datasets/OpenHermes 5 | # logs 6 | mkdir -p logs 7 | # outputs 8 | mkdir -p outputs 9 | mkdir -p outputs/checkpoints 10 | mkdir -p outputs/multi_mlt 11 | mkdir -p outputs/other_tasks 12 | mkdir -p outputs/self_generated_mlt 13 | mkdir -p outputs/tlg 14 | 15 | # download longform 16 | huggingface-cli download --repo-type dataset --resume-download akoksal/LongForm --local-dir ../datasets/LongForm 17 | # download openhermes 18 | huggingface-cli download --repo-type dataset --resume-download teknium/OpenHermes-2.5 --local-dir ../datasets/OpenHermes 19 | -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_Meta-Llama-3-8B.jsonl 7 | 8 | python exp/run_exp.py\ 9 | --dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Meta-Llama-3-8B.jsonl 14 | -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_Mistral-7B-v0.3.jsonl 7 | 8 | python exp/run_exp.py\ 9 | --dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Mistral-7B-v0.3.jsonl 14 | -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/run_tlg.sh: -------------------------------------------------------------------------------- 1 | python exp/run_exp.py\ 2 | --dataset_path ../datasets/tlg_dataset.jsonl\ 3 | --gpus 1\ 4 | --template custom\ 5 | --model_name_or_path ../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 6 | --output_path ../outputs/tlg/tlg_ot_ruler_deepseek-llm-7b-base.jsonl 7 | 8 | python exp/run_exp.py\ 9 | --dataset_path ../datasets/tlg_dataset.jsonl\ 10 | --gpus 1\ 11 | --template custom\ 12 | --model_name_or_path ../outputs/checkpoints/vanilla_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\ 13 | --output_path ../outputs/tlg/tlg_ot_vanilla_deepseek-llm-7b-base.jsonl 14 | 
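The run_*.sh evaluation scripts above invoke exp/run_exp.py and resolve datasets, checkpoints, and outputs through ../datasets and ../outputs, so they appear to assume src/ as the working directory; the huggingface-cli commands in download.sh likewise place the LongForm and OpenHermes data under ../datasets. A usage sketch under that assumption (the exact invocation is illustrative):

# assuming the repository root as the starting point
cd src
bash ../scripts/Qwen1.5-7B/run_tlg.sh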
-------------------------------------------------------------------------------- /configs/ds_config_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 2, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto" 22 | } 23 | } -------------------------------------------------------------------------------- /src/data_process/raw_openhermes_process.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils import load_json, save_jsonl 3 | 4 | 5 | def main(args): 6 | # raw data load 7 | df = load_json(args.dataset_path) 8 | data = [] 9 | for d in df: 10 | if len(d["conversations"]) == 2: 11 | data.append(d) 12 | # save to output_path 13 | save_jsonl(args.output_path, data) 14 | 15 | 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--dataset_path", type=str, default=None) 19 | parser.add_argument("--model_name_or_path", type=str, default=None) 20 | parser.add_argument("--output_path", type=str, default=None) 21 | args = parser.parse_args() 22 | main(args) 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Geaming 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/data_process/build_arena_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import nlp 3 | import random 4 | import argparse 5 | import pandas as pd 6 | from utils import load_jsonl, save_jsonl 7 | from utils.config import TARGET_LENGTH 8 | 9 | 10 | def main(args): 11 | df = load_jsonl(args.dataset_path) 12 | data = [] 13 | id = 0 14 | for d in df: 15 | data.append({"id": id, "Instruction": d["turns"][0]["content"]}) 16 | id += 1 17 | if args.num is not None: 18 | random.seed(args.random_seed) 19 | random.shuffle(data) 20 | data = data[: args.num] 21 | data = [ 22 | {**d, "TargetLength": tl} for d in data for tl in TARGET_LENGTH 23 | ] 24 | # save to output_path 25 | save_jsonl(args.output_path, data) 26 | 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--dataset_path", type=str, default=None) 31 | parser.add_argument("--num", type=int, default=None) 32 | parser.add_argument("--random_seed", type=int, default=10) 33 | parser.add_argument("--output_path", type=str, default=None) 34 | args = parser.parse_args() 35 | main(args) 36 | -------------------------------------------------------------------------------- /src/data_process/build_tlg_dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import argparse 3 | from utils import load_jsonl, save_jsonl 4 | from utils.config import TARGET_LENGTH 5 | 6 | 7 | def main(args): 8 | # random seed 9 | random.seed(args.random_seed) 10 | # raw data load 11 | df = load_jsonl(args.dataset_path) 12 | # random sample 13 | random.shuffle(df) 14 | df = df[: args.num] 15 | # add target length 16 | data = [] 17 | target_lengths = [random.choice(TARGET_LENGTH) for _ in range(args.num)] 18 | for idx in range(len(df)): 19 | d = {} 20 | d['id'] = idx 21 | d["Instruction"] = df[idx]["conversations"][0]["value"] 22 | d["TargetLength"] = target_lengths[idx] 23 | data.append(d) 24 | # save to output_path 25 | save_jsonl(args.output_path, data) 26 | 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--dataset_path", type=str, default=None) 31 | parser.add_argument("--num", type=int, default=None) 32 | parser.add_argument("--random_seed", type=int, default=10) 33 | parser.add_argument("--output_path", type=str, default=None) 34 | args = parser.parse_args() 35 | main(args) 36 | -------------------------------------------------------------------------------- /src/exp/cal_ttest.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils import load_jsonl 3 | from rich.table import Table 4 | from rich.console import Console 5 | from utils.count import count_words 6 | import scipy.stats as stats 7 | 8 | 9 | def main(args): 10 | # raw data load 11 | vanilla_df = load_jsonl(args.vanilla_dataset_path) 12 | ruler_dataset_path = args.vanilla_dataset_path.replace("tlg_", "tlg_Ruler_") 13 | ruler_df = load_jsonl(ruler_dataset_path) 14 | print(ruler_dataset_path) 15 | vanilla_lengths, ruler_lengths = [], [] 16 | for idx in range(len(vanilla_df)): 17 | vanilla_lengths.append(count_words(vanilla_df[idx]["output"])) 18 | ruler_lengths.append(count_words(ruler_df[idx]["output"])) 19 | table = Table(show_header=True, header_style="bold magenta") 20 | table.add_column("Model", style="dim", width=12) 21 | 
table.add_column("t", justify="right") 22 | table.add_column("p", justify="right") 23 | t_statistic, p_value = stats.ttest_ind(ruler_lengths,vanilla_lengths) 24 | table.add_row( 25 | args.vanilla_dataset_path.split("/")[-1][4:], 26 | f"{t_statistic:.4f}", 27 | f"{p_value:.4f}", 28 | ) 29 | console = Console() 30 | console.print(table) 31 | # print(f"{t_statistic:.4f}|{p_value:.4f}|") 32 | 33 | if __name__ == "__main__": 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("--vanilla_dataset_path", type=str, default=None) 36 | args = parser.parse_args() 37 | main(args) 38 | -------------------------------------------------------------------------------- /configs/ds_config_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | "optimizer": { 6 | "type": "AdamW", 7 | "params": { 8 | "lr": "auto", 9 | "betas": "auto", 10 | "eps": "auto", 11 | "weight_decay": "auto" 12 | } 13 | }, 14 | 15 | "scheduler": { 16 | "type": "WarmupLR", 17 | "params": { 18 | "warmup_min_lr": "auto", 19 | "warmup_max_lr": "auto", 20 | "warmup_num_steps": "auto" 21 | } 22 | }, 23 | 24 | "zero_optimization": { 25 | "stage": 3, 26 | "offload_optimizer": { 27 | "device": "none", 28 | "pin_memory": true 29 | }, 30 | "offload_param": { 31 | "device": "none", 32 | "pin_memory": true 33 | }, 34 | "overlap_comm": true, 35 | "contiguous_gradients": true, 36 | "sub_group_size": 1e9, 37 | "reduce_bucket_size": "auto", 38 | "stage3_prefetch_bucket_size": "auto", 39 | "stage3_param_persistence_threshold": "auto", 40 | "stage3_max_live_parameters": 1e9, 41 | "stage3_max_reuse_distance": 1e9, 42 | "stage3_gather_16bit_weights_on_model_save": true 43 | }, 44 | 45 | "gradient_accumulation_steps": "auto", 46 | "gradient_clipping": "auto", 47 | "steps_per_print": 20, 48 | "train_batch_size": "auto", 49 | "train_micro_batch_size_per_gpu": "auto", 50 | "wall_clock_breakdown": false 51 | } -------------------------------------------------------------------------------- /configs/ds_config_zero3_cpu_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | "optimizer": { 6 | "type": "AdamW", 7 | "params": { 8 | "lr": "auto", 9 | "betas": "auto", 10 | "eps": "auto", 11 | "weight_decay": "auto" 12 | } 13 | }, 14 | 15 | "scheduler": { 16 | "type": "WarmupLR", 17 | "params": { 18 | "warmup_min_lr": "auto", 19 | "warmup_max_lr": "auto", 20 | "warmup_num_steps": "auto" 21 | } 22 | }, 23 | 24 | "zero_optimization": { 25 | "stage": 3, 26 | "offload_optimizer": { 27 | "device": "cpu", 28 | "pin_memory": true 29 | }, 30 | "offload_param": { 31 | "device": "cpu", 32 | "pin_memory": true 33 | }, 34 | "overlap_comm": true, 35 | "contiguous_gradients": true, 36 | "sub_group_size": 1e9, 37 | "reduce_bucket_size": "auto", 38 | "stage3_prefetch_bucket_size": "auto", 39 | "stage3_param_persistence_threshold": "auto", 40 | "stage3_max_live_parameters": 1e9, 41 | "stage3_max_reuse_distance": 1e9, 42 | "stage3_gather_16bit_weights_on_model_save": true 43 | }, 44 | 45 | "gradient_accumulation_steps": "auto", 46 | "gradient_clipping": "auto", 47 | "steps_per_print": 20, 48 | "train_batch_size": "auto", 49 | "train_micro_batch_size_per_gpu": "auto", 50 | "wall_clock_breakdown": false 51 | } -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/ruler.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1) 5 | 6 | LEARNING_RATE=2e-5 7 | NUM_TRAIN_EPOCHS=3 8 | VANILLA=False 9 | 10 | MODEL_NAME_OR_PATH=/data1/HF-Models/meta-llama/Meta-Llama-3-8B 11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 12 | MODEL=${MODEL_NAME_OR_PATH##*/} 13 | 14 | TEMPLATE=custom 15 | echo "Finetune data template: ${TEMPLATE}" 16 | 17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 18 | echo "Finetune data path: ${DATA_PATH}" 19 | 20 | MODEL_MAX_LENGTH=2048 21 | echo "Model max length: ${MODEL_MAX_LENGTH}" 22 | 23 | BATCH_SIZE=4 24 | echo "Per device train batch size: ${BATCH_SIZE}" 25 | 26 | GRAD_ACCUM=8 27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 28 | 29 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 30 | LOG_DIR=../logs 31 | 32 | deepspeed finetuning/finetune.py \ 33 | --vanilla $VANILLA \ 34 | --deepspeed ../configs/ds_config_zero3.json \ 35 | --model_name_or_path $MODEL_NAME_OR_PATH \ 36 | --template $TEMPLATE\ 37 | --model_max_length $MODEL_MAX_LENGTH \ 38 | --data_path $DATA_PATH \ 39 | --output_dir $OUTPUT_DIR \ 40 | --bf16 True \ 41 | --tf32 True \ 42 | --per_device_train_batch_size ${BATCH_SIZE} \ 43 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 44 | --gradient_checkpointing True \ 45 | --lr_scheduler_type cosine \ 46 | --learning_rate ${LEARNING_RATE} \ 47 | --warmup_ratio 0.05 \ 48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 49 | --evaluation_strategy no \ 50 | --save_strategy epoch \ 51 | --save_total_limit 1 \ 52 | --logging_steps 5 \ 53 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/ruler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1) 5 | 6 | LEARNING_RATE=2e-5 7 | NUM_TRAIN_EPOCHS=3 8 | VANILLA=False 9 | 10 | MODEL_NAME_OR_PATH=/data1/HF-Models/mistralai/Mistral-7B-v0.3 11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 12 | MODEL=${MODEL_NAME_OR_PATH##*/} 13 | 14 | TEMPLATE=custom 15 | echo "Finetune data template: ${TEMPLATE}" 16 | 17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 18 | echo "Finetune data path: ${DATA_PATH}" 19 | 20 | MODEL_MAX_LENGTH=2048 21 | echo "Model max length: ${MODEL_MAX_LENGTH}" 22 | 23 | BATCH_SIZE=4 24 | echo "Per device train batch size: ${BATCH_SIZE}" 25 | 26 | GRAD_ACCUM=8 27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 28 | 29 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 30 | LOG_DIR=../logs 31 | 32 | deepspeed finetuning/finetune.py \ 33 | --vanilla $VANILLA \ 34 | --deepspeed ../configs/ds_config_zero3.json \ 35 | --model_name_or_path $MODEL_NAME_OR_PATH \ 36 | --template $TEMPLATE\ 37 | --model_max_length $MODEL_MAX_LENGTH \ 38 | --data_path $DATA_PATH \ 39 | --output_dir $OUTPUT_DIR \ 40 | --bf16 True \ 41 | --tf32 True \ 42 | --per_device_train_batch_size ${BATCH_SIZE} \ 43 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 44 | --gradient_checkpointing True \ 45 | --lr_scheduler_type cosine \ 46 | --learning_rate ${LEARNING_RATE} \ 47 | --warmup_ratio 0.05 \ 48 | --num_train_epochs 
${NUM_TRAIN_EPOCHS} \ 49 | --evaluation_strategy no \ 50 | --save_strategy epoch \ 51 | --save_total_limit 1 \ 52 | --logging_steps 5 \ 53 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1) 5 | 6 | LEARNING_RATE=2e-5 7 | NUM_TRAIN_EPOCHS=3 8 | VANILLA=True 9 | 10 | MODEL_NAME_OR_PATH=/data1/HF-Models/mistralai/Mistral-7B-v0.3 11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 12 | MODEL=${MODEL_NAME_OR_PATH##*/} 13 | 14 | TEMPLATE=custom 15 | echo "Finetune data template: ${TEMPLATE}" 16 | 17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 18 | echo "Finetune data path: ${DATA_PATH}" 19 | 20 | MODEL_MAX_LENGTH=2048 21 | echo "Model max length: ${MODEL_MAX_LENGTH}" 22 | 23 | BATCH_SIZE=4 24 | echo "Per device train batch size: ${BATCH_SIZE}" 25 | 26 | GRAD_ACCUM=8 27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 28 | 29 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 30 | LOG_DIR=../logs 31 | 32 | deepspeed finetuning/finetune.py \ 33 | --vanilla $VANILLA \ 34 | --deepspeed ../configs/ds_config_zero3.json \ 35 | --model_name_or_path $MODEL_NAME_OR_PATH \ 36 | --template $TEMPLATE\ 37 | --model_max_length $MODEL_MAX_LENGTH \ 38 | --data_path $DATA_PATH \ 39 | --output_dir $OUTPUT_DIR \ 40 | --bf16 True \ 41 | --tf32 True \ 42 | --per_device_train_batch_size ${BATCH_SIZE} \ 43 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 44 | --gradient_checkpointing True \ 45 | --lr_scheduler_type cosine \ 46 | --learning_rate ${LEARNING_RATE} \ 47 | --warmup_ratio 0.05 \ 48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 49 | --evaluation_strategy no \ 50 | --save_strategy epoch \ 51 | --save_total_limit 1 \ 52 | --logging_steps 5 \ 53 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1) 5 | 6 | LEARNING_RATE=2e-5 7 | NUM_TRAIN_EPOCHS=3 8 | VANILLA=True 9 | 10 | MODEL_NAME_OR_PATH=/data1/HF-Models/meta-llama/Meta-Llama-3-8B 11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 12 | MODEL=${MODEL_NAME_OR_PATH##*/} 13 | 14 | TEMPLATE=custom 15 | echo "Finetune data template: ${TEMPLATE}" 16 | 17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 18 | echo "Finetune data path: ${DATA_PATH}" 19 | 20 | MODEL_MAX_LENGTH=2048 21 | echo "Model max length: ${MODEL_MAX_LENGTH}" 22 | 23 | BATCH_SIZE=4 24 | echo "Per device train batch size: ${BATCH_SIZE}" 25 | 26 | GRAD_ACCUM=8 27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 28 | 29 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 30 | LOG_DIR=../logs 31 | 32 | deepspeed finetuning/finetune.py \ 33 | --vanilla $VANILLA \ 34 | --deepspeed ../configs/ds_config_zero3.json \ 35 | --model_name_or_path $MODEL_NAME_OR_PATH \ 36 | --template $TEMPLATE\ 37 | --model_max_length $MODEL_MAX_LENGTH \ 38 | --data_path $DATA_PATH \ 39 | --output_dir $OUTPUT_DIR \ 40 
| --bf16 True \ 41 | --tf32 True \ 42 | --per_device_train_batch_size ${BATCH_SIZE} \ 43 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 44 | --gradient_checkpointing True \ 45 | --lr_scheduler_type cosine \ 46 | --learning_rate ${LEARNING_RATE} \ 47 | --warmup_ratio 0.05 \ 48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 49 | --evaluation_strategy no \ 50 | --save_strategy epoch \ 51 | --save_total_limit 1 \ 52 | --logging_steps 5 \ 53 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log 54 | -------------------------------------------------------------------------------- /src/utils/config.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | 4 | class Inf: 5 | def __gt__(self, other): 6 | return True 7 | 8 | def __ge__(self, other): 9 | return True 10 | 11 | def __lt__(self, other): 12 | return False 13 | 14 | def __eq__(self, other): 15 | return isinstance(other, Inf) 16 | 17 | def __repr__(self): 18 | return "Inf" 19 | 20 | 21 | inf = Inf() 22 | 23 | 24 | # FLCG EXP 25 | LEVEL0 = ["10", "30", "50", "80"] 26 | LEVEL1 = ["150", "300", "500"] 27 | LEVEL2 = ["700", ">800"] 28 | RANGE = OrderedDict( 29 | { 30 | # level:0 31 | "10": {"PM": [0, 20], "FM": [0, 20]}, 32 | "30": {"PM": [20, 40], "FM": [20, 40]}, 33 | "50": {"PM": [40, 60], "FM": [40, 60]}, 34 | "80": {"PM": [70, 90], "FM": [60, 100]}, 35 | # level:1 36 | "150": {"PM": [130, 170], "FM": [100, 200]}, 37 | "300": {"PM": [280, 320], "FM": [200, 400]}, 38 | "500": {"PM": [450, 550], "FM": [400, 600]}, 39 | # level:2 40 | "700": {"PM": [630, 770], "FM": [600, 800]}, 41 | ">800": {"PM": [800, inf], "FM": [800, inf]}, 42 | } 43 | ) 44 | 45 | TARGET_LENGTH = list(RANGE.keys()) 46 | 47 | MetaLengthToken = [ 48 | ["[MLT:10]", [5, 15]], 49 | ["[MLT:30]", [25, 35]], 50 | ["[MLT:50]", [45, 55]], 51 | ["[MLT:80]", [75, 85]], 52 | ["[MLT:150]", [135, 155]], 53 | ["[MLT:300]", [295, 305]], 54 | ["[MLT:500]", [495, 505]], 55 | ["[MLT:700]", [695, 705]], 56 | ["[MLT:>800]", [800, inf]], 57 | ] 58 | 59 | # MLT training dataset 60 | SAMPLE = { 61 | "[MLT:10]": 10000 * 2, 62 | "[MLT:30]": 10000 * 2, 63 | "[MLT:50]": 10000 * 2, 64 | "[MLT:80]": 10000 * 2, 65 | "[MLT:150]": 10000 * 2, 66 | "[MLT:300]": 10000 * 2, 67 | "[MLT:500]": 10000 * 2, 68 | "[MLT:700]": 10000 * 2, 69 | "[MLT:>800]": 10000 * 2, 70 | } 71 | -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/ruler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1,2,3 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=False 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/Qwen/Qwen1.5-7B 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/ruler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1,2,3 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=False 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/01-ai/Yi-1.5-6B 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/gemma-7b/ruler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=False 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/google/gemma-7b 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1,2,3 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=True 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/Qwen/Qwen1.5-7B 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log 66 | -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=True 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/01-ai/Yi-1.5-6B 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log 66 | -------------------------------------------------------------------------------- /scripts/gemma-7b/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=True 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/google/gemma-7b 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log 66 | -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/ruler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=False 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/deepseek-ai/deepseek-llm-7b-base 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/vanilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=4,5,6,7 3 | 4 | find_free_port() { 5 | while : 6 | do 7 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 9 | if [ $? 
-ne 0 ]; then 10 | echo $PORT 11 | return 12 | fi 13 | done 14 | } 15 | 16 | export MASTER_PORT=$(find_free_port) 17 | 18 | LEARNING_RATE=2e-5 19 | NUM_TRAIN_EPOCHS=3 20 | VANILLA=True 21 | 22 | MODEL_NAME_OR_PATH=/data1/HF-Models/deepseek-ai/deepseek-llm-7b-base 23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 24 | MODEL=${MODEL_NAME_OR_PATH##*/} 25 | 26 | TEMPLATE=custom 27 | echo "Finetune data template: ${TEMPLATE}" 28 | 29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 30 | echo "Finetune data path: ${DATA_PATH}" 31 | 32 | MODEL_MAX_LENGTH=2048 33 | echo "Model max length: ${MODEL_MAX_LENGTH}" 34 | 35 | BATCH_SIZE=4 36 | echo "Per device train batch size: ${BATCH_SIZE}" 37 | 38 | GRAD_ACCUM=8 39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 40 | 41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 42 | LOG_DIR=../logs 43 | 44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 45 | --vanilla $VANILLA \ 46 | --deepspeed ../configs/ds_config_zero3.json \ 47 | --model_name_or_path $MODEL_NAME_OR_PATH \ 48 | --template $TEMPLATE\ 49 | --model_max_length $MODEL_MAX_LENGTH \ 50 | --data_path $DATA_PATH \ 51 | --output_dir $OUTPUT_DIR \ 52 | --bf16 True \ 53 | --tf32 True \ 54 | --per_device_train_batch_size ${BATCH_SIZE} \ 55 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 56 | --gradient_checkpointing True \ 57 | --lr_scheduler_type cosine \ 58 | --learning_rate ${LEARNING_RATE} \ 59 | --warmup_ratio 0.05 \ 60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 61 | --evaluation_strategy no \ 62 | --save_strategy epoch \ 63 | --save_total_limit 1 \ 64 | --logging_steps 5 \ 65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log 66 | -------------------------------------------------------------------------------- /src/exp/analysis_sgm.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | from utils.config import MetaLengthToken, RANGE 4 | from utils import load_jsonl 5 | from rich.table import Table 6 | from rich.console import Console 7 | from utils.count import count_words 8 | 9 | 10 | def main(args): 11 | # raw data load 12 | df = load_jsonl(args.dataset_path) 13 | # draw table 14 | table = Table(show_header=True, header_style="bold magenta") 15 | table.add_column("Model", style="dim", width=15) 16 | for mlt in MetaLengthToken: 17 | table.add_column(mlt[0].split(":")[-1][:-1], justify="right") 18 | table.add_column("FM", justify="right") 19 | table.add_column("Avg", justify="right") 20 | count = {mlt[0]: 0 for mlt in MetaLengthToken} 21 | for d in df: 22 | if d["output"].count("MLT") != 1: 23 | d["output"] = "[MLT:" + d["output"].split("[MLT:")[1] 24 | for mlt in MetaLengthToken: 25 | if mlt[0] in d["output"]: 26 | count[mlt[0]] += 1 27 | hit = 0 28 | all_wc = 0 29 | for d in df: 30 | cleaned_text = re.sub(r"\[MLT:\d+\]", "", d["output"]) # clean MLT 31 | wc = count_words(cleaned_text) 32 | mlt = d["output"].split("]")[0] + "]" 33 | if (wc > RANGE[mlt.split(":")[-1][:-1]]["FM"][0]) and ( 34 | wc <= RANGE[mlt.split(":")[-1][:-1]]["FM"][1] 35 | ): 36 | hit += 1 37 | all_wc += wc 38 | table.add_row( 39 | args.dataset_path.split("/")[-1].split("tl_")[-1][:15], 40 | f"{count['[MLT:10]']}", 41 | f"{count['[MLT:30]']}", 42 | f"{count['[MLT:50]']}", 43 | f"{count['[MLT:80]']}", 44 | f"{count['[MLT:150]']}", 45 | f"{count['[MLT:300]']}", 46 | f"{count['[MLT:500]']}", 47 | f"{count['[MLT:700]']}", 48 | f"{count['[MLT:>800]']}", 49 | 
f"{hit/len(df)*100:.2f}", 50 | f"{all_wc/len(df):.0f}", 51 | ) 52 | console = Console() 53 | console.print(table) 54 | 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument("--dataset_path", type=str, default=None) 59 | args = parser.parse_args() 60 | main(args) 61 | -------------------------------------------------------------------------------- /src/exp/run_exp_api.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import openai 3 | import os 4 | from tqdm import tqdm 5 | from utils import load_jsonl, save_jsonl 6 | from dotenv import load_dotenv 7 | 8 | 9 | load_dotenv() 10 | 11 | 12 | def main(args): 13 | client = openai.OpenAI( 14 | api_key=args.key, base_url=os.getenv("OPENAI_BASE_URL") 15 | ) 16 | def get_completion_openai( 17 | prompt: str, 18 | model: str, 19 | ) -> str: 20 | """ 21 | Generate a completion using the OpenAI API. 22 | 23 | Args: 24 | prompt (str): The user's prompt or query. 25 | model (str, optional): The name of the OpenAI model to use for generating the completion. 26 | Defaults to "gpt-4-turbo". 27 | """ 28 | response = client.chat.completions.create( 29 | model=model, 30 | top_p=1, 31 | max_tokens=2048, 32 | messages=[ 33 | {"role": "user", "content": prompt}, 34 | ], 35 | ) 36 | return response.choices[0].message.content 37 | 38 | # raw data load 39 | df = load_jsonl(args.dataset_path) 40 | # load tokenizer and llm 41 | for idx in tqdm(range(len(df))): 42 | instruction = df[idx]["Instruction"] 43 | targetlength = df[idx]["TargetLength"] if "TargetLength" in df[idx] else "" 44 | if targetlength != "": 45 | targetlength = targetlength.replace(">", "more than ") 46 | question = f"{instruction}\nThe response should have a word count of {targetlength} words." 
47 | df[idx]["prompt"] = question 48 | flag = False 49 | while not flag: 50 | try: 51 | output = get_completion_openai(question, args.model) 52 | flag = True 53 | except Exception as e: 54 | print(e) 55 | df[idx]["output"] = output 56 | if idx % 10 == 0: 57 | save_jsonl(args.output_path, df) 58 | # save to output_path 59 | save_jsonl(args.output_path, df) 60 | 61 | 62 | if __name__ == "__main__": 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument("--dataset_path", type=str, default=None) 65 | parser.add_argument("--model", type=str, default=None) 66 | parser.add_argument("--output_path", type=str, default=None) 67 | parser.add_argument("--key", type=str, default=None) 68 | args = parser.parse_args() 69 | main(args) 70 | -------------------------------------------------------------------------------- /src/exp/analysis_mmlt.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | from utils.config import MetaLengthToken, RANGE 4 | from utils import load_jsonl 5 | from rich.table import Table 6 | from rich.console import Console 7 | from utils.count import count_words 8 | 9 | 10 | def main(args): 11 | # raw data load 12 | df = load_jsonl(args.dataset_path) 13 | # draw table 14 | table = Table(show_header=True, header_style="bold magenta") 15 | table.add_column("Model", style="dim", width=15) 16 | for mlt in MetaLengthToken: 17 | table.add_column(mlt[0].split(':')[-1][:-1], justify="right") 18 | table.add_column('Acc', justify="right") 19 | count = {mlt[0]:0 for mlt in MetaLengthToken} 20 | hit = {mlt[0]:0 for mlt in MetaLengthToken} 21 | for d in df: 22 | count[f"[MLT:{d['TargetLength']}]"] += 1 23 | for d in df: 24 | wc = count_words(d['output']) 25 | mlt = f"[MLT:{d['TargetLength']}]" 26 | if (wc > RANGE[mlt.split(':')[-1][:-1]]['FM'][0]) and (wc <= RANGE[mlt.split(':')[-1][:-1]]['FM'][1]): 27 | hit[mlt] += 1 28 | # print(hit) 29 | # print(count) 30 | table.add_row( 31 | args.dataset_path.split('/')[-1].split('tl_')[-1][:15], 32 | f"{hit['[MLT:10]']/count['[MLT:10]']*100:.2f}", 33 | f"{hit['[MLT:30]']/count['[MLT:30]']*100:.2f}", 34 | f"{hit['[MLT:50]']/count['[MLT:50]']*100:.2f}", 35 | f"{hit['[MLT:80]']/count['[MLT:80]']*100:.2f}", 36 | f"{hit['[MLT:150]']/count['[MLT:150]']*100:.2f}", 37 | f"{hit['[MLT:300]']/count['[MLT:300]']*100:.2f}", 38 | f"{hit['[MLT:500]']/count['[MLT:500]']*100:.2f}", 39 | f"{hit['[MLT:700]']/count['[MLT:700]']*100:.2f}", 40 | f"{hit['[MLT:>800]']/count['[MLT:>800]']*100:.2f}", 41 | f"{sum(hit.values())/sum(count.values())*100:.2f}", 42 | ) 43 | console = Console() 44 | console.print(table) 45 | latex = [ 46 | f"{hit['[MLT:10]']/count['[MLT:10]']*100:.1f}", 47 | f"{hit['[MLT:30]']/count['[MLT:30]']*100:.1f}", 48 | f"{hit['[MLT:50]']/count['[MLT:50]']*100:.1f}", 49 | f"{hit['[MLT:80]']/count['[MLT:80]']*100:.1f}", 50 | f"{hit['[MLT:150]']/count['[MLT:150]']*100:.1f}", 51 | f"{hit['[MLT:300]']/count['[MLT:300]']*100:.1f}", 52 | f"{hit['[MLT:500]']/count['[MLT:500]']*100:.1f}", 53 | f"{hit['[MLT:700]']/count['[MLT:700]']*100:.1f}", 54 | f"{hit['[MLT:>800]']/count['[MLT:>800]']*100:.1f}", 55 | f"{sum(hit.values())/sum(count.values())*100:.2f}", 56 | ] 57 | print('&'.join(latex) + '\\\\') 58 | 59 | 60 | if __name__ == "__main__": 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument("--dataset_path", type=str, default=None) 63 | args = parser.parse_args() 64 | main(args) 65 | -------------------------------------------------------------------------------- /src/exp/run_exp.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | from tqdm import tqdm 3 | from vllm import LLM, SamplingParams 4 | from transformers import AutoTokenizer 5 | from utils import load_jsonl, save_jsonl 6 | from utils.templates import TemplatesMapping 7 | 8 | 9 | def main(args): 10 | # raw data load 11 | df = load_jsonl(args.dataset_path) 12 | # load tokenizer and llm 13 | tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) 14 | llm = LLM( 15 | model=args.model_name_or_path, 16 | trust_remote_code=True, 17 | tensor_parallel_size=args.gpus, 18 | ) 19 | template = TemplatesMapping[args.template] 20 | if args.template == "default": 21 | terminators = TemplatesMapping["default"].get_stop_tokens( 22 | args.model_name_or_path 23 | ) 24 | elif "self_generated_mlt.jsonl" in args.dataset_path: 25 | terminators = ["<|end_of_text|>", "<|eot_id|>"] 26 | else: 27 | terminators = template.STOP_TOKENS 28 | print(f"> STOP_TOKENS:{terminators}") 29 | terminators = tokenizer.convert_tokens_to_ids(terminators) 30 | skip_special_tokens = False if "self_generated_mlt.jsonl" in args.dataset_path else True 31 | sampling_params = SamplingParams( 32 | temperature=0, 33 | max_tokens=2048, 34 | stop_token_ids=terminators, 35 | skip_special_tokens=skip_special_tokens, 36 | ) 37 | for idx in tqdm(range(len(df))): 38 | instruction = df[idx]["Instruction"] 39 | targetlength = df[idx]["TargetLength"] if "TargetLength" in df[idx] else "" 40 | if args.template == "default": 41 | prompts = [ 42 | template.apply_template_for_generation( 43 | instruction, targetlength, tokenizer 44 | ) 45 | ] 46 | elif args.template == "custom": 47 | if "vanilla" in args.model_name_or_path: 48 | prompts = [ 49 | template.apply_template_for_generation_vanilla(instruction, targetlength) 50 | ] 51 | else: 52 | prompts = [ 53 | template.apply_template_for_generation(instruction, targetlength) 54 | ] 55 | else: 56 | prompts = [ 57 | template.apply_template_for_generation(instruction, targetlength) 58 | ] 59 | df[idx]["prompt"] = prompts[0] 60 | outputs = llm.generate(prompts, sampling_params) 61 | for output in outputs: 62 | generated_text = output.outputs[0].text 63 | df[idx]["output"] = generated_text 64 | if idx % 100 == 0: 65 | save_jsonl(args.output_path, df) 66 | # save to output_path 67 | save_jsonl(args.output_path, df) 68 | 69 | 70 | if __name__ == "__main__": 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument("--dataset_path", type=str, default=None) 73 | parser.add_argument("--model_name_or_path", type=str, default=None) 74 | parser.add_argument("--gpus", type=int, default=1) 75 | parser.add_argument("--template", type=str, default="default") 76 | parser.add_argument("--output_path", type=str, default=None) 77 | args = parser.parse_args() 78 | main(args) 79 | -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=7 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_Yi-1.5-6B 7 | MODEL_NAME_OR_PATH=/data1/lijiaming/Ruler/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/Yi-1.5-6B/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=6 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_Yi-1.5-6B 7 | 
MODEL_NAME_OR_PATH=/data1/lijiaming/Ruler/checkpoints/vanilla_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log 
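Note: the ruler_lm_eval.sh and vanilla_lm_eval.sh scripts repeat the same lm_eval invocation once per benchmark, changing only the task name and its few-shot count (25 for ai2_arc, 10 for hellaswag, 0 for truthfulqa, 5 for mmlu, winogrande, and gsm8k). The following is a minimal sketch, not a file from this repository, of how those calls could be driven from a single loop; MODEL_NAME and MODEL_NAME_OR_PATH are placeholders to be filled in per checkpoint.

# Hypothetical consolidation of the per-task lm_eval calls above (a sketch, not part of the repo).
set -ex
export CUDA_VISIBLE_DEVICES=0
export NUMEXPR_MAX_THREADS=128

MODEL=vllm
MODEL_NAME=ruler_Yi-1.5-6B                  # placeholder: name of the evaluated checkpoint
MODEL_NAME_OR_PATH=/path/to/checkpoint-2841 # placeholder: path to that checkpoint
OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
TOKENIZER_MODE=auto
NUM_GPUS=1
GPU_MEMORY_UTILIZATION=0.8

mkdir -p $OUTPUT_PATH

# task:few-shot pairs taken from the individual commands in the scripts above
for PAIR in ai2_arc:25 hellaswag:10 truthfulqa:0 mmlu:5 winogrande:5 gsm8k:5; do
    TASK=${PAIR%%:*}
    NUM_FEWSHOT=${PAIR##*:}
    lm_eval --model $MODEL \
        --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
        --tasks $TASK \
        --device cuda \
        --output_path ${OUTPUT_PATH}/${MODEL}_eval_${TASK} \
        --batch_size 1 \
        --num_fewshot ${NUM_FEWSHOT} \
        --write_out \
        2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_${TASK}.log
done

Because the task name is reused in --output_path and in the tee target, results would land in the same ${MODEL}_eval_<task> files that the per-task scripts produce.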
-------------------------------------------------------------------------------- /scripts/gemma-7b/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=2 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_gemma-7b 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=4 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_Qwen1.5-7B 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/gemma-7b/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=3 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_gemma-7b 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /src/exp/cal_mlt_scores.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils.config import RANGE, LEVEL0, LEVEL1, LEVEL2 3 | from utils import load_jsonl 4 | from rich.table import Table 5 | from rich.console import Console 6 | from utils.count import count_words 7 | 8 | 9 | def metric_targetlength(df, LEVEL): 10 | result = { 11 | targetlength: {"PM": {"in": 0, "out": 0}, "FM": {"in": 0, "out": 0}} 12 | for targetlength in LEVEL 13 | } 14 | for d in df: 15 | length = count_words(d["output"]) 16 | if d["TargetLength"] in result: 17 | # PM 18 | if ( 19 | length > RANGE[d["TargetLength"]]["PM"][0] 20 | and length <= RANGE[d["TargetLength"]]["PM"][1] 21 | ): 22 | result[d["TargetLength"]]["PM"]["in"] += 1 23 | else: 24 | result[d["TargetLength"]]["PM"]["out"] += 1 25 | # FM 26 | if ( 27 | length > RANGE[d["TargetLength"]]["FM"][0] 28 | and length <= RANGE[d["TargetLength"]]["FM"][1] 29 | ): 30 | result[d["TargetLength"]]["FM"]["in"] += 1 31 | else: 32 | result[d["TargetLength"]]["FM"]["out"] += 1 33 | # draw table 34 | table = Table(show_header=True, header_style="bold magenta") 35 | table.add_column("TargetLength", style="dim", width=12) 36 | table.add_column("PM_in", justify="right") 37 | table.add_column("PM_out", justify="right") 38 | table.add_column("PM", justify="right") 39 | table.add_column("FM_in", justify="right") 40 | table.add_column("FM_out", justify="right") 41 | table.add_column("FM", justify="right") 42 | # latex_str = "" 43 | for key in result: 44 | table.add_row( 45 | key, 46 | f"{result[key]['PM']['in']}", 47 | f"{result[key]['PM']['out']}", 48 | f"{result[key]['PM']['in'] / (result[key]['PM']['in'] + result[key]['PM']['out'])*100:.2f}", 49 | f"{result[key]['FM']['in']}", 50 | f"{result[key]['FM']['out']}", 51 | f"{result[key]['FM']['in'] / (result[key]['FM']['in'] + result[key]['FM']['out'])*100:.2f}", 52 | ) 53 | # latex_str = ( 54 | # latex_str 55 | # + "&" 56 | # + f"{result[key]['PM']['in'] / (result[key]['PM']['in'] + result[key]['PM']['out'])*100:.2f}" 57 | # + "&" 58 | # + f"{result[key]['FM']['in'] / (result[key]['FM']['in'] 
+ result[key]['FM']['out'])*100:.2f}" 59 | # ) 60 | table.add_row( 61 | "Total", 62 | f"{sum([result[key]['PM']['in']for key in result])}", 63 | f"{sum([result[key]['PM']['out']for key in result])}", 64 | f"{sum([result[key]['PM']['in']for key in result]) / (sum([result[key]['PM']['in']for key in result]) + sum([result[key]['PM']['out']for key in result]))*100:.2f}", 65 | f"{sum([result[key]['FM']['in']for key in result])}", 66 | f"{sum([result[key]['FM']['out']for key in result])}", 67 | f"{sum([result[key]['FM']['in']for key in result]) / (sum([result[key]['FM']['in']for key in result]) + sum([result[key]['FM']['out']for key in result]))*100:.2f}", 68 | ) 69 | console = Console() 70 | console.print(table) 71 | # print(latex_str) 72 | 73 | 74 | def main(args): 75 | # raw data load 76 | df = load_jsonl(args.dataset_path) 77 | print(f"> LEVEL0{'='*20}") 78 | metric_targetlength(df, LEVEL0) 79 | print(f"> LEVEL1{'='*20}") 80 | metric_targetlength(df, LEVEL1) 81 | print(f"> LEVEL2{'='*20}") 82 | metric_targetlength(df, LEVEL2) 83 | 84 | 85 | if __name__ == "__main__": 86 | parser = argparse.ArgumentParser() 87 | parser.add_argument("--dataset_path", type=str, default=None) 88 | args = parser.parse_args() 89 | main(args) 90 | -------------------------------------------------------------------------------- /scripts/Qwen1.5-7B/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=5 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_Qwen1.5-7B 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda 
\ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=0 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_Meta-Llama-3-8B 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | 
tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=0 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_Mistral-7B-v0.3 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/Meta-Llama-3-8B/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=0 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_Meta-Llama-3-8B 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/ruler_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=1 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=ruler_deepseek-llm-7b-base 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/overall_performance/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args 
pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /scripts/deepseek-llm-7b-base/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=1 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_deepseek-llm-7b-base 7 | 
MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | # lm_eval --model $MODEL \ 16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | # --tasks leaderboard \ 18 | # --device cuda \ 19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | # --batch_size 1 \ 21 | # --write_out \ 22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | lm_eval --model $MODEL \ 25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | --tasks ai2_arc \ 27 | --device cuda \ 28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | --batch_size 1 \ 30 | --num_fewshot 25 \ 31 | --write_out \ 32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | lm_eval --model $MODEL \ 35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | --tasks hellaswag \ 37 | --device cuda \ 38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | --batch_size 1 \ 40 | --num_fewshot 10 \ 41 | --write_out \ 42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | lm_eval --model $MODEL \ 45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | --tasks truthfulqa \ 47 | --device cuda \ 48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | --batch_size 1 \ 50 | --num_fewshot 0 \ 51 | --write_out \ 52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | lm_eval --model $MODEL \ 55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | --tasks mmlu \ 57 | --device cuda \ 58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | --batch_size 1 \ 60 | --num_fewshot 5 \ 61 | --write_out \ 62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | lm_eval --model $MODEL \ 65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | --tasks winogrande \ 67 | --device cuda \ 68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | --batch_size 1 \ 70 | --num_fewshot 5 \ 71 | --write_out \ 72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | lm_eval --model $MODEL \ 75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | --tasks gsm8k \ 77 | --device cuda \ 78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | --batch_size 1 \ 80 | --num_fewshot 5 \ 81 | --write_out \ 82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log 
-------------------------------------------------------------------------------- /scripts/Mistral-7B-v0.3/vanilla_lm_eval.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | export CUDA_VISIBLE_DEVICES=0 3 | export NUMEXPR_MAX_THREADS=128 4 | 5 | MODEL=vllm 6 | MODEL_NAME=vanilla_Mistral-7B-v0.3 7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841 8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME} 9 | TOKENIZER_MODE=auto 10 | NUM_GPUS=1 11 | GPU_MEMORY_UTILIZATION=0.8 12 | 13 | mkdir -p $OUTPUT_PATH 14 | 15 | lm_eval --model $MODEL \ 16 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 17 | --tasks leaderboard \ 18 | --device cuda \ 19 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \ 20 | --batch_size 1 \ 21 | --write_out \ 22 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log 23 | 24 | # lm_eval --model $MODEL \ 25 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 26 | # --tasks ai2_arc \ 27 | # --device cuda \ 28 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \ 29 | # --batch_size 1 \ 30 | # --num_fewshot 25 \ 31 | # --write_out \ 32 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log 33 | 34 | # lm_eval --model $MODEL \ 35 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 36 | # --tasks hellaswag \ 37 | # --device cuda \ 38 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \ 39 | # --batch_size 1 \ 40 | # --num_fewshot 10 \ 41 | # --write_out \ 42 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log 43 | 44 | # lm_eval --model $MODEL \ 45 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 46 | # --tasks truthfulqa \ 47 | # --device cuda \ 48 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \ 49 | # --batch_size 1 \ 50 | # --num_fewshot 0 \ 51 | # --write_out \ 52 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log 53 | 54 | # lm_eval --model $MODEL \ 55 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 56 | # --tasks mmlu \ 57 | # --device cuda \ 58 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \ 59 | # --batch_size 1 \ 60 | # --num_fewshot 5 \ 61 | # --write_out \ 62 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log 63 | 64 | # lm_eval --model $MODEL \ 65 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 66 | # --tasks winogrande \ 67 | # --device cuda \ 68 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \ 69 | # --batch_size 1 \ 70 | # --num_fewshot 5 \ 71 | # --write_out \ 72 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log 73 | 74 | # lm_eval --model $MODEL \ 75 | # 
--model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \ 76 | # --tasks gsm8k \ 77 | # --device cuda \ 78 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \ 79 | # --batch_size 1 \ 80 | # --num_fewshot 5 \ 81 | # --write_out \ 82 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | outputs/ 163 | datasets/ 164 | logs/ 165 | test/ 166 | !datasets/download.sh -------------------------------------------------------------------------------- /src/finetuning/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import PreTrainedTokenizer 3 | from datasets import load_dataset 4 | from utils.config import MetaLengthToken 5 | from typing import Dict, Sequence 6 | 7 | IGNORE_INDEX = -100 8 | 9 | 10 | class DataCollatorForSupervisedDataset: 11 | """Collate examples for supervised fine-tuning.""" 12 | 13 | def __init__(self, tokenizer: PreTrainedTokenizer): 14 | self.tokenizer = tokenizer 15 | 16 | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: 17 | input_ids, labels = tuple( 18 | [instance[key] for instance in instances] for key in ("input_ids", "labels") 19 | ) 20 | input_ids = [torch.tensor(x) for x in input_ids] 21 | input_ids = torch.nn.utils.rnn.pad_sequence( 22 | input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id 23 | ) 24 | labels = [torch.tensor(x) for x in labels] 25 | labels = torch.nn.utils.rnn.pad_sequence( 26 | labels, batch_first=True, padding_value=IGNORE_INDEX 27 | ) 28 | 29 | return dict( 30 | input_ids=input_ids, 31 | labels=labels, 32 | attention_mask=input_ids.ne(self.tokenizer.pad_token_id), 33 | ) 34 | 35 | 36 | def preprocess_template(instruction, mlt, output, tokenizer, template,vanilla): 37 | if vanilla: 38 | mlt = '' 39 | prompts = template.apply_template(instruction, mlt, output) 40 | input_ids = tokenizer.encode( 41 | prompts, truncation=True, max_length=tokenizer.model_max_length 42 | ) # truncation 43 | split_token_idx = None 44 | for i in MetaLengthToken: 45 | i_id = tokenizer.convert_tokens_to_ids(i[0]) 46 | if i_id in input_ids: 47 | split_token_idx = input_ids.index(i_id) 48 | if split_token_idx is None: 49 | labels = [IGNORE_INDEX for _ in range(len(input_ids))] 50 | else: 51 | labels = [ 52 | input_ids[i] if i >= split_token_idx else IGNORE_INDEX 53 | for i in range(len(input_ids)) 54 | ] 55 | # vanilla 56 | if vanilla: 57 | instruction_prompts = template.apply_template_for_instruction(instruction) 58 | instruction_ids = tokenizer.encode( 59 | instruction_prompts, truncation=True, max_length=tokenizer.model_max_length 60 | ) 61 | labels = [ 62 | input_ids[i] if i >= 
len(instruction_ids) else IGNORE_INDEX 63 | for i in range(len(input_ids)) 64 | ] 65 | 66 | return input_ids, labels 67 | 68 | 69 | def preprocess(examples, tokenizer, template, vanilla): 70 | processed_input_ids, processed_labels = [], [] 71 | 72 | instructions, mlts, outputs = ( 73 | examples["Instruction"], 74 | examples["mlt"], 75 | examples["output"], 76 | ) 77 | for instruction, mlt, output in zip(instructions, mlts, outputs): 78 | input_ids, labels = preprocess_template( 79 | instruction, mlt, output, tokenizer, template, vanilla 80 | ) 81 | 82 | processed_input_ids.append(input_ids) 83 | processed_labels.append(labels) 84 | 85 | return {"input_ids": processed_input_ids, "labels": processed_labels} 86 | 87 | 88 | def load_custom_dataset(tokenizer: PreTrainedTokenizer, data_path: str, template, vanilla): 89 | train_datasets = load_dataset("json", data_files=data_path, split="train") 90 | 91 | train_dataset = train_datasets.map( 92 | preprocess, 93 | batched=True, 94 | batch_size=3000, 95 | num_proc=32, 96 | remove_columns=train_datasets.column_names, 97 | keep_in_memory=True, 98 | load_from_cache_file=False, 99 | desc="Running Encoding", 100 | fn_kwargs={"tokenizer": tokenizer, "template": template, "vanilla":vanilla}, 101 | ) 102 | 103 | torch.distributed.barrier() 104 | 105 | return train_dataset 106 | -------------------------------------------------------------------------------- /src/exp/cal_elm_rmse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils.config import LEVEL0, LEVEL1, LEVEL2 3 | from utils import load_jsonl 4 | from utils.count import count_words 5 | from rich.table import Table 6 | from rich.console import Console 7 | from sklearn.metrics import root_mean_squared_error 8 | 9 | 10 | def calculate_rmse(actual, predicted): 11 | """ 12 | Calculate the Root Mean Square Error between two arrays using scikit-learn. 13 | 14 | Parameters: 15 | actual (array-like): The array of actual values. 16 | predicted (array-like): The array of predicted values. 17 | 18 | Returns: 19 | float: The calculated RMSE value. 20 | """ 21 | # Calculate the RMSE 22 | rmse = root_mean_squared_error(actual, predicted) 23 | 24 | return rmse 25 | 26 | 27 | def elm(list1, list2): 28 | """ 29 | Count the number of elements that are the same in both lists at the same positions. 30 | 31 | Parameters: 32 | list1 (list): The first list. 33 | list2 (list): The second list. 34 | 35 | Returns: 36 | int: The count of elements that are the same at the same positions. 
37 | """ 38 | # Use zip to pair the elements and then check for equality 39 | same_position_count = sum(1 for a, b in zip(list1, list2) if a == b) 40 | 41 | return same_position_count 42 | 43 | 44 | def main(args): 45 | # raw data load 46 | df = load_jsonl(args.dataset_path) 47 | # calculate metric 48 | predicted_lengths = [] 49 | target_lengths = [] 50 | predicted_lengths_0, predicted_lengths_1, predicted_lengths_2 = [], [], [] 51 | target_lengths_0, target_lengths_1, target_lengths_2 = [], [], [] 52 | for d in df: 53 | length = count_words(d["output"]) 54 | if d["TargetLength"] != ">800": 55 | predicted_lengths.append(length) 56 | target_lengths.append(int(d["TargetLength"])) 57 | if d["TargetLength"] in LEVEL0: 58 | predicted_lengths_0.append(length) 59 | target_lengths_0.append(int(d["TargetLength"])) 60 | elif d["TargetLength"] in LEVEL1: 61 | predicted_lengths_1.append(length) 62 | target_lengths_1.append(int(d["TargetLength"])) 63 | elif d["TargetLength"] in LEVEL2 and d["TargetLength"] != ">800": 64 | predicted_lengths_2.append(length) 65 | target_lengths_2.append(int(d["TargetLength"])) 66 | else: 67 | if d["TargetLength"] != ">800": 68 | raise KeyError 69 | table = Table(show_header=True, header_style="bold magenta") 70 | table.add_column("Model", style="dim", width=12) 71 | table.add_column("Level 0_elm", justify="right") 72 | table.add_column("Level 0_rmse", justify="right") 73 | table.add_column("Level 1_elm", justify="right") 74 | table.add_column("Level 1_rmse", justify="right") 75 | table.add_column("Level 2_elm", justify="right") 76 | table.add_column("Level 2_rmse", justify="right") 77 | table.add_column("All Level_elm", justify="right") 78 | table.add_column("All Level 0_rmse", justify="right") 79 | table.add_row( 80 | args.dataset_path.split("/")[-1][4:], 81 | f"{elm(target_lengths_0,predicted_lengths_0)/len(predicted_lengths_0)*100:.2f}", 82 | f"{calculate_rmse(predicted_lengths_0,target_lengths_0):.2f}", 83 | f"{elm(target_lengths_1,predicted_lengths_1)/len(predicted_lengths_1)*100:.2f}", 84 | f"{calculate_rmse(predicted_lengths_1,target_lengths_1):.2f}", 85 | f"{elm(target_lengths_2,predicted_lengths_2)/len(predicted_lengths_2)*100:.2f}", 86 | f"{calculate_rmse(predicted_lengths_2,target_lengths_2):.2f}", 87 | f"{elm(target_lengths,predicted_lengths)/len(predicted_lengths)*100:.2f}", 88 | f"{calculate_rmse(predicted_lengths,target_lengths):.2f}", 89 | ) 90 | console = Console() 91 | console.print(table) 92 | print(f"{elm(target_lengths_0,predicted_lengths_0)/len(predicted_lengths_0)*100:.2f}/{calculate_rmse(predicted_lengths_0,target_lengths_0):.2f}|{elm(target_lengths_1,predicted_lengths_1)/len(predicted_lengths_1)*100:.2f}/{calculate_rmse(predicted_lengths_1,target_lengths_1):.2f}|{elm(target_lengths_2,predicted_lengths_2)/len(predicted_lengths_2)*100:.2f}/{calculate_rmse(predicted_lengths_2,target_lengths_2):.2f}|{elm(target_lengths,predicted_lengths)/len(predicted_lengths)*100:.2f}/{calculate_rmse(predicted_lengths,target_lengths):.2f}|") 93 | 94 | 95 | if __name__ == "__main__": 96 | parser = argparse.ArgumentParser() 97 | parser.add_argument("--dataset_path", type=str, default=None) 98 | args = parser.parse_args() 99 | main(args) 100 | -------------------------------------------------------------------------------- /src/exp/cal_level_scores.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils.config import RANGE, LEVEL0, LEVEL1, LEVEL2 3 | from utils import load_jsonl 4 | from rich.table import 
Table 5 | from rich.console import Console 6 | from utils.count import count_words 7 | 8 | 9 | def main(args): 10 | result = RANGE 11 | for key in result: 12 | ( 13 | result[key]["PM_in"], 14 | result[key]["PM_out"], 15 | result[key]["FM_in"], 16 | result[key]["FM_out"], 17 | ) = 0, 0, 0, 0 18 | # raw data load 19 | df = load_jsonl(args.dataset_path) 20 | # calculate metric 21 | for d in df: 22 | length = count_words(d["output"]) 23 | # PM 24 | if ( 25 | length > result[d["TargetLength"]]["PM"][0] 26 | and length <= result[d["TargetLength"]]["PM"][1] 27 | ): 28 | result[d["TargetLength"]]["PM_in"] += 1 29 | else: 30 | result[d["TargetLength"]]["PM_out"] += 1 31 | # FM 32 | if ( 33 | length > result[d["TargetLength"]]["FM"][0] 34 | and length <= result[d["TargetLength"]]["FM"][1] 35 | ): 36 | result[d["TargetLength"]]["FM_in"] += 1 37 | else: 38 | result[d["TargetLength"]]["FM_out"] += 1 39 | # level 0 40 | levle0_pm_in, levle0_pm_out, levle0_fm_in, levle0_fm_out = 0, 0, 0, 0 41 | # level 1 42 | levle1_pm_in, levle1_pm_out, levle1_fm_in, levle1_fm_out = 0, 0, 0, 0 43 | # level 2 44 | levle2_pm_in, levle2_pm_out, levle2_fm_in, levle2_fm_out = 0, 0, 0, 0 45 | for key in result: 46 | if key in LEVEL0: 47 | levle0_pm_in += result[key]["PM_in"] 48 | levle0_pm_out += result[key]["PM_out"] 49 | levle0_fm_in += result[key]["FM_in"] 50 | levle0_fm_out += result[key]["FM_out"] 51 | elif key in LEVEL1: 52 | levle1_pm_in += result[key]["PM_in"] 53 | levle1_pm_out += result[key]["PM_out"] 54 | levle1_fm_in += result[key]["FM_in"] 55 | levle1_fm_out += result[key]["FM_out"] 56 | elif key in LEVEL2: 57 | levle2_pm_in += result[key]["PM_in"] 58 | levle2_pm_out += result[key]["PM_out"] 59 | levle2_fm_in += result[key]["FM_in"] 60 | levle2_fm_out += result[key]["FM_out"] 61 | # draw table 62 | table = Table(show_header=True, header_style="bold magenta") 63 | table.add_column("Level", style="dim", width=12) 64 | table.add_column("PM_in", justify="right") 65 | table.add_column("PM_out", justify="right") 66 | table.add_column("PM", justify="right") 67 | table.add_column("FM_in", justify="right") 68 | table.add_column("FM_out", justify="right") 69 | table.add_column("FM", justify="right") 70 | table.add_row( 71 | "Level:0", 72 | f"{levle0_pm_in}", 73 | f"{levle0_pm_out}", 74 | f"{levle0_pm_in/(levle0_pm_in + levle0_pm_out)*100:.2f}", 75 | f"{levle0_fm_in}", 76 | f"{levle0_fm_out}", 77 | f"{levle0_fm_in/(levle0_fm_in + levle0_fm_out)*100:.2f}", 78 | ) 79 | table.add_row( 80 | "Level:1", 81 | f"{levle1_pm_in}", 82 | f"{levle1_pm_out}", 83 | f"{levle1_pm_in/(levle1_pm_in + levle1_pm_out)*100:.2f}", 84 | f"{levle1_fm_in}", 85 | f"{levle1_fm_out}", 86 | f"{levle1_fm_in/(levle1_fm_in + levle1_fm_out)*100:.2f}", 87 | ) 88 | table.add_row( 89 | "Level:2", 90 | f"{levle2_pm_in}", 91 | f"{levle2_pm_out}", 92 | f"{levle2_pm_in/(levle2_pm_in + levle2_pm_out)*100:.2f}", 93 | f"{levle2_fm_in}", 94 | f"{levle2_fm_out}", 95 | f"{levle2_fm_in/(levle2_fm_in + levle2_fm_out)*100:.2f}", 96 | ) 97 | table.add_row( 98 | "All Level", 99 | f"{levle0_pm_in +levle1_pm_in + levle2_pm_in}", 100 | f"{levle0_pm_out+ levle1_pm_out + levle2_pm_out}", 101 | f"{(levle0_pm_in +levle1_pm_in + levle2_pm_in)/(levle0_pm_in +levle1_pm_in + levle2_pm_in + levle0_pm_out+ levle1_pm_out + levle2_pm_out)*100:.2f}", 102 | f"{levle0_fm_in +levle1_fm_in + levle2_fm_in}", 103 | f"{levle0_fm_out+ levle1_fm_out + levle2_fm_out}", 104 | f"{(levle0_fm_in +levle1_fm_in + levle2_fm_in)/(levle0_fm_in +levle1_fm_in + levle2_fm_in + levle0_fm_out+ levle1_fm_out + 
levle2_fm_out)*100:.2f}", 105 | ) 106 | console = Console() 107 | console.print(table) 108 | 109 | 110 | if __name__ == "__main__": 111 | parser = argparse.ArgumentParser() 112 | parser.add_argument("--dataset_path", type=str, default=None) 113 | args = parser.parse_args() 114 | main(args) 115 | -------------------------------------------------------------------------------- /src/finetuning/finetune.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import transformers 4 | 5 | from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, Trainer 6 | from dataclasses import dataclass, field 7 | 8 | from dataset import load_custom_dataset, DataCollatorForSupervisedDataset 9 | from utils.config import MetaLengthToken 10 | from utils.templates import TemplatesMapping 11 | 12 | 13 | @dataclass 14 | class ModelArguments: 15 | model_name_or_path: str = field( 16 | default="", 17 | metadata={"help": "The model checkpoint for weights initialization."}, 18 | ) 19 | template: str = field(default="", metadata={"help": "The template used to train"}) 20 | 21 | 22 | @dataclass 23 | class DataArguments: 24 | data_path: str = field( 25 | default=None, metadata={"help": "Path to the training data."} 26 | ) 27 | 28 | 29 | @dataclass 30 | class TrainingArguments(transformers.TrainingArguments): 31 | vanilla: bool = field( 32 | default=False, 33 | metadata={"help": "Vanilla finetuning or Ruler finetuning, defaulty is False."}, 34 | ) 35 | model_max_length: int = field( 36 | default=2048, 37 | metadata={ 38 | "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)." 39 | }, 40 | ) 41 | gradient_checkpointing_kwargs: dict = field( 42 | default_factory=lambda: {"use_reentrant": False}, 43 | metadata={"help": "gradient checkpointing kwargs"}, 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments)) 49 | model_args, data_args, training_args = parser.parse_args_into_dataclasses() 50 | print(training_args.vanilla) 51 | if training_args.local_rank == 0: 52 | print("=" * 100) 53 | print(training_args) 54 | 55 | if training_args.local_rank == 0: 56 | print("> Loading tokenizer from {}".format(model_args.model_name_or_path)) 57 | 58 | tokenizer = AutoTokenizer.from_pretrained( 59 | model_args.model_name_or_path, 60 | model_max_length=training_args.model_max_length, 61 | padding_side="right", 62 | truncation_side="right", 63 | use_fast=True, 64 | trust_remote_code=True, 65 | ) 66 | template = TemplatesMapping[model_args.template] 67 | # add special tokens 68 | if training_args.vanilla: 69 | special_tokens = {"additional_special_tokens": [t for t in template.SPECIAL_TOKENS]} 70 | elif model_args.template == 'custom': 71 | special_tokens = {"additional_special_tokens": [t for t in template.SPECIAL_TOKENS + [m[0]for m in MetaLengthToken]]} 72 | else: 73 | special_tokens = {"additional_special_tokens": [t[0] for t in MetaLengthToken]} 74 | print(f"> New special tokens: {special_tokens}") 75 | tokenizer.add_special_tokens(special_tokens) 76 | for st in special_tokens["additional_special_tokens"]: 77 | print(f"{st}:{tokenizer.convert_tokens_to_ids(st)}") 78 | 79 | tokenizer.pad_token = ( 80 | tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token 81 | ) 82 | if training_args.local_rank == 0: 83 | print("> PAD Token:", tokenizer.pad_token, tokenizer.pad_token_id) 84 | print("> BOS Token", tokenizer.bos_token, 
tokenizer.bos_token_id) 85 | print("> EOS Token", tokenizer.eos_token, tokenizer.eos_token_id) 86 | 87 | if training_args.local_rank == 0: 88 | print("> Loading model from {}".format(model_args.model_name_or_path)) 89 | 90 | if "glm-4" in model_args.model_name_or_path: # glm-4 not support flash attention 2s 91 | model = AutoModelForCausalLM.from_pretrained( 92 | model_args.model_name_or_path, 93 | torch_dtype=torch.bfloat16, 94 | trust_remote_code=True, 95 | ) 96 | else: 97 | model = AutoModelForCausalLM.from_pretrained( 98 | model_args.model_name_or_path, 99 | attn_implementation="flash_attention_2", 100 | torch_dtype=torch.bfloat16, 101 | trust_remote_code=True, 102 | ) 103 | model.resize_token_embeddings(len(tokenizer)) 104 | train_dataset = load_custom_dataset( 105 | tokenizer=tokenizer, 106 | data_path=data_args.data_path, 107 | template=template, 108 | vanilla=training_args.vanilla, 109 | ) 110 | 111 | if training_args.local_rank == 0: 112 | print("> Training dataset samples:", len(train_dataset)) 113 | for index in random.sample(range(len(train_dataset)), 3): 114 | print("=" * 100) 115 | print( 116 | f"Sample {index} of the training set:\n{tokenizer.decode(list(train_dataset[index]['input_ids']))}" 117 | ) 118 | print(f"{train_dataset[index]['input_ids']}") 119 | print(f"{train_dataset[index]['labels']}") 120 | 121 | data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) 122 | 123 | trainer = Trainer( 124 | model=model, 125 | tokenizer=tokenizer, 126 | args=training_args, 127 | train_dataset=train_dataset, 128 | data_collator=data_collator, 129 | ) 130 | 131 | trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 132 | -------------------------------------------------------------------------------- /src/data_process/build_training_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import nlp 3 | import random 4 | import argparse 5 | import pandas as pd 6 | from utils import load_jsonl, save_jsonl 7 | from utils.config import MetaLengthToken, SAMPLE 8 | from utils.count import count_words 9 | 10 | 11 | 12 | 13 | def list_files(directory): 14 | return [ 15 | f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) 16 | ] 17 | 18 | 19 | def add_MLT(instruction: str): 20 | result = None 21 | word_count = count_words(instruction) 22 | for mlt in MetaLengthToken: 23 | if word_count > mlt[1][0] and word_count <= mlt[1][1]: 24 | result = mlt[0] 25 | return result 26 | 27 | 28 | def process_OpenHermes(dataset_path, random_seed, num): 29 | # set random seed 30 | random.seed(random_seed) 31 | df = load_jsonl(dataset_path) 32 | random.shuffle(df) 33 | print(f"{'='*10}First data in TLG Dataset{'='*10}") 34 | print(df[0]["conversations"][0]["value"]) 35 | print(f"{'='*10}Last data in TLG Dataset{'='*10}") 36 | print(df[num - 1]["conversations"][0]["value"]) 37 | print("=" * 20) 38 | df = df[num:] # cut off the FLCG exp dataset 39 | # sampled data 40 | sampled_data = {key[0]: [] for key in MetaLengthToken} 41 | for idx in range(len(df)): 42 | d = {} 43 | d["Instruction"] = df[idx]["conversations"][0]["value"] 44 | d["word_count"] = len(df[idx]["conversations"][1]["value"].split()) 45 | d["output"] = df[idx]["conversations"][1]["value"] 46 | d["mlt"] = add_MLT(df[idx]["conversations"][1]["value"]) 47 | if d["mlt"] is not None: 48 | sampled_data[d["mlt"]].append(d) 49 | return sampled_data 50 | 51 | 52 | def process_longform(dir_path): 53 | # sampled data 54 | sampled_data = {key[0]: 
[] for key in MetaLengthToken} 55 | longform_files = list_files(dir_path) 56 | for file in longform_files: 57 | df = pd.read_parquet(f"{dir_path}/{file}") 58 | for idx in range(df.shape[0]): 59 | d = {} 60 | d["Instruction"] = df.iloc[idx]["input"] 61 | d["word_count"] = len(df.iloc[idx]["output"].split()) 62 | d["output"] = df.iloc[idx]["output"] 63 | d["mlt"] = add_MLT(df.iloc[idx]["output"]) 64 | if d["mlt"] is not None: 65 | sampled_data[d["mlt"]].append(d) 66 | return sampled_data 67 | 68 | 69 | def process_eli5(): 70 | # sampled data 71 | sampled_data = {key[0]: [] for key in MetaLengthToken} 72 | eli5 = nlp.load_dataset("eli5") 73 | files = ["train_eli5", "test_eli5", "validation_eli5"] 74 | for file in files: 75 | for data in eli5[file]: 76 | d = {} 77 | d["Instruction"] = data["title"] 78 | answer = "" 79 | for i in data["answers"]["text"]: 80 | if len(i.split()) > len(answer.split()): 81 | answer = i 82 | d["word_count"] = len(answer.split()) 83 | d["output"] = answer 84 | d["mlt"] = add_MLT(answer) 85 | if d["mlt"] is not None: 86 | sampled_data[d["mlt"]].append(d) 87 | return sampled_data 88 | 89 | 90 | def main(args): 91 | sampled_data = {key[0]: [] for key in MetaLengthToken} 92 | # OpenHermes2.5 93 | openhermes_data = process_OpenHermes(args.dataset_path, args.random_seed, args.num) 94 | print(f"{'='*10}OpenHermes2.5 dataset{'='*10}") 95 | for key in openhermes_data: 96 | random.shuffle(openhermes_data[key]) 97 | data_num = min(len(openhermes_data[key]), SAMPLE[key] - len(sampled_data[key])) 98 | sampled_data[key] += openhermes_data[key][:data_num] 99 | print(f"{key}-{len(openhermes_data[key])}-take {data_num}.") 100 | # Long Form 101 | longform_data = process_longform(args.longform_dir) 102 | print(f"{'='*10}LongForm dataset{'='*10}") 103 | for key in longform_data: 104 | random.shuffle(longform_data[key]) 105 | data_num = min(len(longform_data[key]), SAMPLE[key] - len(sampled_data[key])) 106 | sampled_data[key] += longform_data[key][:data_num] 107 | print(f"{key}-{len(longform_data[key])}-take {data_num}") 108 | # ELI5 109 | eli5_data = process_eli5() 110 | print(f"{'='*10}ELI5 dataset{'='*10}") 111 | for key in eli5_data: 112 | random.shuffle(eli5_data[key]) 113 | data_num = min(len(eli5_data[key]), SAMPLE[key] - len(sampled_data[key])) 114 | sampled_data[key] += eli5_data[key][:data_num] 115 | print(f"{key}-{len(eli5_data[key])}-take {data_num}") 116 | print(f"{'='*10}FINAL{'='*10}") 117 | data = [] 118 | for key in sampled_data: 119 | data += sampled_data[key] 120 | print(f"{key}-{len(sampled_data[key])}") 121 | random.shuffle(data) 122 | global_id = 0 123 | for d in data: 124 | d["id"] = global_id 125 | global_id += 1 126 | print(f"Total:{global_id}") 127 | # save to output_path 128 | save_jsonl(args.output_path, data) 129 | 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument("--dataset_path", type=str, default=None) 134 | parser.add_argument("--longform_dir", type=str, default=None) 135 | parser.add_argument("--num", type=int, default=None) 136 | parser.add_argument("--random_seed", type=int, default=10) 137 | parser.add_argument("--output_path", type=str, default=None) 138 | args = parser.parse_args() 139 | main(args) 140 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ruler: A Model-Agnostic Method to Control Generated Length for Large Language Models 2 | 3 | 4 | 5 | 
![Method](images/method.png) 6 | 7 | ## 🤩 Release 8 | - [2024/09/20] 🥳 [Ruler](https://arxiv.org/abs/2409.18943) is accepted by EMNLP 2024 Findings. 9 | 10 | ## 😎 Overview 11 | 12 | **Ruler** is a novel, model-agnostic approach that employs Meta Length Tokens (*MLTs*) to enhance the instruction-following ability of LLMs under length-constrained instructions. 13 | 14 | **Ruler** equips LLMs with the ability to generate responses of a target length. Moreover, it can automatically generate an appropriate *MLT* when no target length is provided. Comprehensive experiments show the effectiveness of **Ruler** across different LLMs. 15 | 16 | ## 🧐 Quickstart 17 | 18 | We also provide a more [detailed experiments document](./experiments.md) (specific to each experiment and including all the results!). 19 | 20 | ### Prepare Environment 21 | 22 | First, set up a Python environment. This codebase has been tested under Python 3.x, and we officially support Python 3.10. 23 | ```bash 24 | conda create -n ruler python=3.10 25 | cd Ruler # the directory containing 'requirements.txt' 26 | pip install -r requirements.txt 27 | 28 | export PYTHONPATH=xxxx/Ruler/src 29 | cd src 30 | 31 | # create folders and download datasets 32 | bash ../scripts/download.sh 33 | ``` 34 | ### Target Length Generation Task 35 | 36 | **Closed-source Model** 37 | 38 | ```shell 39 | python exp/run_exp_api.py\ 40 | --dataset_path ../datasets/tlg_dataset.jsonl\ 41 | --model <model_name>\ 42 | --output_path ../outputs/tlg/tlg_<model_name>.jsonl\ 43 | --key <api_key> 44 | ``` 45 | 46 | **Open-source Model** 47 | 48 | ```shell 49 | python exp/run_exp.py\ 50 | --dataset_path ../datasets/tlg_dataset.jsonl\ 51 | --model_name_or_path <model_name_or_path>\ 52 | --output_path ../outputs/tlg/tlg_<model_name>.jsonl 53 | ``` 54 | 55 | **Calculate scores** 56 | 57 | Different `Levels`: 58 | 59 | ```shell 60 | python exp/cal_level_scores.py\ 61 | --dataset_path <output_path> 62 | ``` 63 | 64 | Different `MLT`: 65 | 66 | ```shell 67 | python exp/cal_mlt_scores.py\ 68 | --dataset_path <output_path> 69 | ``` 70 | 71 | ![TLG](images/TLG.png) 72 | 73 | ### Ruler 74 | 75 | Finetuning scripts: 76 | ```shell 77 | export CUDA_VISIBLE_DEVICES=0,1,2,3 78 | 79 | find_free_port() { 80 | while : 81 | do 82 | PORT=$(( ( RANDOM % 64512 ) + 1024 )) 83 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1 84 | if [ $?
-ne 0 ]; then 85 | echo $PORT 86 | return 87 | fi 88 | done 89 | } 90 | 91 | export MASTER_PORT=$(find_free_port) 92 | 93 | LEARNING_RATE=2e-5 94 | NUM_TRAIN_EPOCHS=3 95 | VANILLA=False 96 | 97 | MODEL_NAME_OR_PATH=<model_name_or_path> 98 | echo "Finetune from: ${MODEL_NAME_OR_PATH}" 99 | MODEL=${MODEL_NAME_OR_PATH##*/} 100 | 101 | TEMPLATE=custom 102 | echo "Finetune data template: ${TEMPLATE}" 103 | 104 | DATA_PATH=../datasets/ruler_training_dataset.jsonl 105 | echo "Finetune data path: ${DATA_PATH}" 106 | 107 | MODEL_MAX_LENGTH=2048 108 | echo "Model max length: ${MODEL_MAX_LENGTH}" 109 | 110 | BATCH_SIZE=4 111 | echo "Per device train batch size: ${BATCH_SIZE}" 112 | 113 | GRAD_ACCUM=8 114 | echo "Gradient accumulation steps: ${GRAD_ACCUM}" 115 | 116 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}" 117 | LOG_DIR=../logs 118 | 119 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \ 120 | --vanilla $VANILLA \ 121 | --deepspeed ../configs/ds_config_zero3.json \ 122 | --model_name_or_path $MODEL_NAME_OR_PATH \ 123 | --template $TEMPLATE\ 124 | --model_max_length $MODEL_MAX_LENGTH \ 125 | --data_path $DATA_PATH \ 126 | --output_dir $OUTPUT_DIR \ 127 | --bf16 True \ 128 | --tf32 True \ 129 | --per_device_train_batch_size ${BATCH_SIZE} \ 130 | --gradient_accumulation_steps ${GRAD_ACCUM} \ 131 | --gradient_checkpointing True \ 132 | --lr_scheduler_type cosine \ 133 | --learning_rate ${LEARNING_RATE} \ 134 | --warmup_ratio 0.05 \ 135 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \ 136 | --evaluation_strategy no \ 137 | --save_strategy epoch \ 138 | --save_total_limit 1 \ 139 | --logging_steps 5 \ 140 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log 141 | ``` 142 | 143 | ![TLG_ruler](images/TLG_ruler.png) 144 | 145 | ### Multi MLT Generation Experiment 146 | 147 | **Run exp:** 148 | 149 | ```shell 150 | python exp/run_exp.py\ 151 | --dataset_path ../datasets/multi_mlt.jsonl\ 152 | --model_name_or_path <model_name_or_path>\ 153 | --gpus 1\ 154 | --template