├── src
│   ├── exp
│   │   ├── __init__.py
│   │   ├── cal_ttest.py
│   │   ├── analysis_sgm.py
│   │   ├── run_exp_api.py
│   │   ├── analysis_mmlt.py
│   │   ├── run_exp.py
│   │   ├── cal_mlt_scores.py
│   │   ├── cal_elm_rmse.py
│   │   └── cal_level_scores.py
│   ├── data_process
│   │   ├── __init__.py
│   │   ├── raw_openhermes_process.py
│   │   ├── build_arena_dataset.py
│   │   ├── build_tlg_dataset.py
│   │   └── build_training_dataset.py
│   ├── finetuning
│   │   ├── callback.py
│   │   ├── dataset.py
│   │   └── finetune.py
│   └── utils
│       ├── __init__.py
│       ├── count.py
│       ├── json_file.py
│       ├── config.py
│       └── templates.py
├── images
│   ├── TLG.png
│   ├── mmlt.png
│   ├── sgm.png
│   ├── method.png
│   ├── TLG_ruler.png
│   └── overall_performance.png
├── requirements.txt
├── scripts
│   ├── Yi-1.5-6B
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   ├── gemma-7b
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   ├── Qwen1.5-7B
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   ├── Meta-Llama-3-8B
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   ├── Mistral-7B-v0.3
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   ├── deepseek-llm-7b-base
│   │   ├── run_mmlt.sh
│   │   ├── run_self_generated_mlt.sh
│   │   ├── run_tlg.sh
│   │   ├── ruler.sh
│   │   ├── vanilla.sh
│   │   ├── ruler_lm_eval.sh
│   │   └── vanilla_lm_eval.sh
│   └── download.sh
├── configs
│   ├── ds_config_zero2.json
│   ├── ds_config_zero3.json
│   └── ds_config_zero3_cpu_offload.json
├── LICENSE
├── .gitignore
└── README.md
/src/exp/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/data_process/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/finetuning/callback.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .count import *
2 | from .json_file import *
--------------------------------------------------------------------------------
/images/TLG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/TLG.png
--------------------------------------------------------------------------------
/images/mmlt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/mmlt.png
--------------------------------------------------------------------------------
/images/sgm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/sgm.png
--------------------------------------------------------------------------------
/images/method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/method.png
--------------------------------------------------------------------------------
/images/TLG_ruler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/TLG_ruler.png
--------------------------------------------------------------------------------
/images/overall_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Geaming2002/Ruler/HEAD/images/overall_performance.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | deepspeed==0.13.1
2 | nlp==0.4.0
3 | nltk==3.8.1
4 | openai==1.50.0
5 | pandas==2.2.3
6 | python-dotenv==1.0.1
7 | rich==13.8.1
8 | scikit_learn==1.5.2
9 | scipy==1.14.1
10 | shortuuid==1.0.13
11 | tiktoken==0.7.0
12 | torch==2.4.0
13 | tqdm==4.66.4
14 | transformers==4.44.2
15 | vllm==0.5.5
16 |
--------------------------------------------------------------------------------
/scripts/Yi-1.5-6B/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Yi-1.5-6B.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/gemma-7b/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_gemma-7b.jsonl
7 |
8 |
--------------------------------------------------------------------------------
/scripts/Qwen1.5-7B/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Qwen1.5-7B.jsonl
7 |
8 |
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Meta-Llama-3-8B.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_Mistral-7B-v0.3.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/Yi-1.5-6B/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Yi-1.5-6B.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/run_mmlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/multi_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/multi_mlt/mmlt_ruler_deepseek-llm-7b-base.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/gemma-7b/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_gemma-7b.jsonl
7 |
8 |
--------------------------------------------------------------------------------
/scripts/Qwen1.5-7B/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Qwen1.5-7B.jsonl
7 |
8 |
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Meta-Llama-3-8B.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_Mistral-7B-v0.3.jsonl
7 |
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/run_self_generated_mlt.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/self_generated_mlt.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/self_generated_mlt/sgm_ruler_deepseek-llm-7b-base.jsonl
7 |
--------------------------------------------------------------------------------
/src/utils/count.py:
--------------------------------------------------------------------------------
1 | import nltk
2 |
3 | def count_words(text):
4 | """Counts the number of words."""
5 | tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
6 | tokens = tokenizer.tokenize(text)
7 | num_words = len(tokens)
8 | # print(tokens)
9 | return num_words
10 |
11 |
12 | def count_tokens(tokenizer, text):
13 | inputs = tokenizer.encode(text, return_tensors="pt")
14 | return inputs.shape[1]
15 |
16 | if __name__ == "__main__":
17 | pass
--------------------------------------------------------------------------------
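A quick illustrative check of how count_words behaves (not part of the repo; it assumes src/ is on PYTHONPATH so that utils imports resolve): the \w+ tokenizer ignores punctuation and splits hyphenated words, so word counts can differ slightly from a whitespace split.

from utils.count import count_words

print(count_words("Hello, world!"))     # -> 2
print(count_words("state-of-the-art"))  # -> 4 (state / of / the / art)
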
/src/utils/json_file.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
4 | def load_json(path):
5 | with open(path, "r") as file:
6 | return json.load(file)
7 |
8 |
9 | def load_jsonl(path):
10 | data = []
11 | with open(path, "r") as file:
12 | for line in file:
13 | json_data = json.loads(line)
14 | data.append(json_data)
15 | return data
16 |
17 |
18 | def save_jsonl(path, data):
19 | with open(path, "w", encoding="utf-8") as file:
20 | for item in data:
21 | json_string = json.dumps(item, ensure_ascii=False)
22 | file.write(json_string + "\n")
23 |
--------------------------------------------------------------------------------
/scripts/gemma-7b/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_gemma-7b.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_gemma-7b.jsonl
14 |
--------------------------------------------------------------------------------
/scripts/Yi-1.5-6B/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_Yi-1.5-6B.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Yi-1.5-6B.jsonl
14 |
--------------------------------------------------------------------------------
/scripts/Qwen1.5-7B/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_Qwen1.5-7B.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Qwen1.5-7B.jsonl
14 |
--------------------------------------------------------------------------------
/scripts/download.sh:
--------------------------------------------------------------------------------
1 | # datasets
2 | mkdir -p datasets
3 | mkdir -p datasets/LongForm
4 | mkdir -p datasets/OpenHermes
5 | # logs
6 | mkdir -p logs
7 | # outputs
8 | mkdir -p outputs
9 | mkdir -p outputs/checkpoints
10 | mkdir -p outputs/multi_mlt
11 | mkdir -p outputs/other_tasks
12 | mkdir -p outputs/self_generated_mlt
13 | mkdir -p outputs/tlg
14 |
15 | # download longform
16 | huggingface-cli download --repo-type dataset --resume-download akoksal/LongForm --local-dir datasets/LongForm
17 | # download openhermes
18 | huggingface-cli download --repo-type dataset --resume-download teknium/OpenHermes-2.5 --local-dir datasets/OpenHermes
19 |
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_Meta-Llama-3-8B.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Meta-Llama-3-8B.jsonl
14 |
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_Mistral-7B-v0.3.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_Mistral-7B-v0.3.jsonl
14 |
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/run_tlg.sh:
--------------------------------------------------------------------------------
1 | python exp/run_exp.py\
2 | --dataset_path ../datasets/tlg_dataset.jsonl\
3 | --gpus 1\
4 | --template custom\
5 | --model_name_or_path ../outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
6 | --output_path ../outputs/tlg/tlg_ot_ruler_deepseek-llm-7b-base.jsonl
7 |
8 | python exp/run_exp.py\
9 | --dataset_path ../datasets/tlg_dataset.jsonl\
10 | --gpus 1\
11 | --template custom\
12 | --model_name_or_path ../outputs/checkpoints/vanilla_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841\
13 | --output_path ../outputs/tlg/tlg_ot_vanilla_deepseek-llm-7b-base.jsonl
14 |
--------------------------------------------------------------------------------
/configs/ds_config_zero2.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "train_micro_batch_size_per_gpu": "auto",
14 | "train_batch_size": "auto",
15 | "gradient_accumulation_steps": "auto",
16 | "zero_optimization": {
17 | "stage": 2,
18 | "overlap_comm": true,
19 | "contiguous_gradients": true,
20 | "sub_group_size": 1e9,
21 | "reduce_bucket_size": "auto"
22 | }
23 | }
--------------------------------------------------------------------------------
/src/data_process/raw_openhermes_process.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils import load_json, save_jsonl
3 |
4 |
5 | def main(args):
6 | # raw data load
7 | df = load_json(args.dataset_path)
8 | data = []
9 | for d in df:
10 | if len(d["conversations"]) == 2:
11 | data.append(d)
12 | # save to output_path
13 | save_jsonl(args.output_path, data)
14 |
15 |
16 | if __name__ == "__main__":
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument("--dataset_path", type=str, default=None)
19 | parser.add_argument("--model_name_or_path", type=str, default=None)
20 | parser.add_argument("--output_path", type=str, default=None)
21 | args = parser.parse_args()
22 | main(args)
23 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Geaming
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/data_process/build_arena_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import nlp
3 | import random
4 | import argparse
5 | import pandas as pd
6 | from utils import load_jsonl, save_jsonl
7 | from utils.config import TARGET_LENGTH
8 |
9 |
10 | def main(args):
11 | df = load_jsonl(args.dataset_path)
12 | data = []
13 | id = 0
14 | for d in df:
15 | data.append({"id": id, "Instruction": d["turns"][0]["content"]})
16 | id += 1
17 | if args.num is not None:
18 | random.seed(args.random_seed)
19 | random.shuffle(data)
20 | data = data[: args.num]
21 | data = [
22 | {**d, "TargetLength": tl} for d in data for tl in TARGET_LENGTH
23 | ]
24 | # save to output_path
25 | save_jsonl(args.output_path, data)
26 |
27 |
28 | if __name__ == "__main__":
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument("--dataset_path", type=str, default=None)
31 | parser.add_argument("--num", type=int, default=None)
32 | parser.add_argument("--random_seed", type=int, default=10)
33 | parser.add_argument("--output_path", type=str, default=None)
34 | args = parser.parse_args()
35 | main(args)
36 |
--------------------------------------------------------------------------------
/src/data_process/build_tlg_dataset.py:
--------------------------------------------------------------------------------
1 | import random
2 | import argparse
3 | from utils import load_jsonl, save_jsonl
4 | from utils.config import TARGET_LENGTH
5 |
6 |
7 | def main(args):
8 | # random seed
9 | random.seed(args.random_seed)
10 | # raw data load
11 | df = load_jsonl(args.dataset_path)
12 | # random sample
13 | random.shuffle(df)
14 | df = df[: args.num]
15 | # add target length
16 | data = []
17 | target_lengths = [random.choice(TARGET_LENGTH) for _ in range(args.num)]
18 | for idx in range(len(df)):
19 | d = {}
20 | d['id'] = idx
21 | d["Instruction"] = df[idx]["conversations"][0]["value"]
22 | d["TargetLength"] = target_lengths[idx]
23 | data.append(d)
24 | # save to output_path
25 | save_jsonl(args.output_path, data)
26 |
27 |
28 | if __name__ == "__main__":
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument("--dataset_path", type=str, default=None)
31 | parser.add_argument("--num", type=int, default=None)
32 | parser.add_argument("--random_seed", type=int, default=10)
33 | parser.add_argument("--output_path", type=str, default=None)
34 | args = parser.parse_args()
35 | main(args)
36 |
--------------------------------------------------------------------------------
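Each record written by build_tlg_dataset.py pairs a sampled instruction with one of the TARGET_LENGTH buckets defined in utils/config.py. An illustrative line of tlg_dataset.jsonl (the instruction text here is made up) looks like:

{"id": 0, "Instruction": "Explain how photosynthesis works.", "TargetLength": "300"}
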
/src/exp/cal_ttest.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils import load_jsonl
3 | from rich.table import Table
4 | from rich.console import Console
5 | from utils.count import count_words
6 | import scipy.stats as stats
7 |
8 |
9 | def main(args):
10 | # raw data load
11 | vanilla_df = load_jsonl(args.vanilla_dataset_path)
12 | ruler_dataset_path = args.vanilla_dataset_path.replace("tlg_", "tlg_Ruler_")
13 | ruler_df = load_jsonl(ruler_dataset_path)
14 | print(ruler_dataset_path)
15 | vanilla_lengths, ruler_lengths = [], []
16 | for idx in range(len(vanilla_df)):
17 | vanilla_lengths.append(count_words(vanilla_df[idx]["output"]))
18 | ruler_lengths.append(count_words(ruler_df[idx]["output"]))
19 | table = Table(show_header=True, header_style="bold magenta")
20 | table.add_column("Model", style="dim", width=12)
21 | table.add_column("t", justify="right")
22 | table.add_column("p", justify="right")
23 | t_statistic, p_value = stats.ttest_ind(ruler_lengths,vanilla_lengths)
24 | table.add_row(
25 | args.vanilla_dataset_path.split("/")[-1][4:],
26 | f"{t_statistic:.4f}",
27 | f"{p_value:.4f}",
28 | )
29 | console = Console()
30 | console.print(table)
31 | # print(f"{t_statistic:.4f}|{p_value:.4f}|")
32 |
33 | if __name__ == "__main__":
34 | parser = argparse.ArgumentParser()
35 | parser.add_argument("--vanilla_dataset_path", type=str, default=None)
36 | args = parser.parse_args()
37 | main(args)
38 |
--------------------------------------------------------------------------------
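cal_ttest.py calls scipy's independent two-sample t-test with its defaults, which assume equal variances between the two length distributions. If the spreads differ noticeably, Welch's variant is the usual alternative; shown here as a suggestion, not as the script's current behavior:

# Welch's t-test: drops the equal-variance assumption.
t_statistic, p_value = stats.ttest_ind(ruler_lengths, vanilla_lengths, equal_var=False)
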
/configs/ds_config_zero3.json:
--------------------------------------------------------------------------------
1 | {
2 | "bf16": {
3 | "enabled": "auto"
4 | },
5 | "optimizer": {
6 | "type": "AdamW",
7 | "params": {
8 | "lr": "auto",
9 | "betas": "auto",
10 | "eps": "auto",
11 | "weight_decay": "auto"
12 | }
13 | },
14 |
15 | "scheduler": {
16 | "type": "WarmupLR",
17 | "params": {
18 | "warmup_min_lr": "auto",
19 | "warmup_max_lr": "auto",
20 | "warmup_num_steps": "auto"
21 | }
22 | },
23 |
24 | "zero_optimization": {
25 | "stage": 3,
26 | "offload_optimizer": {
27 | "device": "none",
28 | "pin_memory": true
29 | },
30 | "offload_param": {
31 | "device": "none",
32 | "pin_memory": true
33 | },
34 | "overlap_comm": true,
35 | "contiguous_gradients": true,
36 | "sub_group_size": 1e9,
37 | "reduce_bucket_size": "auto",
38 | "stage3_prefetch_bucket_size": "auto",
39 | "stage3_param_persistence_threshold": "auto",
40 | "stage3_max_live_parameters": 1e9,
41 | "stage3_max_reuse_distance": 1e9,
42 | "stage3_gather_16bit_weights_on_model_save": true
43 | },
44 |
45 | "gradient_accumulation_steps": "auto",
46 | "gradient_clipping": "auto",
47 | "steps_per_print": 20,
48 | "train_batch_size": "auto",
49 | "train_micro_batch_size_per_gpu": "auto",
50 | "wall_clock_breakdown": false
51 | }
--------------------------------------------------------------------------------
/configs/ds_config_zero3_cpu_offload.json:
--------------------------------------------------------------------------------
1 | {
2 | "bf16": {
3 | "enabled": "auto"
4 | },
5 | "optimizer": {
6 | "type": "AdamW",
7 | "params": {
8 | "lr": "auto",
9 | "betas": "auto",
10 | "eps": "auto",
11 | "weight_decay": "auto"
12 | }
13 | },
14 |
15 | "scheduler": {
16 | "type": "WarmupLR",
17 | "params": {
18 | "warmup_min_lr": "auto",
19 | "warmup_max_lr": "auto",
20 | "warmup_num_steps": "auto"
21 | }
22 | },
23 |
24 | "zero_optimization": {
25 | "stage": 3,
26 | "offload_optimizer": {
27 | "device": "cpu",
28 | "pin_memory": true
29 | },
30 | "offload_param": {
31 | "device": "cpu",
32 | "pin_memory": true
33 | },
34 | "overlap_comm": true,
35 | "contiguous_gradients": true,
36 | "sub_group_size": 1e9,
37 | "reduce_bucket_size": "auto",
38 | "stage3_prefetch_bucket_size": "auto",
39 | "stage3_param_persistence_threshold": "auto",
40 | "stage3_max_live_parameters": 1e9,
41 | "stage3_max_reuse_distance": 1e9,
42 | "stage3_gather_16bit_weights_on_model_save": true
43 | },
44 |
45 | "gradient_accumulation_steps": "auto",
46 | "gradient_clipping": "auto",
47 | "steps_per_print": 20,
48 | "train_batch_size": "auto",
49 | "train_micro_batch_size_per_gpu": "auto",
50 | "wall_clock_breakdown": false
51 | }
--------------------------------------------------------------------------------
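The "auto" fields in these DeepSpeed configs are filled in by the HuggingFace Trainer from its own TrainingArguments when the config path is passed through, which is presumably what finetuning/finetune.py does with the --deepspeed flag set by the shell scripts. A minimal sketch of that wiring, with an illustrative output path:

from transformers import TrainingArguments, Trainer

# Values set here (batch size, lr, bf16, ...) replace the "auto" placeholders in the JSON config.
training_args = TrainingArguments(
    output_dir="../outputs/checkpoints/example_run",
    deepspeed="../configs/ds_config_zero3.json",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=2e-5,
    num_train_epochs=3,
    bf16=True,
)
# trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
# trainer.train()
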
/scripts/Meta-Llama-3-8B/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1)
5 |
6 | LEARNING_RATE=2e-5
7 | NUM_TRAIN_EPOCHS=3
8 | VANILLA=False
9 |
10 | MODEL_NAME_OR_PATH=/data1/HF-Models/meta-llama/Meta-Llama-3-8B
11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
12 | MODEL=${MODEL_NAME_OR_PATH##*/}
13 |
14 | TEMPLATE=custom
15 | echo "Finetune data template: ${TEMPLATE}"
16 |
17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
18 | echo "Finetune data path: ${DATA_PATH}"
19 |
20 | MODEL_MAX_LENGTH=2048
21 | echo "Model max length: ${MODEL_MAX_LENGTH}"
22 |
23 | BATCH_SIZE=4
24 | echo "Per device train batch size: ${BATCH_SIZE}"
25 |
26 | GRAD_ACCUM=8
27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
28 |
29 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
30 | LOG_DIR=../logs
31 |
32 | deepspeed finetuning/finetune.py \
33 | --vanilla $VANILLA \
34 | --deepspeed ../configs/ds_config_zero3.json \
35 | --model_name_or_path $MODEL_NAME_OR_PATH \
36 | --template $TEMPLATE\
37 | --model_max_length $MODEL_MAX_LENGTH \
38 | --data_path $DATA_PATH \
39 | --output_dir $OUTPUT_DIR \
40 | --bf16 True \
41 | --tf32 True \
42 | --per_device_train_batch_size ${BATCH_SIZE} \
43 | --gradient_accumulation_steps ${GRAD_ACCUM} \
44 | --gradient_checkpointing True \
45 | --lr_scheduler_type cosine \
46 | --learning_rate ${LEARNING_RATE} \
47 | --warmup_ratio 0.05 \
48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
49 | --evaluation_strategy no \
50 | --save_strategy epoch \
51 | --save_total_limit 1 \
52 | --logging_steps 5 \
53 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1)
5 |
6 | LEARNING_RATE=2e-5
7 | NUM_TRAIN_EPOCHS=3
8 | VANILLA=False
9 |
10 | MODEL_NAME_OR_PATH=/data1/HF-Models/mistralai/Mistral-7B-v0.3
11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
12 | MODEL=${MODEL_NAME_OR_PATH##*/}
13 |
14 | TEMPLATE=custom
15 | echo "Finetune data template: ${TEMPLATE}"
16 |
17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
18 | echo "Finetune data path: ${DATA_PATH}"
19 |
20 | MODEL_MAX_LENGTH=2048
21 | echo "Model max length: ${MODEL_MAX_LENGTH}"
22 |
23 | BATCH_SIZE=4
24 | echo "Per device train batch size: ${BATCH_SIZE}"
25 |
26 | GRAD_ACCUM=8
27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
28 |
29 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
30 | LOG_DIR=../logs
31 |
32 | deepspeed finetuning/finetune.py \
33 | --vanilla $VANILLA \
34 | --deepspeed ../configs/ds_config_zero3.json \
35 | --model_name_or_path $MODEL_NAME_OR_PATH \
36 | --template $TEMPLATE\
37 | --model_max_length $MODEL_MAX_LENGTH \
38 | --data_path $DATA_PATH \
39 | --output_dir $OUTPUT_DIR \
40 | --bf16 True \
41 | --tf32 True \
42 | --per_device_train_batch_size ${BATCH_SIZE} \
43 | --gradient_accumulation_steps ${GRAD_ACCUM} \
44 | --gradient_checkpointing True \
45 | --lr_scheduler_type cosine \
46 | --learning_rate ${LEARNING_RATE} \
47 | --warmup_ratio 0.05 \
48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
49 | --evaluation_strategy no \
50 | --save_strategy epoch \
51 | --save_total_limit 1 \
52 | --logging_steps 5 \
53 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1)
5 |
6 | LEARNING_RATE=2e-5
7 | NUM_TRAIN_EPOCHS=3
8 | VANILLA=True
9 |
10 | MODEL_NAME_OR_PATH=/data1/HF-Models/mistralai/Mistral-7B-v0.3
11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
12 | MODEL=${MODEL_NAME_OR_PATH##*/}
13 |
14 | TEMPLATE=custom
15 | echo "Finetune data template: ${TEMPLATE}"
16 |
17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
18 | echo "Finetune data path: ${DATA_PATH}"
19 |
20 | MODEL_MAX_LENGTH=2048
21 | echo "Model max length: ${MODEL_MAX_LENGTH}"
22 |
23 | BATCH_SIZE=4
24 | echo "Per device train batch size: ${BATCH_SIZE}"
25 |
26 | GRAD_ACCUM=8
27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
28 |
29 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
30 | LOG_DIR=../logs
31 |
32 | deepspeed finetuning/finetune.py \
33 | --vanilla $VANILLA \
34 | --deepspeed ../configs/ds_config_zero3.json \
35 | --model_name_or_path $MODEL_NAME_OR_PATH \
36 | --template $TEMPLATE\
37 | --model_max_length $MODEL_MAX_LENGTH \
38 | --data_path $DATA_PATH \
39 | --output_dir $OUTPUT_DIR \
40 | --bf16 True \
41 | --tf32 True \
42 | --per_device_train_batch_size ${BATCH_SIZE} \
43 | --gradient_accumulation_steps ${GRAD_ACCUM} \
44 | --gradient_checkpointing True \
45 | --lr_scheduler_type cosine \
46 | --learning_rate ${LEARNING_RATE} \
47 | --warmup_ratio 0.05 \
48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
49 | --evaluation_strategy no \
50 | --save_strategy epoch \
51 | --save_total_limit 1 \
52 | --logging_steps 5 \
53 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | export MASTER_PORT=$(echo $METIS_WORKER_0_PORT | cut -d',' -f1)
5 |
6 | LEARNING_RATE=2e-5
7 | NUM_TRAIN_EPOCHS=3
8 | VANILLA=True
9 |
10 | MODEL_NAME_OR_PATH=/data1/HF-Models/meta-llama/Meta-Llama-3-8B
11 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
12 | MODEL=${MODEL_NAME_OR_PATH##*/}
13 |
14 | TEMPLATE=custom
15 | echo "Finetune data template: ${TEMPLATE}"
16 |
17 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
18 | echo "Finetune data path: ${DATA_PATH}"
19 |
20 | MODEL_MAX_LENGTH=2048
21 | echo "Model max length: ${MODEL_MAX_LENGTH}"
22 |
23 | BATCH_SIZE=4
24 | echo "Per device train batch size: ${BATCH_SIZE}"
25 |
26 | GRAD_ACCUM=8
27 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
28 |
29 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
30 | LOG_DIR=../logs
31 |
32 | deepspeed finetuning/finetune.py \
33 | --vanilla $VANILLA \
34 | --deepspeed ../configs/ds_config_zero3.json \
35 | --model_name_or_path $MODEL_NAME_OR_PATH \
36 | --template $TEMPLATE\
37 | --model_max_length $MODEL_MAX_LENGTH \
38 | --data_path $DATA_PATH \
39 | --output_dir $OUTPUT_DIR \
40 | --bf16 True \
41 | --tf32 True \
42 | --per_device_train_batch_size ${BATCH_SIZE} \
43 | --gradient_accumulation_steps ${GRAD_ACCUM} \
44 | --gradient_checkpointing True \
45 | --lr_scheduler_type cosine \
46 | --learning_rate ${LEARNING_RATE} \
47 | --warmup_ratio 0.05 \
48 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
49 | --evaluation_strategy no \
50 | --save_strategy epoch \
51 | --save_total_limit 1 \
52 | --logging_steps 5 \
53 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
54 |
--------------------------------------------------------------------------------
/src/utils/config.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 |
4 | class Inf:
5 | def __gt__(self, other):
6 | return True
7 |
8 | def __ge__(self, other):
9 | return True
10 |
11 | def __lt__(self, other):
12 | return False
13 |
14 | def __eq__(self, other):
15 | return isinstance(other, Inf)
16 |
17 | def __repr__(self):
18 | return "Inf"
19 |
20 |
21 | inf = Inf()
22 |
23 |
24 | # FLCG EXP
25 | LEVEL0 = ["10", "30", "50", "80"]
26 | LEVEL1 = ["150", "300", "500"]
27 | LEVEL2 = ["700", ">800"]
28 | RANGE = OrderedDict(
29 | {
30 | # level:0
31 | "10": {"PM": [0, 20], "FM": [0, 20]},
32 | "30": {"PM": [20, 40], "FM": [20, 40]},
33 | "50": {"PM": [40, 60], "FM": [40, 60]},
34 | "80": {"PM": [70, 90], "FM": [60, 100]},
35 | # level:1
36 | "150": {"PM": [130, 170], "FM": [100, 200]},
37 | "300": {"PM": [280, 320], "FM": [200, 400]},
38 | "500": {"PM": [450, 550], "FM": [400, 600]},
39 | # level:2
40 | "700": {"PM": [630, 770], "FM": [600, 800]},
41 | ">800": {"PM": [800, inf], "FM": [800, inf]},
42 | }
43 | )
44 |
45 | TARGET_LENGTH = list(RANGE.keys())
46 |
47 | MetaLengthToken = [
48 | ["[MLT:10]", [5, 15]],
49 | ["[MLT:30]", [25, 35]],
50 | ["[MLT:50]", [45, 55]],
51 | ["[MLT:80]", [75, 85]],
52 | ["[MLT:150]", [135, 155]],
53 | ["[MLT:300]", [295, 305]],
54 | ["[MLT:500]", [495, 505]],
55 | ["[MLT:700]", [695, 705]],
56 | ["[MLT:>800]", [800, inf]],
57 | ]
58 |
59 | # MLT training dataset
60 | SAMPLE = {
61 | "[MLT:10]": 10000 * 2,
62 | "[MLT:30]": 10000 * 2,
63 | "[MLT:50]": 10000 * 2,
64 | "[MLT:80]": 10000 * 2,
65 | "[MLT:150]": 10000 * 2,
66 | "[MLT:300]": 10000 * 2,
67 | "[MLT:500]": 10000 * 2,
68 | "[MLT:700]": 10000 * 2,
69 | "[MLT:>800]": 10000 * 2,
70 | }
71 |
--------------------------------------------------------------------------------
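RANGE maps each target-length bucket to a precise-match (PM) and flexible-match (FM) word-count window, and the Inf sentinel lets the open-ended ">800" bucket compare correctly against any integer. A small illustrative helper (not part of the repo) that checks whether a word count falls in a bucket's FM window, mirroring the (lower, upper] test used by the analysis scripts:

from utils.config import RANGE

def in_fm_window(target_length: str, word_count: int) -> bool:
    """Return True if word_count lies in the (lower, upper] FM window for target_length."""
    lower, upper = RANGE[target_length]["FM"]
    return word_count > lower and word_count <= upper

print(in_fm_window("300", 250))    # True  (200 < 250 <= 400)
print(in_fm_window(">800", 1200))  # True  (upper bound is the Inf sentinel)
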
/scripts/Qwen1.5-7B/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=0,1,2,3
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=False
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/Qwen/Qwen1.5-7B
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/Yi-1.5-6B/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=0,1,2,3
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=False
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/01-ai/Yi-1.5-6B
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/gemma-7b/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=False
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/google/gemma-7b
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/Qwen1.5-7B/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=0,1,2,3
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=True
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/Qwen/Qwen1.5-7B
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
66 |
--------------------------------------------------------------------------------
/scripts/Yi-1.5-6B/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=True
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/01-ai/Yi-1.5-6B
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
66 |
--------------------------------------------------------------------------------
/scripts/gemma-7b/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=True
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/google/gemma-7b
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
66 |
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/ruler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=False
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/deepseek-ai/deepseek-llm-7b-base
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/vanilla.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_VISIBLE_DEVICES=4,5,6,7
3 |
4 | find_free_port() {
5 | while :
6 | do
7 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
8 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
9 | if [ $? -ne 0 ]; then
10 | echo $PORT
11 | return
12 | fi
13 | done
14 | }
15 |
16 | export MASTER_PORT=$(find_free_port)
17 |
18 | LEARNING_RATE=2e-5
19 | NUM_TRAIN_EPOCHS=3
20 | VANILLA=True
21 |
22 | MODEL_NAME_OR_PATH=/data1/HF-Models/deepseek-ai/deepseek-llm-7b-base
23 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
24 | MODEL=${MODEL_NAME_OR_PATH##*/}
25 |
26 | TEMPLATE=custom
27 | echo "Finetune data template: ${TEMPLATE}"
28 |
29 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
30 | echo "Finetune data path: ${DATA_PATH}"
31 |
32 | MODEL_MAX_LENGTH=2048
33 | echo "Model max length: ${MODEL_MAX_LENGTH}"
34 |
35 | BATCH_SIZE=4
36 | echo "Per device train batch size: ${BATCH_SIZE}"
37 |
38 | GRAD_ACCUM=8
39 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
40 |
41 | OUTPUT_DIR="../outputs/checkpoints/vanilla_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
42 | LOG_DIR=../logs
43 |
44 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
45 | --vanilla $VANILLA \
46 | --deepspeed ../configs/ds_config_zero3.json \
47 | --model_name_or_path $MODEL_NAME_OR_PATH \
48 | --template $TEMPLATE\
49 | --model_max_length $MODEL_MAX_LENGTH \
50 | --data_path $DATA_PATH \
51 | --output_dir $OUTPUT_DIR \
52 | --bf16 True \
53 | --tf32 True \
54 | --per_device_train_batch_size ${BATCH_SIZE} \
55 | --gradient_accumulation_steps ${GRAD_ACCUM} \
56 | --gradient_checkpointing True \
57 | --lr_scheduler_type cosine \
58 | --learning_rate ${LEARNING_RATE} \
59 | --warmup_ratio 0.05 \
60 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
61 | --evaluation_strategy no \
62 | --save_strategy epoch \
63 | --save_total_limit 1 \
64 | --logging_steps 5 \
65 | 2>&1 | tee ${LOG_DIR}/output_vanilla_${MODEL}.log
66 |
--------------------------------------------------------------------------------
/src/exp/analysis_sgm.py:
--------------------------------------------------------------------------------
1 | import re
2 | import argparse
3 | from utils.config import MetaLengthToken, RANGE
4 | from utils import load_jsonl
5 | from rich.table import Table
6 | from rich.console import Console
7 | from utils.count import count_words
8 |
9 |
10 | def main(args):
11 | # raw data load
12 | df = load_jsonl(args.dataset_path)
13 | # draw table
14 | table = Table(show_header=True, header_style="bold magenta")
15 | table.add_column("Model", style="dim", width=15)
16 | for mlt in MetaLengthToken:
17 | table.add_column(mlt[0].split(":")[-1][:-1], justify="right")
18 | table.add_column("FM", justify="right")
19 | table.add_column("Avg", justify="right")
20 | count = {mlt[0]: 0 for mlt in MetaLengthToken}
21 | for d in df:
22 | if d["output"].count("MLT") != 1:
23 | d["output"] = "[MLT:" + d["output"].split("[MLT:")[1]
24 | for mlt in MetaLengthToken:
25 | if mlt[0] in d["output"]:
26 | count[mlt[0]] += 1
27 | hit = 0
28 | all_wc = 0
29 | for d in df:
30 | cleaned_text = re.sub(r"\[MLT:>?\d+\]", "", d["output"])  # strip the MLT tag (including [MLT:>800]) before counting words
31 | wc = count_words(cleaned_text)
32 | mlt = d["output"].split("]")[0] + "]"
33 | if (wc > RANGE[mlt.split(":")[-1][:-1]]["FM"][0]) and (
34 | wc <= RANGE[mlt.split(":")[-1][:-1]]["FM"][1]
35 | ):
36 | hit += 1
37 | all_wc += wc
38 | table.add_row(
39 | args.dataset_path.split("/")[-1].split("tl_")[-1][:15],
40 | f"{count['[MLT:10]']}",
41 | f"{count['[MLT:30]']}",
42 | f"{count['[MLT:50]']}",
43 | f"{count['[MLT:80]']}",
44 | f"{count['[MLT:150]']}",
45 | f"{count['[MLT:300]']}",
46 | f"{count['[MLT:500]']}",
47 | f"{count['[MLT:700]']}",
48 | f"{count['[MLT:>800]']}",
49 | f"{hit/len(df)*100:.2f}",
50 | f"{all_wc/len(df):.0f}",
51 | )
52 | console = Console()
53 | console.print(table)
54 |
55 |
56 | if __name__ == "__main__":
57 | parser = argparse.ArgumentParser()
58 | parser.add_argument("--dataset_path", type=str, default=None)
59 | args = parser.parse_args()
60 | main(args)
61 |
--------------------------------------------------------------------------------
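A quick check of the MLT-stripping pattern used above (the optional ">" ensures the open-ended token is removed as well):

import re

pattern = r"\[MLT:>?\d+\]"
print(re.sub(pattern, "", "[MLT:300] A short answer."))  # " A short answer."
print(re.sub(pattern, "", "[MLT:>800] A long answer."))  # " A long answer."
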
/src/exp/run_exp_api.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import openai
3 | import os
4 | from tqdm import tqdm
5 | from utils import load_jsonl, save_jsonl
6 | from dotenv import load_dotenv
7 |
8 |
9 | load_dotenv()
10 |
11 |
12 | def main(args):
13 | client = openai.OpenAI(
14 | api_key=args.key, base_url=os.getenv("OPENAI_BASE_URL")
15 | )
16 | def get_completion_openai(
17 | prompt: str,
18 | model: str,
19 | ) -> str:
20 | """
21 | Generate a completion using the OpenAI API.
22 |
23 | Args:
24 | prompt (str): The user's prompt or query.
25 |         model (str): The name of the OpenAI model to use for generating the completion,
26 |                 e.g. "gpt-4-turbo".
27 | """
28 | response = client.chat.completions.create(
29 | model=model,
30 | top_p=1,
31 | max_tokens=2048,
32 | messages=[
33 | {"role": "user", "content": prompt},
34 | ],
35 | )
36 | return response.choices[0].message.content
37 |
38 | # raw data load
39 | df = load_jsonl(args.dataset_path)
40 | # load tokenizer and llm
41 | for idx in tqdm(range(len(df))):
42 | instruction = df[idx]["Instruction"]
43 | targetlength = df[idx]["TargetLength"] if "TargetLength" in df[idx] else ""
44 | if targetlength != "":
45 | targetlength = targetlength.replace(">", "more than ")
46 | question = f"{instruction}\nThe response should have a word count of {targetlength} words."
47 | df[idx]["prompt"] = question
48 | flag = False
49 | while not flag:
50 | try:
51 | output = get_completion_openai(question, args.model)
52 | flag = True
53 | except Exception as e:
54 | print(e)
55 | df[idx]["output"] = output
56 | if idx % 10 == 0:
57 | save_jsonl(args.output_path, df)
58 | # save to output_path
59 | save_jsonl(args.output_path, df)
60 |
61 |
62 | if __name__ == "__main__":
63 | parser = argparse.ArgumentParser()
64 | parser.add_argument("--dataset_path", type=str, default=None)
65 | parser.add_argument("--model", type=str, default=None)
66 | parser.add_argument("--output_path", type=str, default=None)
67 | parser.add_argument("--key", type=str, default=None)
68 | args = parser.parse_args()
69 | main(args)
70 |
--------------------------------------------------------------------------------
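run_exp_api.py pulls OPENAI_BASE_URL from a .env file (via python-dotenv) and takes the API key as a flag. A hedged example invocation; the endpoint, model name, and paths below are placeholders:

    cd src
    echo "OPENAI_BASE_URL=https://api.openai.com/v1" > .env   # or your proxy endpoint
    python exp/run_exp_api.py \
        --dataset_path ../data/tlg_dataset.jsonl \            # hypothetical path
        --model gpt-4-turbo \
        --output_path ../outputs/tlg_gpt-4-turbo.jsonl \
        --key "$OPENAI_API_KEY"
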
/src/exp/analysis_mmlt.py:
--------------------------------------------------------------------------------
1 | import re
2 | import argparse
3 | from utils.config import MetaLengthToken, RANGE
4 | from utils import load_jsonl
5 | from rich.table import Table
6 | from rich.console import Console
7 | from utils.count import count_words
8 |
9 |
10 | def main(args):
11 | # raw data load
12 | df = load_jsonl(args.dataset_path)
13 | # draw table
14 | table = Table(show_header=True, header_style="bold magenta")
15 | table.add_column("Model", style="dim", width=15)
16 | for mlt in MetaLengthToken:
17 | table.add_column(mlt[0].split(':')[-1][:-1], justify="right")
18 | table.add_column('Acc', justify="right")
19 | count = {mlt[0]:0 for mlt in MetaLengthToken}
20 | hit = {mlt[0]:0 for mlt in MetaLengthToken}
21 | for d in df:
22 | count[f"[MLT:{d['TargetLength']}]"] += 1
23 | for d in df:
24 | wc = count_words(d['output'])
25 | mlt = f"[MLT:{d['TargetLength']}]"
26 | if (wc > RANGE[mlt.split(':')[-1][:-1]]['FM'][0]) and (wc <= RANGE[mlt.split(':')[-1][:-1]]['FM'][1]):
27 | hit[mlt] += 1
28 | # print(hit)
29 | # print(count)
30 | table.add_row(
31 | args.dataset_path.split('/')[-1].split('tl_')[-1][:15],
32 | f"{hit['[MLT:10]']/count['[MLT:10]']*100:.2f}",
33 | f"{hit['[MLT:30]']/count['[MLT:30]']*100:.2f}",
34 | f"{hit['[MLT:50]']/count['[MLT:50]']*100:.2f}",
35 | f"{hit['[MLT:80]']/count['[MLT:80]']*100:.2f}",
36 | f"{hit['[MLT:150]']/count['[MLT:150]']*100:.2f}",
37 | f"{hit['[MLT:300]']/count['[MLT:300]']*100:.2f}",
38 | f"{hit['[MLT:500]']/count['[MLT:500]']*100:.2f}",
39 | f"{hit['[MLT:700]']/count['[MLT:700]']*100:.2f}",
40 | f"{hit['[MLT:>800]']/count['[MLT:>800]']*100:.2f}",
41 | f"{sum(hit.values())/sum(count.values())*100:.2f}",
42 | )
43 | console = Console()
44 | console.print(table)
45 | latex = [
46 | f"{hit['[MLT:10]']/count['[MLT:10]']*100:.1f}",
47 | f"{hit['[MLT:30]']/count['[MLT:30]']*100:.1f}",
48 | f"{hit['[MLT:50]']/count['[MLT:50]']*100:.1f}",
49 | f"{hit['[MLT:80]']/count['[MLT:80]']*100:.1f}",
50 | f"{hit['[MLT:150]']/count['[MLT:150]']*100:.1f}",
51 | f"{hit['[MLT:300]']/count['[MLT:300]']*100:.1f}",
52 | f"{hit['[MLT:500]']/count['[MLT:500]']*100:.1f}",
53 | f"{hit['[MLT:700]']/count['[MLT:700]']*100:.1f}",
54 | f"{hit['[MLT:>800]']/count['[MLT:>800]']*100:.1f}",
55 | f"{sum(hit.values())/sum(count.values())*100:.2f}",
56 | ]
57 | print('&'.join(latex) + '\\\\')
58 |
59 |
60 | if __name__ == "__main__":
61 | parser = argparse.ArgumentParser()
62 | parser.add_argument("--dataset_path", type=str, default=None)
63 | args = parser.parse_args()
64 | main(args)
65 |
--------------------------------------------------------------------------------
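Because analysis_mmlt.py prints one table (plus one LaTeX row) per result file, it is convenient to sweep every model's output in a loop. A sketch assuming a hypothetical ../outputs/mmlt/ directory:

    cd src
    for f in ../outputs/mmlt/*.jsonl; do    # hypothetical directory layout
        python exp/analysis_mmlt.py --dataset_path "$f"
    done
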
/src/exp/run_exp.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from tqdm import tqdm
3 | from vllm import LLM, SamplingParams
4 | from transformers import AutoTokenizer
5 | from utils import load_jsonl, save_jsonl
6 | from utils.templates import TemplatesMapping
7 |
8 |
9 | def main(args):
10 | # raw data load
11 | df = load_jsonl(args.dataset_path)
12 | # load tokenizer and llm
13 | tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
14 | llm = LLM(
15 | model=args.model_name_or_path,
16 | trust_remote_code=True,
17 | tensor_parallel_size=args.gpus,
18 | )
19 | template = TemplatesMapping[args.template]
20 |     if args.template == "default":
21 | terminators = TemplatesMapping["default"].get_stop_tokens(
22 | args.model_name_or_path
23 | )
24 | elif "self_generated_mlt.jsonl" in args.dataset_path:
25 | terminators = ["<|end_of_text|>", "<|eot_id|>"]
26 | else:
27 | terminators = template.STOP_TOKENS
28 | print(f"> STOP_TOKENS:{terminators}")
29 | terminators = tokenizer.convert_tokens_to_ids(terminators)
30 |     skip_special_tokens = False if "self_generated_mlt.jsonl" in args.dataset_path else True
31 | sampling_params = SamplingParams(
32 | temperature=0,
33 | max_tokens=2048,
34 | stop_token_ids=terminators,
35 |         skip_special_tokens=skip_special_tokens,
36 | )
37 | for idx in tqdm(range(len(df))):
38 | instruction = df[idx]["Instruction"]
39 | targetlength = df[idx]["TargetLength"] if "TargetLength" in df[idx] else ""
40 | if args.template == "default":
41 | prompts = [
42 | template.apply_template_for_generation(
43 | instruction, targetlength, tokenizer
44 | )
45 | ]
46 | elif args.template == "custom":
47 | if "vanilla" in args.model_name_or_path:
48 | prompts = [
49 | template.apply_template_for_generation_vanilla(instruction, targetlength)
50 | ]
51 | else:
52 | prompts = [
53 | template.apply_template_for_generation(instruction, targetlength)
54 | ]
55 | else:
56 | prompts = [
57 | template.apply_template_for_generation(instruction, targetlength)
58 | ]
59 | df[idx]["prompt"] = prompts[0]
60 | outputs = llm.generate(prompts, sampling_params)
61 | for output in outputs:
62 | generated_text = output.outputs[0].text
63 | df[idx]["output"] = generated_text
64 | if idx % 100 == 0:
65 | save_jsonl(args.output_path, df)
66 | # save to output_path
67 | save_jsonl(args.output_path, df)
68 |
69 |
70 | if __name__ == "__main__":
71 | parser = argparse.ArgumentParser()
72 | parser.add_argument("--dataset_path", type=str, default=None)
73 | parser.add_argument("--model_name_or_path", type=str, default=None)
74 | parser.add_argument("--gpus", type=int, default=1)
75 | parser.add_argument("--template", type=str, default="default")
76 | parser.add_argument("--output_path", type=str, default=None)
77 | args = parser.parse_args()
78 | main(args)
79 |
--------------------------------------------------------------------------------
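run_exp.py serves the model through vLLM with greedy decoding (temperature 0, up to 2048 new tokens) and maps --gpus to tensor_parallel_size. A hedged single-GPU example; the model ID and paths are placeholders:

    cd src
    CUDA_VISIBLE_DEVICES=0 python exp/run_exp.py \
        --dataset_path ../data/tlg_dataset.jsonl \              # hypothetical path
        --model_name_or_path meta-llama/Meta-Llama-3-8B \
        --template default \
        --gpus 1 \
        --output_path ../outputs/tlg_Meta-Llama-3-8B.jsonl
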
/scripts/Yi-1.5-6B/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=7
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_Yi-1.5-6B
7 | MODEL_NAME_OR_PATH=/data1/lijiaming/Ruler/checkpoints/ruler_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
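The seven lm_eval calls in this script (and in the per-model variants that follow) differ only in the task name and few-shot count, so they can be collapsed into a loop. A sketch using the same variables, shown once here rather than repeated per script:

    # Few-shot settings as used above; requires bash 4+ for associative arrays.
    declare -A FEWSHOT=( [ai2_arc]=25 [hellaswag]=10 [truthfulqa]=0 [mmlu]=5 [winogrande]=5 [gsm8k]=5 )
    for TASK in ai2_arc hellaswag truthfulqa mmlu winogrande gsm8k; do
        lm_eval --model $MODEL \
            --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
            --tasks $TASK \
            --device cuda \
            --output_path ${OUTPUT_PATH}/${MODEL}_eval_${TASK} \
            --batch_size 1 \
            --num_fewshot ${FEWSHOT[$TASK]} \
            --write_out \
            2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_${TASK}.log
    done
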
/scripts/Yi-1.5-6B/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=6
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_Yi-1.5-6B
7 | MODEL_NAME_OR_PATH=/data1/lijiaming/Ruler/checkpoints/vanilla_Yi-1.5-6B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/gemma-7b/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=2
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_gemma-7b
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/Qwen1.5-7B/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=4
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_Qwen1.5-7B
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/gemma-7b/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=3
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_gemma-7b
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_gemma-7b_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/src/exp/cal_mlt_scores.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils.config import RANGE, LEVEL0, LEVEL1, LEVEL2
3 | from utils import load_jsonl
4 | from rich.table import Table
5 | from rich.console import Console
6 | from utils.count import count_words
7 |
8 |
9 | def metric_targetlength(df, LEVEL):
10 | result = {
11 | targetlength: {"PM": {"in": 0, "out": 0}, "FM": {"in": 0, "out": 0}}
12 | for targetlength in LEVEL
13 | }
14 | for d in df:
15 | length = count_words(d["output"])
16 | if d["TargetLength"] in result:
17 | # PM
18 | if (
19 | length > RANGE[d["TargetLength"]]["PM"][0]
20 | and length <= RANGE[d["TargetLength"]]["PM"][1]
21 | ):
22 | result[d["TargetLength"]]["PM"]["in"] += 1
23 | else:
24 | result[d["TargetLength"]]["PM"]["out"] += 1
25 | # FM
26 | if (
27 | length > RANGE[d["TargetLength"]]["FM"][0]
28 | and length <= RANGE[d["TargetLength"]]["FM"][1]
29 | ):
30 | result[d["TargetLength"]]["FM"]["in"] += 1
31 | else:
32 | result[d["TargetLength"]]["FM"]["out"] += 1
33 | # draw table
34 | table = Table(show_header=True, header_style="bold magenta")
35 | table.add_column("TargetLength", style="dim", width=12)
36 | table.add_column("PM_in", justify="right")
37 | table.add_column("PM_out", justify="right")
38 | table.add_column("PM", justify="right")
39 | table.add_column("FM_in", justify="right")
40 | table.add_column("FM_out", justify="right")
41 | table.add_column("FM", justify="right")
42 | # latex_str = ""
43 | for key in result:
44 | table.add_row(
45 | key,
46 | f"{result[key]['PM']['in']}",
47 | f"{result[key]['PM']['out']}",
48 | f"{result[key]['PM']['in'] / (result[key]['PM']['in'] + result[key]['PM']['out'])*100:.2f}",
49 | f"{result[key]['FM']['in']}",
50 | f"{result[key]['FM']['out']}",
51 | f"{result[key]['FM']['in'] / (result[key]['FM']['in'] + result[key]['FM']['out'])*100:.2f}",
52 | )
53 | # latex_str = (
54 | # latex_str
55 | # + "&"
56 | # + f"{result[key]['PM']['in'] / (result[key]['PM']['in'] + result[key]['PM']['out'])*100:.2f}"
57 | # + "&"
58 | # + f"{result[key]['FM']['in'] / (result[key]['FM']['in'] + result[key]['FM']['out'])*100:.2f}"
59 | # )
60 | table.add_row(
61 | "Total",
62 |         f"{sum([result[key]['PM']['in'] for key in result])}",
63 |         f"{sum([result[key]['PM']['out'] for key in result])}",
64 |         f"{sum([result[key]['PM']['in'] for key in result]) / (sum([result[key]['PM']['in'] for key in result]) + sum([result[key]['PM']['out'] for key in result]))*100:.2f}",
65 |         f"{sum([result[key]['FM']['in'] for key in result])}",
66 |         f"{sum([result[key]['FM']['out'] for key in result])}",
67 |         f"{sum([result[key]['FM']['in'] for key in result]) / (sum([result[key]['FM']['in'] for key in result]) + sum([result[key]['FM']['out'] for key in result]))*100:.2f}",
68 | )
69 | console = Console()
70 | console.print(table)
71 | # print(latex_str)
72 |
73 |
74 | def main(args):
75 | # raw data load
76 | df = load_jsonl(args.dataset_path)
77 | print(f"> LEVEL0{'='*20}")
78 | metric_targetlength(df, LEVEL0)
79 | print(f"> LEVEL1{'='*20}")
80 | metric_targetlength(df, LEVEL1)
81 | print(f"> LEVEL2{'='*20}")
82 | metric_targetlength(df, LEVEL2)
83 |
84 |
85 | if __name__ == "__main__":
86 | parser = argparse.ArgumentParser()
87 | parser.add_argument("--dataset_path", type=str, default=None)
88 | args = parser.parse_args()
89 | main(args)
90 |
--------------------------------------------------------------------------------
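cal_mlt_scores.py reports PM/FM hit tables for the three target-length levels defined in utils/config.py; it is invoked like the other analysis scripts (the result path below is hypothetical):

    cd src
    python exp/cal_mlt_scores.py \
        --dataset_path ../outputs/tlg_ruler_Meta-Llama-3-8B.jsonl   # hypothetical path
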
/scripts/Qwen1.5-7B/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=5
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_Qwen1.5-7B
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Qwen1.5-7B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=0
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_Meta-Llama-3-8B
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=0
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_Mistral-7B-v0.3
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/Meta-Llama-3-8B/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=0
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_Meta-Llama-3-8B
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Meta-Llama-3-8B_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/ruler_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=1
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=ruler_deepseek-llm-7b-base
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/ruler_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/overall_performance/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/deepseek-llm-7b-base/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=1
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_deepseek-llm-7b-base
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_deepseek-llm-7b-base_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | # lm_eval --model $MODEL \
16 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | # --tasks leaderboard \
18 | # --device cuda \
19 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | # --batch_size 1 \
21 | # --write_out \
22 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | lm_eval --model $MODEL \
25 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | --tasks ai2_arc \
27 | --device cuda \
28 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | --batch_size 1 \
30 | --num_fewshot 25 \
31 | --write_out \
32 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | lm_eval --model $MODEL \
35 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | --tasks hellaswag \
37 | --device cuda \
38 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | --batch_size 1 \
40 | --num_fewshot 10 \
41 | --write_out \
42 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | lm_eval --model $MODEL \
45 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | --tasks truthfulqa \
47 | --device cuda \
48 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | --batch_size 1 \
50 | --num_fewshot 0 \
51 | --write_out \
52 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | lm_eval --model $MODEL \
55 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | --tasks mmlu \
57 | --device cuda \
58 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | --batch_size 1 \
60 | --num_fewshot 5 \
61 | --write_out \
62 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | lm_eval --model $MODEL \
65 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | --tasks winogrande \
67 | --device cuda \
68 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | --batch_size 1 \
70 | --num_fewshot 5 \
71 | --write_out \
72 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | lm_eval --model $MODEL \
75 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | --tasks gsm8k \
77 | --device cuda \
78 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | --batch_size 1 \
80 | --num_fewshot 5 \
81 | --write_out \
82 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/scripts/Mistral-7B-v0.3/vanilla_lm_eval.sh:
--------------------------------------------------------------------------------
1 | set -ex
2 | export CUDA_VISIBLE_DEVICES=0
3 | export NUMEXPR_MAX_THREADS=128
4 |
5 | MODEL=vllm
6 | MODEL_NAME=vanilla_Mistral-7B-v0.3
7 | MODEL_NAME_OR_PATH=/home/lijiaming/workspace/Seed/Seed-Ruler/outputs/checkpoints/vanilla_Mistral-7B-v0.3_bs_4_ga_8_lr_2e-5_eps_3/checkpoint-2841
8 | OUTPUT_PATH=../outputs/other_tasks/${MODEL_NAME}
9 | TOKENIZER_MODE=auto
10 | NUM_GPUS=1
11 | GPU_MEMORY_UTILIZATION=0.8
12 |
13 | mkdir -p $OUTPUT_PATH
14 |
15 | lm_eval --model $MODEL \
16 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
17 | --tasks leaderboard \
18 | --device cuda \
19 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_leaderboard \
20 | --batch_size 1 \
21 | --write_out \
22 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_leaderboard.log
23 |
24 | # lm_eval --model $MODEL \
25 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
26 | # --tasks ai2_arc \
27 | # --device cuda \
28 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
29 | # --batch_size 1 \
30 | # --num_fewshot 25 \
31 | # --write_out \
32 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
33 |
34 | # lm_eval --model $MODEL \
35 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
36 | # --tasks hellaswag \
37 | # --device cuda \
38 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
39 | # --batch_size 1 \
40 | # --num_fewshot 10 \
41 | # --write_out \
42 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
43 |
44 | # lm_eval --model $MODEL \
45 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
46 | # --tasks truthfulqa \
47 | # --device cuda \
48 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
49 | # --batch_size 1 \
50 | # --num_fewshot 0 \
51 | # --write_out \
52 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
53 |
54 | # lm_eval --model $MODEL \
55 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
56 | # --tasks mmlu \
57 | # --device cuda \
58 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
59 | # --batch_size 1 \
60 | # --num_fewshot 5 \
61 | # --write_out \
62 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
63 |
64 | # lm_eval --model $MODEL \
65 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
66 | # --tasks winogrande \
67 | # --device cuda \
68 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
69 | # --batch_size 1 \
70 | # --num_fewshot 5 \
71 | # --write_out \
72 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
73 |
74 | # lm_eval --model $MODEL \
75 | # --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
76 | # --tasks gsm8k \
77 | # --device cuda \
78 | # --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
79 | # --batch_size 1 \
80 | # --num_fewshot 5 \
81 | # --write_out \
82 | # 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | outputs/
163 | datasets/
164 | logs/
165 | test/
166 | !datasets/download.sh
--------------------------------------------------------------------------------
/src/finetuning/dataset.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import PreTrainedTokenizer
3 | from datasets import load_dataset
4 | from utils.config import MetaLengthToken
5 | from typing import Dict, Sequence
6 |
7 | IGNORE_INDEX = -100
8 |
9 |
10 | class DataCollatorForSupervisedDataset:
11 | """Collate examples for supervised fine-tuning."""
12 |
13 | def __init__(self, tokenizer: PreTrainedTokenizer):
14 | self.tokenizer = tokenizer
15 |
16 | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
17 | input_ids, labels = tuple(
18 | [instance[key] for instance in instances] for key in ("input_ids", "labels")
19 | )
20 | input_ids = [torch.tensor(x) for x in input_ids]
21 | input_ids = torch.nn.utils.rnn.pad_sequence(
22 | input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
23 | )
24 | labels = [torch.tensor(x) for x in labels]
25 | labels = torch.nn.utils.rnn.pad_sequence(
26 | labels, batch_first=True, padding_value=IGNORE_INDEX
27 | )
28 |
29 | return dict(
30 | input_ids=input_ids,
31 | labels=labels,
32 | attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
33 | )
34 |
35 |
36 | def preprocess_template(instruction, mlt, output, tokenizer, template, vanilla):
37 | if vanilla:
38 | mlt = ''
39 | prompts = template.apply_template(instruction, mlt, output)
40 | input_ids = tokenizer.encode(
41 | prompts, truncation=True, max_length=tokenizer.model_max_length
42 | ) # truncation
43 | split_token_idx = None
44 |     for i in MetaLengthToken:  # locate the MLT token; loss is computed only from it onward
45 | i_id = tokenizer.convert_tokens_to_ids(i[0])
46 | if i_id in input_ids:
47 | split_token_idx = input_ids.index(i_id)
48 | if split_token_idx is None:
49 | labels = [IGNORE_INDEX for _ in range(len(input_ids))]
50 | else:
51 | labels = [
52 | input_ids[i] if i >= split_token_idx else IGNORE_INDEX
53 | for i in range(len(input_ids))
54 | ]
55 |     # vanilla finetuning: supervise everything after the instruction prompt instead of after the MLT
56 | if vanilla:
57 | instruction_prompts = template.apply_template_for_instruction(instruction)
58 | instruction_ids = tokenizer.encode(
59 | instruction_prompts, truncation=True, max_length=tokenizer.model_max_length
60 | )
61 | labels = [
62 | input_ids[i] if i >= len(instruction_ids) else IGNORE_INDEX
63 | for i in range(len(input_ids))
64 | ]
65 |
66 | return input_ids, labels
67 |
68 |
69 | def preprocess(examples, tokenizer, template, vanilla):
70 | processed_input_ids, processed_labels = [], []
71 |
72 | instructions, mlts, outputs = (
73 | examples["Instruction"],
74 | examples["mlt"],
75 | examples["output"],
76 | )
77 | for instruction, mlt, output in zip(instructions, mlts, outputs):
78 | input_ids, labels = preprocess_template(
79 | instruction, mlt, output, tokenizer, template, vanilla
80 | )
81 |
82 | processed_input_ids.append(input_ids)
83 | processed_labels.append(labels)
84 |
85 | return {"input_ids": processed_input_ids, "labels": processed_labels}
86 |
87 |
88 | def load_custom_dataset(tokenizer: PreTrainedTokenizer, data_path: str, template, vanilla):
89 | train_datasets = load_dataset("json", data_files=data_path, split="train")
90 |
91 | train_dataset = train_datasets.map(
92 | preprocess,
93 | batched=True,
94 | batch_size=3000,
95 | num_proc=32,
96 | remove_columns=train_datasets.column_names,
97 | keep_in_memory=True,
98 | load_from_cache_file=False,
99 | desc="Running Encoding",
100 |         fn_kwargs={"tokenizer": tokenizer, "template": template, "vanilla": vanilla},
101 | )
102 |
103 | torch.distributed.barrier()
104 |
105 | return train_dataset
106 |
--------------------------------------------------------------------------------
/src/exp/cal_elm_rmse.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils.config import LEVEL0, LEVEL1, LEVEL2
3 | from utils import load_jsonl
4 | from utils.count import count_words
5 | from rich.table import Table
6 | from rich.console import Console
7 | from sklearn.metrics import root_mean_squared_error
8 |
9 |
10 | def calculate_rmse(actual, predicted):
11 | """
12 | Calculate the Root Mean Square Error between two arrays using scikit-learn.
13 |
14 | Parameters:
15 | actual (array-like): The array of actual values.
16 | predicted (array-like): The array of predicted values.
17 |
18 | Returns:
19 | float: The calculated RMSE value.
20 | """
21 | # Calculate the RMSE
22 | rmse = root_mean_squared_error(actual, predicted)
23 |
24 | return rmse
25 |
26 |
27 | def elm(list1, list2):
28 | """
29 | Count the number of elements that are the same in both lists at the same positions.
30 |
31 | Parameters:
32 | list1 (list): The first list.
33 | list2 (list): The second list.
34 |
35 | Returns:
36 | int: The count of elements that are the same at the same positions.
37 | """
38 | # Use zip to pair the elements and then check for equality
39 | same_position_count = sum(1 for a, b in zip(list1, list2) if a == b)
40 |
41 | return same_position_count
42 |
43 |
44 | def main(args):
45 | # raw data load
46 | df = load_jsonl(args.dataset_path)
47 | # calculate metric
48 | predicted_lengths = []
49 | target_lengths = []
50 | predicted_lengths_0, predicted_lengths_1, predicted_lengths_2 = [], [], []
51 | target_lengths_0, target_lengths_1, target_lengths_2 = [], [], []
52 | for d in df:
53 | length = count_words(d["output"])
54 | if d["TargetLength"] != ">800":
55 | predicted_lengths.append(length)
56 | target_lengths.append(int(d["TargetLength"]))
57 | if d["TargetLength"] in LEVEL0:
58 | predicted_lengths_0.append(length)
59 | target_lengths_0.append(int(d["TargetLength"]))
60 | elif d["TargetLength"] in LEVEL1:
61 | predicted_lengths_1.append(length)
62 | target_lengths_1.append(int(d["TargetLength"]))
63 | elif d["TargetLength"] in LEVEL2 and d["TargetLength"] != ">800":
64 | predicted_lengths_2.append(length)
65 | target_lengths_2.append(int(d["TargetLength"]))
66 | else:
67 | if d["TargetLength"] != ">800":
68 | raise KeyError
69 | table = Table(show_header=True, header_style="bold magenta")
70 | table.add_column("Model", style="dim", width=12)
71 | table.add_column("Level 0_elm", justify="right")
72 | table.add_column("Level 0_rmse", justify="right")
73 | table.add_column("Level 1_elm", justify="right")
74 | table.add_column("Level 1_rmse", justify="right")
75 | table.add_column("Level 2_elm", justify="right")
76 | table.add_column("Level 2_rmse", justify="right")
77 | table.add_column("All Level_elm", justify="right")
78 |     table.add_column("All Level_rmse", justify="right")
79 | table.add_row(
80 | args.dataset_path.split("/")[-1][4:],
81 | f"{elm(target_lengths_0,predicted_lengths_0)/len(predicted_lengths_0)*100:.2f}",
82 | f"{calculate_rmse(predicted_lengths_0,target_lengths_0):.2f}",
83 | f"{elm(target_lengths_1,predicted_lengths_1)/len(predicted_lengths_1)*100:.2f}",
84 | f"{calculate_rmse(predicted_lengths_1,target_lengths_1):.2f}",
85 | f"{elm(target_lengths_2,predicted_lengths_2)/len(predicted_lengths_2)*100:.2f}",
86 | f"{calculate_rmse(predicted_lengths_2,target_lengths_2):.2f}",
87 | f"{elm(target_lengths,predicted_lengths)/len(predicted_lengths)*100:.2f}",
88 | f"{calculate_rmse(predicted_lengths,target_lengths):.2f}",
89 | )
90 | console = Console()
91 | console.print(table)
92 | print(f"{elm(target_lengths_0,predicted_lengths_0)/len(predicted_lengths_0)*100:.2f}/{calculate_rmse(predicted_lengths_0,target_lengths_0):.2f}|{elm(target_lengths_1,predicted_lengths_1)/len(predicted_lengths_1)*100:.2f}/{calculate_rmse(predicted_lengths_1,target_lengths_1):.2f}|{elm(target_lengths_2,predicted_lengths_2)/len(predicted_lengths_2)*100:.2f}/{calculate_rmse(predicted_lengths_2,target_lengths_2):.2f}|{elm(target_lengths,predicted_lengths)/len(predicted_lengths)*100:.2f}/{calculate_rmse(predicted_lengths,target_lengths):.2f}|")
93 |
94 |
95 | if __name__ == "__main__":
96 | parser = argparse.ArgumentParser()
97 | parser.add_argument("--dataset_path", type=str, default=None)
98 | args = parser.parse_args()
99 | main(args)
100 |
--------------------------------------------------------------------------------
/src/exp/cal_level_scores.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils.config import RANGE, LEVEL0, LEVEL1, LEVEL2
3 | from utils import load_jsonl
4 | from rich.table import Table
5 | from rich.console import Console
6 | from utils.count import count_words
7 |
8 |
9 | def main(args):
10 | result = RANGE
11 | for key in result:
12 | (
13 | result[key]["PM_in"],
14 | result[key]["PM_out"],
15 | result[key]["FM_in"],
16 | result[key]["FM_out"],
17 | ) = 0, 0, 0, 0
18 | # raw data load
19 | df = load_jsonl(args.dataset_path)
20 | # calculate metric
21 | for d in df:
22 | length = count_words(d["output"])
23 | # PM
24 | if (
25 | length > result[d["TargetLength"]]["PM"][0]
26 | and length <= result[d["TargetLength"]]["PM"][1]
27 | ):
28 | result[d["TargetLength"]]["PM_in"] += 1
29 | else:
30 | result[d["TargetLength"]]["PM_out"] += 1
31 | # FM
32 | if (
33 | length > result[d["TargetLength"]]["FM"][0]
34 | and length <= result[d["TargetLength"]]["FM"][1]
35 | ):
36 | result[d["TargetLength"]]["FM_in"] += 1
37 | else:
38 | result[d["TargetLength"]]["FM_out"] += 1
39 |     # level 0
40 |     level0_pm_in, level0_pm_out, level0_fm_in, level0_fm_out = 0, 0, 0, 0
41 |     # level 1
42 |     level1_pm_in, level1_pm_out, level1_fm_in, level1_fm_out = 0, 0, 0, 0
43 |     # level 2
44 |     level2_pm_in, level2_pm_out, level2_fm_in, level2_fm_out = 0, 0, 0, 0
45 |     for key in result:
46 |         if key in LEVEL0:
47 |             level0_pm_in += result[key]["PM_in"]
48 |             level0_pm_out += result[key]["PM_out"]
49 |             level0_fm_in += result[key]["FM_in"]
50 |             level0_fm_out += result[key]["FM_out"]
51 |         elif key in LEVEL1:
52 |             level1_pm_in += result[key]["PM_in"]
53 |             level1_pm_out += result[key]["PM_out"]
54 |             level1_fm_in += result[key]["FM_in"]
55 |             level1_fm_out += result[key]["FM_out"]
56 |         elif key in LEVEL2:
57 |             level2_pm_in += result[key]["PM_in"]
58 |             level2_pm_out += result[key]["PM_out"]
59 |             level2_fm_in += result[key]["FM_in"]
60 |             level2_fm_out += result[key]["FM_out"]
61 | # draw table
62 | table = Table(show_header=True, header_style="bold magenta")
63 | table.add_column("Level", style="dim", width=12)
64 | table.add_column("PM_in", justify="right")
65 | table.add_column("PM_out", justify="right")
66 | table.add_column("PM", justify="right")
67 | table.add_column("FM_in", justify="right")
68 | table.add_column("FM_out", justify="right")
69 | table.add_column("FM", justify="right")
70 | table.add_row(
71 | "Level:0",
72 |         f"{level0_pm_in}",
73 |         f"{level0_pm_out}",
74 |         f"{level0_pm_in/(level0_pm_in + level0_pm_out)*100:.2f}",
75 |         f"{level0_fm_in}",
76 |         f"{level0_fm_out}",
77 |         f"{level0_fm_in/(level0_fm_in + level0_fm_out)*100:.2f}",
78 | )
79 | table.add_row(
80 | "Level:1",
81 |         f"{level1_pm_in}",
82 |         f"{level1_pm_out}",
83 |         f"{level1_pm_in/(level1_pm_in + level1_pm_out)*100:.2f}",
84 |         f"{level1_fm_in}",
85 |         f"{level1_fm_out}",
86 |         f"{level1_fm_in/(level1_fm_in + level1_fm_out)*100:.2f}",
87 | )
88 | table.add_row(
89 | "Level:2",
90 |         f"{level2_pm_in}",
91 |         f"{level2_pm_out}",
92 |         f"{level2_pm_in/(level2_pm_in + level2_pm_out)*100:.2f}",
93 |         f"{level2_fm_in}",
94 |         f"{level2_fm_out}",
95 |         f"{level2_fm_in/(level2_fm_in + level2_fm_out)*100:.2f}",
96 | )
97 | table.add_row(
98 | "All Level",
99 |         f"{level0_pm_in + level1_pm_in + level2_pm_in}",
100 |         f"{level0_pm_out + level1_pm_out + level2_pm_out}",
101 |         f"{(level0_pm_in + level1_pm_in + level2_pm_in)/(level0_pm_in + level1_pm_in + level2_pm_in + level0_pm_out + level1_pm_out + level2_pm_out)*100:.2f}",
102 |         f"{level0_fm_in + level1_fm_in + level2_fm_in}",
103 |         f"{level0_fm_out + level1_fm_out + level2_fm_out}",
104 |         f"{(level0_fm_in + level1_fm_in + level2_fm_in)/(level0_fm_in + level1_fm_in + level2_fm_in + level0_fm_out + level1_fm_out + level2_fm_out)*100:.2f}",
105 | )
106 | console = Console()
107 | console.print(table)
108 |
109 |
110 | if __name__ == "__main__":
111 | parser = argparse.ArgumentParser()
112 | parser.add_argument("--dataset_path", type=str, default=None)
113 | args = parser.parse_args()
114 | main(args)
115 |
--------------------------------------------------------------------------------
/src/finetuning/finetune.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import transformers
4 |
5 | from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, Trainer
6 | from dataclasses import dataclass, field
7 |
8 | from dataset import load_custom_dataset, DataCollatorForSupervisedDataset
9 | from utils.config import MetaLengthToken
10 | from utils.templates import TemplatesMapping
11 |
12 |
13 | @dataclass
14 | class ModelArguments:
15 | model_name_or_path: str = field(
16 | default="",
17 | metadata={"help": "The model checkpoint for weights initialization."},
18 | )
19 | template: str = field(default="", metadata={"help": "The template used to train"})
20 |
21 |
22 | @dataclass
23 | class DataArguments:
24 | data_path: str = field(
25 | default=None, metadata={"help": "Path to the training data."}
26 | )
27 |
28 |
29 | @dataclass
30 | class TrainingArguments(transformers.TrainingArguments):
31 | vanilla: bool = field(
32 | default=False,
33 |         metadata={"help": "Vanilla finetuning or Ruler finetuning, default is False."},
34 | )
35 | model_max_length: int = field(
36 | default=2048,
37 | metadata={
38 | "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
39 | },
40 | )
41 | gradient_checkpointing_kwargs: dict = field(
42 | default_factory=lambda: {"use_reentrant": False},
43 | metadata={"help": "gradient checkpointing kwargs"},
44 | )
45 |
46 |
47 | if __name__ == "__main__":
48 | parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
49 | model_args, data_args, training_args = parser.parse_args_into_dataclasses()
50 | print(training_args.vanilla)
51 | if training_args.local_rank == 0:
52 | print("=" * 100)
53 | print(training_args)
54 |
55 | if training_args.local_rank == 0:
56 | print("> Loading tokenizer from {}".format(model_args.model_name_or_path))
57 |
58 | tokenizer = AutoTokenizer.from_pretrained(
59 | model_args.model_name_or_path,
60 | model_max_length=training_args.model_max_length,
61 | padding_side="right",
62 | truncation_side="right",
63 | use_fast=True,
64 | trust_remote_code=True,
65 | )
66 | template = TemplatesMapping[model_args.template]
67 |     # add special tokens: vanilla registers only the template's chat tokens; the custom template also adds the Meta Length Tokens; other templates add the MLTs alone
68 | if training_args.vanilla:
69 | special_tokens = {"additional_special_tokens": [t for t in template.SPECIAL_TOKENS]}
70 | elif model_args.template == 'custom':
71 |         special_tokens = {"additional_special_tokens": [t for t in template.SPECIAL_TOKENS + [m[0] for m in MetaLengthToken]]}
72 | else:
73 | special_tokens = {"additional_special_tokens": [t[0] for t in MetaLengthToken]}
74 | print(f"> New special tokens: {special_tokens}")
75 | tokenizer.add_special_tokens(special_tokens)
76 | for st in special_tokens["additional_special_tokens"]:
77 | print(f"{st}:{tokenizer.convert_tokens_to_ids(st)}")
78 |
79 | tokenizer.pad_token = (
80 | tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token
81 | )
82 | if training_args.local_rank == 0:
83 | print("> PAD Token:", tokenizer.pad_token, tokenizer.pad_token_id)
84 | print("> BOS Token", tokenizer.bos_token, tokenizer.bos_token_id)
85 | print("> EOS Token", tokenizer.eos_token, tokenizer.eos_token_id)
86 |
87 | if training_args.local_rank == 0:
88 | print("> Loading model from {}".format(model_args.model_name_or_path))
89 |
90 |     if "glm-4" in model_args.model_name_or_path:  # glm-4 does not support flash attention 2
91 | model = AutoModelForCausalLM.from_pretrained(
92 | model_args.model_name_or_path,
93 | torch_dtype=torch.bfloat16,
94 | trust_remote_code=True,
95 | )
96 | else:
97 | model = AutoModelForCausalLM.from_pretrained(
98 | model_args.model_name_or_path,
99 | attn_implementation="flash_attention_2",
100 | torch_dtype=torch.bfloat16,
101 | trust_remote_code=True,
102 | )
103 | model.resize_token_embeddings(len(tokenizer))
104 | train_dataset = load_custom_dataset(
105 | tokenizer=tokenizer,
106 | data_path=data_args.data_path,
107 | template=template,
108 | vanilla=training_args.vanilla,
109 | )
110 |
111 | if training_args.local_rank == 0:
112 | print("> Training dataset samples:", len(train_dataset))
113 | for index in random.sample(range(len(train_dataset)), 3):
114 | print("=" * 100)
115 | print(
116 | f"Sample {index} of the training set:\n{tokenizer.decode(list(train_dataset[index]['input_ids']))}"
117 | )
118 | print(f"{train_dataset[index]['input_ids']}")
119 | print(f"{train_dataset[index]['labels']}")
120 |
121 | data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
122 |
123 | trainer = Trainer(
124 | model=model,
125 | tokenizer=tokenizer,
126 | args=training_args,
127 | train_dataset=train_dataset,
128 | data_collator=data_collator,
129 | )
130 |
131 | trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
132 |
--------------------------------------------------------------------------------
/src/data_process/build_training_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import nlp
3 | import random
4 | import argparse
5 | import pandas as pd
6 | from utils import load_jsonl, save_jsonl
7 | from utils.config import MetaLengthToken, SAMPLE
8 | from utils.count import count_words
9 |
10 |
11 |
12 |
13 | def list_files(directory):
14 | return [
15 | f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))
16 | ]
17 |
18 |
19 | def add_MLT(instruction: str):
20 | result = None
21 | word_count = count_words(instruction)
22 | for mlt in MetaLengthToken:
23 | if word_count > mlt[1][0] and word_count <= mlt[1][1]:
24 | result = mlt[0]
25 | return result
26 |
27 |
28 | def process_OpenHermes(dataset_path, random_seed, num):
29 | # set random seed
30 | random.seed(random_seed)
31 | df = load_jsonl(dataset_path)
32 | random.shuffle(df)
33 | print(f"{'='*10}First data in TLG Dataset{'='*10}")
34 | print(df[0]["conversations"][0]["value"])
35 | print(f"{'='*10}Last data in TLG Dataset{'='*10}")
36 | print(df[num - 1]["conversations"][0]["value"])
37 | print("=" * 20)
38 | df = df[num:] # cut off the FLCG exp dataset
39 | # sampled data
40 | sampled_data = {key[0]: [] for key in MetaLengthToken}
41 | for idx in range(len(df)):
42 | d = {}
43 | d["Instruction"] = df[idx]["conversations"][0]["value"]
44 | d["word_count"] = len(df[idx]["conversations"][1]["value"].split())
45 | d["output"] = df[idx]["conversations"][1]["value"]
46 | d["mlt"] = add_MLT(df[idx]["conversations"][1]["value"])
47 | if d["mlt"] is not None:
48 | sampled_data[d["mlt"]].append(d)
49 | return sampled_data
50 |
51 |
52 | def process_longform(dir_path):
53 | # sampled data
54 | sampled_data = {key[0]: [] for key in MetaLengthToken}
55 | longform_files = list_files(dir_path)
56 | for file in longform_files:
57 | df = pd.read_parquet(f"{dir_path}/{file}")
58 | for idx in range(df.shape[0]):
59 | d = {}
60 | d["Instruction"] = df.iloc[idx]["input"]
61 | d["word_count"] = len(df.iloc[idx]["output"].split())
62 | d["output"] = df.iloc[idx]["output"]
63 | d["mlt"] = add_MLT(df.iloc[idx]["output"])
64 | if d["mlt"] is not None:
65 | sampled_data[d["mlt"]].append(d)
66 | return sampled_data
67 |
68 |
69 | def process_eli5():
70 | # sampled data
71 | sampled_data = {key[0]: [] for key in MetaLengthToken}
72 | eli5 = nlp.load_dataset("eli5")
73 | files = ["train_eli5", "test_eli5", "validation_eli5"]
74 | for file in files:
75 | for data in eli5[file]:
76 | d = {}
77 | d["Instruction"] = data["title"]
78 | answer = ""
79 | for i in data["answers"]["text"]:
80 | if len(i.split()) > len(answer.split()):
81 | answer = i
82 | d["word_count"] = len(answer.split())
83 | d["output"] = answer
84 | d["mlt"] = add_MLT(answer)
85 | if d["mlt"] is not None:
86 | sampled_data[d["mlt"]].append(d)
87 | return sampled_data
88 |
89 |
90 | def main(args):
91 | sampled_data = {key[0]: [] for key in MetaLengthToken}
92 | # OpenHermes2.5
93 | openhermes_data = process_OpenHermes(args.dataset_path, args.random_seed, args.num)
94 | print(f"{'='*10}OpenHermes2.5 dataset{'='*10}")
95 | for key in openhermes_data:
96 | random.shuffle(openhermes_data[key])
97 | data_num = min(len(openhermes_data[key]), SAMPLE[key] - len(sampled_data[key]))
98 | sampled_data[key] += openhermes_data[key][:data_num]
99 | print(f"{key}-{len(openhermes_data[key])}-take {data_num}.")
100 | # Long Form
101 | longform_data = process_longform(args.longform_dir)
102 | print(f"{'='*10}LongForm dataset{'='*10}")
103 | for key in longform_data:
104 | random.shuffle(longform_data[key])
105 | data_num = min(len(longform_data[key]), SAMPLE[key] - len(sampled_data[key]))
106 | sampled_data[key] += longform_data[key][:data_num]
107 | print(f"{key}-{len(longform_data[key])}-take {data_num}")
108 | # ELI5
109 | eli5_data = process_eli5()
110 | print(f"{'='*10}ELI5 dataset{'='*10}")
111 | for key in eli5_data:
112 | random.shuffle(eli5_data[key])
113 | data_num = min(len(eli5_data[key]), SAMPLE[key] - len(sampled_data[key]))
114 | sampled_data[key] += eli5_data[key][:data_num]
115 | print(f"{key}-{len(eli5_data[key])}-take {data_num}")
116 | print(f"{'='*10}FINAL{'='*10}")
117 | data = []
118 | for key in sampled_data:
119 | data += sampled_data[key]
120 | print(f"{key}-{len(sampled_data[key])}")
121 | random.shuffle(data)
122 | global_id = 0
123 | for d in data:
124 | d["id"] = global_id
125 | global_id += 1
126 | print(f"Total:{global_id}")
127 | # save to output_path
128 | save_jsonl(args.output_path, data)
129 |
130 |
131 | if __name__ == "__main__":
132 | parser = argparse.ArgumentParser()
133 | parser.add_argument("--dataset_path", type=str, default=None)
134 | parser.add_argument("--longform_dir", type=str, default=None)
135 | parser.add_argument("--num", type=int, default=None)
136 | parser.add_argument("--random_seed", type=int, default=10)
137 | parser.add_argument("--output_path", type=str, default=None)
138 | args = parser.parse_args()
139 | main(args)
140 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ruler: A Model-Agnostic Method to Control Generated Length for Large Language Models
2 |
3 |
4 |
5 | 
6 |
7 | ## 🤩 Release
8 | - [2024/09/20] 🥳 [Ruler](https://arxiv.org/abs/2409.18943) is accepted by EMNLP 2024 Findings.
9 |
10 | ## 😎 Overview
11 |
12 | **Ruler** is a novel, model-agnostic approach that employs Meta Length Tokens (*MLTs*) to enhance the instruction-following ability of LLMs under length-constrained instructions.
13 |
14 | **Ruler** equips LLMs with the ability to generate responses of a target length. Moreover, it can automatically generate an appropriate *MLT* when no target length is provided. Comprehensive experiments show the effectiveness of **Ruler** across different LLMs.
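
Concretely, an *MLT* is a special token of the form `[MLT:...]` that is prepended to the assistant turn. A minimal sketch of how a prompt is assembled with the `custom` template from `src/utils/templates.py` (assuming `PYTHONPATH` points at `src` as set up in the Quickstart below; the instruction and the `300` bucket are only illustrative, the actual bucket strings live in `src/utils/config.py`):

```python
# Minimal sketch: build a length-conditioned prompt with the `custom` template.
# Assumes PYTHONPATH includes <repo>/src; "300" stands in for a real MLT bucket.
from utils.templates import custom_Template

prompt = custom_Template.apply_template_for_generation(
    "Explain how photosynthesis works.",  # hypothetical instruction
    targetlength="300",                   # desired word-count bucket
)
print(prompt)  # ends with the assistant header followed by "[MLT:300]"
```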
15 |
16 | ## 🧐 Quickstart
17 |
18 | We also provide a more [detailed experiments document](./experiments.md) (specific to each experiment and including all the results!).
19 |
20 | ### Prepare Environment
21 |
22 | First, set up a Python environment. This codebase has been tested under Python 3.x, and we officially support Python 3.10.
23 | ```bash
24 | conda create -n ruler python=3.10
25 | cd Ruler # the directory that contains 'requirements.txt'
26 | pip install -r requirements.txt
27 |
28 | export PYTHONPATH=xxxx/Ruler/src
29 | cd src
30 |
31 | # create folders and download datasets
32 | bash ../scripts/download.sh
33 | ```
34 | ### Target Length Generation Task
35 |
36 | **Closed-source Model**
37 |
38 | ```shell
39 | python exp/run_exp_api.py\
40 | --dataset_path ../datasets/tlg_dataset.jsonl\
41 | --model \
42 | --output_path ../outputs/tlg/tlg_.jsonl\
43 | --key
44 | ```
45 |
46 | **Open-source Model**
47 |
48 | ```shell
49 | python exp/run_exp.py\
50 | --dataset_path ../datasets/tlg_dataset.jsonl\
51 | --model_name_or_path \
52 | --output_path ../outputs/tlg/tlg_.jsonl
53 | ```
54 |
55 | **Calculate scores**
56 |
57 | Different `Levels`:
58 |
59 | ```shell
60 | python exp/cal_level_scores.py\
61 | --dataset_path
62 | ```
63 |
64 | Different `MLT`:
65 |
66 | ```shell
67 | python exp/cal_mlt_scores.py\
68 | --dataset_path
69 | ```
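
For reference, the scoring scripts above read the generated `.jsonl` line by line; below is a hedged sketch of the fields they rely on (inferred from `exp/cal_level_scores.py` and `exp/cal_elm_rmse.py`; the generated files may contain additional fields):

```python
# Hedged sketch of one record in the generated TLG output file.
# Only the fields consumed by the scoring scripts are shown; values are illustrative.
record = {
    "TargetLength": "100",                   # requested length bucket (can also be ">800")
    "output": "The generated response ...",  # model output whose word count is scored
}
```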
70 |
71 | 
72 |
73 | ### Ruler
74 |
75 | Finetuning scripts:
76 | ```shell
77 | export CUDA_VISIBLE_DEVICES=0,1,2,3
78 |
79 | find_free_port() {
80 | while :
81 | do
82 | PORT=$(( ( RANDOM % 64512 ) + 1024 ))
83 | (echo >/dev/tcp/localhost/$PORT) >/dev/null 2>&1
84 | if [ $? -ne 0 ]; then
85 | echo $PORT
86 | return
87 | fi
88 | done
89 | }
90 |
91 | export MASTER_PORT=$(find_free_port)
92 |
93 | LEARNING_RATE=2e-5
94 | NUM_TRAIN_EPOCHS=3
95 | VANILLA=False
96 |
97 | MODEL_NAME_OR_PATH=
98 | echo "Finetune from: ${MODEL_NAME_OR_PATH}"
99 | MODEL=${MODEL_NAME_OR_PATH##*/}
100 |
101 | TEMPLATE=custom
102 | echo "Finetune data template: ${TEMPLATE}"
103 |
104 | DATA_PATH=../datasets/ruler_training_dataset.jsonl
105 | echo "Finetune data path: ${DATA_PATH}"
106 |
107 | MODEL_MAX_LENGTH=2048
108 | echo "Model max length: ${MODEL_MAX_LENGTH}"
109 |
110 | BATCH_SIZE=4
111 | echo "Per device train batch size: ${BATCH_SIZE}"
112 |
113 | GRAD_ACCUM=8
114 | echo "Gradient accumulation steps: ${GRAD_ACCUM}"
115 |
116 | OUTPUT_DIR="../outputs/checkpoints/ruler_${MODEL}_bs_${BATCH_SIZE}_ga_${GRAD_ACCUM}_lr_${LEARNING_RATE}_eps_${NUM_TRAIN_EPOCHS}"
117 | LOG_DIR=../logs
118 |
119 | deepspeed --master_port=$MASTER_PORT finetuning/finetune.py \
120 | --vanilla $VANILLA \
121 | --deepspeed ../configs/ds_config_zero3.json \
122 | --model_name_or_path $MODEL_NAME_OR_PATH \
123 | --template $TEMPLATE\
124 | --model_max_length $MODEL_MAX_LENGTH \
125 | --data_path $DATA_PATH \
126 | --output_dir $OUTPUT_DIR \
127 | --bf16 True \
128 | --tf32 True \
129 | --per_device_train_batch_size ${BATCH_SIZE} \
130 | --gradient_accumulation_steps ${GRAD_ACCUM} \
131 | --gradient_checkpointing True \
132 | --lr_scheduler_type cosine \
133 | --learning_rate ${LEARNING_RATE} \
134 | --warmup_ratio 0.05 \
135 | --num_train_epochs ${NUM_TRAIN_EPOCHS} \
136 | --evaluation_strategy no \
137 | --save_strategy epoch \
138 | --save_total_limit 1 \
139 | --logging_steps 5 \
140 | 2>&1 | tee ${LOG_DIR}/output_ruler_${MODEL}.log
141 | ```
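
The file passed via `DATA_PATH` is built by `src/data_process/build_training_dataset.py`; here is a hedged sketch of one training example, using the column names that `src/finetuning/dataset.py` consumes (values are illustrative, and the exact MLT bucket strings are defined in `src/utils/config.py`):

```python
# Hedged sketch of one line in ruler_training_dataset.jsonl.
# "[MLT:200]" stands in for one of the Meta Length Tokens from utils/config.py.
example = {
    "Instruction": "Describe the water cycle.",   # user instruction
    "mlt": "[MLT:200]",                           # length token matching the reference response
    "output": "The water cycle begins when ...",  # reference response
}
```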
142 |
143 | 
144 |
145 | ### Multi MLT Generation Experiment
146 |
147 | **Run exp:**
148 |
149 | ```shell
150 | python exp/run_exp.py\
151 | --dataset_path ../datasets/multi_mlt.jsonl\
152 | --model_name_or_path \
153 | --gpus 1\
154 | --template \
155 | --output_path ../outputs/multi_mlt/mmlt_.jsonl
156 | ```
157 |
158 | **Calculate scores**
159 |
160 | ```shell
161 | python exp/analysis_mmlt.py\
162 | --dataset_path ../outputs/multi_mlt/mmlt_.jsonl
163 | ```
164 |
165 | ![mmlt](./images/mmlt.png)
166 |
167 | ### Self-generated MLT Experiment
168 |
169 | **Run exp:**
170 |
171 | ```shell
172 | python exp/run_exp.py\
173 | --dataset_path ../datasets/self_generated_mlt.jsonl\
174 | --model_name_or_path \
175 | --gpus 1\
176 | --template custom\
177 | --output_path ../outputs/self_generated_mlt/sgm_.jsonl
178 | ```
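
In this setting the MLT slot in the prompt is left empty, so the finetuned model first emits an `[MLT:...]` token of its own choosing and then the response. A minimal sketch with the `custom` template (same assumptions as above):

```python
# Hedged sketch: with the default targetlength="", no MLT is inserted into the prompt,
# and the finetuned model is expected to generate one itself before answering.
from utils.templates import custom_Template

prompt = custom_Template.apply_template_for_generation("Summarize the plot of Hamlet.")
print(prompt)  # ends right after the assistant header, with no "[MLT:...]" token
```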
179 |
180 | **Calculate scores**
181 |
182 | ```shell
183 | python exp/analysis_sgm.py\
184 | --dataset_path ../outputs/self_generated_mlt/sgm_.jsonl
185 | ```
186 |
187 | ![sgm](./images/sgm.png)
188 |
189 | ### Overall Performance
190 |
191 | `lm_eval` script:
192 |
193 | ```shell
194 | set -ex
195 | export NUMEXPR_MAX_THREADS=128
196 |
197 | MODEL=vllm
198 | MODEL_NAME=
199 | MODEL_NAME_OR_PATH=
200 | OUTPUT_PATH=../outputs/overall_performance/${MODEL_NAME}
201 | TOKENIZER_MODE=auto
202 | NUM_GPUS=1
203 | GPU_MEMORY_UTILIZATION=0.8
204 |
205 | mkdir -p $OUTPUT_PATH
206 |
207 | lm_eval --model $MODEL \
208 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
209 | --tasks ai2_arc \
210 | --device cuda \
211 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc \
212 | --batch_size 1 \
213 | --num_fewshot 25 \
214 | --write_out \
215 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_ai2_arc.log
216 |
217 | lm_eval --model $MODEL \
218 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
219 | --tasks hellaswag \
220 | --device cuda \
221 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_hellaswag \
222 | --batch_size 1 \
223 | --num_fewshot 10 \
224 | --write_out \
225 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_hellaswag.log
226 |
227 | lm_eval --model $MODEL \
228 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
229 | --tasks truthfulqa \
230 | --device cuda \
231 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa \
232 | --batch_size 1 \
233 | --num_fewshot 0 \
234 | --write_out \
235 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_truthfulqa.log
236 |
237 | lm_eval --model $MODEL \
238 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
239 | --tasks mmlu \
240 | --device cuda \
241 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_mmlu \
242 | --batch_size 1 \
243 | --num_fewshot 5 \
244 | --write_out \
245 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_mmlu.log
246 |
247 | lm_eval --model $MODEL \
248 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
249 | --tasks winogrande \
250 | --device cuda \
251 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_winogrande \
252 | --batch_size 1 \
253 | --num_fewshot 5 \
254 | --write_out \
255 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_winogrande.log
256 |
257 | lm_eval --model $MODEL \
258 | --model_args pretrained=${MODEL_NAME_OR_PATH},trust_remote_code=True,tokenizer_mode=${TOKENIZER_MODE},tensor_parallel_size=${NUM_GPUS},dtype=auto,gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
259 | --tasks gsm8k \
260 | --device cuda \
261 | --output_path ${OUTPUT_PATH}/${MODEL}_eval_gsm8k \
262 | --batch_size 1 \
263 | --num_fewshot 5 \
264 | --write_out \
265 | 2>&1 | tee ${OUTPUT_PATH}/${MODEL}_eval_gsm8k.log
266 | ```
267 |
268 | ![overall_performance](./images/overall_performance.png)
269 |
270 | ## 😘 Citation
271 |
272 | If you find our work interesting or helpful, please cite this repo.
273 |
274 | ```plain
275 | @misc{li2024rulermodelagnosticmethodcontrol,
276 | title={Ruler: A Model-Agnostic Method to Control Generated Length for Large Language Models},
277 | author={Jiaming Li and Lei Zhang and Yunshui Li and Ziqiang Liu and yuelin bai and Run Luo and Longze Chen and Min Yang},
278 | year={2024},
279 | eprint={2409.18943},
280 | archivePrefix={arXiv},
281 | primaryClass={cs.CL},
282 | url={https://arxiv.org/abs/2409.18943},
283 | }
284 | ```
285 |
286 | ## 🫡 Contact
287 | If you have any questions, feel free to contact us at `jm.li4@siat.ac.cn`
--------------------------------------------------------------------------------
/src/utils/templates.py:
--------------------------------------------------------------------------------
1 | from utils.config import MetaLengthToken
2 |
3 | # default templates
4 |
5 |
6 | class ModelTemplate:
7 | STOP_TOKENS = {
8 |         "mistral": ["</s>"],
9 | "glm": ["<|endoftext|>", "<|user|>", "<|observation|>"],
10 |         "gemma": ["<end_of_turn>", "<eos>"],
11 | "llama": ["<|end_of_text|>", "<|eot_id|>"],
12 |         "internlm": ["</s>", "<|im_end|>"],
13 | "deepseek": ["<|end▁of▁sentence|>"],
14 | "yi": ["<|im_end|>", "<|endoftext|>"],
15 | "qwen": ["<|im_end|>", "<|endoftext|>"],
16 | }
17 |
18 | @staticmethod
19 | def get_stop_tokens(model_name: str):
20 | for key in ModelTemplate.STOP_TOKENS:
21 | if key in model_name.lower():
22 | return ModelTemplate.STOP_TOKENS[key]
23 | raise KeyError
24 |
25 | @staticmethod
26 | def apply_template_for_generation(instruction, targetlength, tokenizer):
27 | if targetlength != "":
28 | targetlength = targetlength.replace(">", "more than ")
29 | question = f"{instruction}\nThe response should have a word count of {targetlength} words."
30 | else:
31 | question = instruction
32 | messages = [
33 | {"role": "user", "content": question},
34 | ]
35 | prompt = tokenizer.apply_chat_template(
36 | messages, tokenize=False, add_generation_prompt=True
37 | )
38 | return prompt
39 |
40 |
41 | # MLT templates
42 |
43 |
44 | class Llama3_MLT_Template:
45 | """
46 | messages = [
47 | {"role": "user", "content": instruction},
48 | {"role": "assistant", "content": mlt+output},
49 | ]
50 |
51 | <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}{output}<|eot_id|>
52 | """
53 |
54 | BOS_TOKEN: str = "<|begin_of_text|>"
55 | EOS_TOKEN: str = "<|end_of_text|>"
56 | STOP_TOKENS: list[str] = ["<|end_of_text|>", "<|eot_id|>"] + [
57 | MLT[0] for MLT in MetaLengthToken
58 | ]
59 |
60 | @staticmethod
61 | def apply_template(instruction, mlt, output):
62 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}{output}<|eot_id|>"
63 | return prompt
64 |
65 | @staticmethod
66 | def apply_template_for_generation(instruction, targetlength=""):
67 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
68 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}"
69 | return prompt
70 |
71 |
72 | class Qwen_MLT_Template:
73 | """
74 | messages = [
75 | {"role": "user", "content": instruction},
76 | {"role": "assistant", "content": mlt+output},
77 | ]
78 |
79 | <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>
80 | """
81 |
82 | BOS_TOKEN: str = None
83 | EOS_TOKEN: str = "<|im_end|>"
84 | STOP_TOKENS: list[str] = [
85 | "<|im_end|>",
86 | "<|endoftext|>",
87 | ] + [MLT[0] for MLT in MetaLengthToken]
88 |
89 | @staticmethod
90 | def apply_template(instruction, mlt, output):
91 | prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>"
92 | return prompt
93 |
94 | @staticmethod
95 | def apply_template_for_generation(instruction, targetlength=""):
96 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
97 | prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}"
98 | return prompt
99 |
100 |
101 | class Yi_MLT_Template:
102 | """
103 | messages = [
104 | {"role": "user", "content": instruction},
105 | {"role": "assistant", "content": mlt+output},
106 | ]
107 |
108 | <|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>
109 | """
110 |
111 | BOS_TOKEN: str = "<|startoftext|>"
112 | EOS_TOKEN: str = "<|im_end|>"
113 | STOP_TOKENS: list[str] = [
114 | "<|im_end|>",
115 | "<|endoftext|>",
116 | ] + [MLT[0] for MLT in MetaLengthToken]
117 |
118 | @staticmethod
119 | def apply_template(instruction, mlt, output):
120 | prompt = f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>"
121 | return prompt
122 |
123 | @staticmethod
124 | def apply_template_for_generation(instruction, targetlength=""):
125 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
126 | prompt = (
127 | f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}"
128 | )
129 | return prompt
130 |
131 |
132 | class internlm_MLT_Template:
133 | """
134 | messages = [
135 | {"role": "user", "content": instruction},
136 | {"role": "assistant", "content": mlt+output},
137 | ]
138 |
139 | <|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>
140 | """
141 |
142 |     BOS_TOKEN: str = "<s>"
143 |     EOS_TOKEN: str = "</s>"
144 |     STOP_TOKENS: list[str] = ["</s>", "<|im_end|>"] + [
145 | MLT[0] for MLT in MetaLengthToken
146 | ]
147 |
148 | @staticmethod
149 | def apply_template(instruction, mlt, output):
150 | prompt = f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}{output}<|im_end|>"
151 | return prompt
152 |
153 | @staticmethod
154 | def apply_template_for_generation(instruction, targetlength=""):
155 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
156 | prompt = f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n{mlt}"
157 | return prompt
158 |
159 |
160 | class glm_MLT_Template:
161 | """
162 | messages = [
163 | {"role": "user", "content": instruction},
164 | {"role": "assistant", "content": mlt+output},
165 | ]
166 |
167 | [gMASK]<|user|>\ninstruction<|assistant|>\nmlt+output
168 | """
169 |
170 | BOS_TOKEN: str = "[gMASK]"
171 | EOS_TOKEN: str = "<|endoftext|>"
172 | STOP_TOKENS: list[str] = [
173 | "<|endoftext|>",
174 | "<|user|>",
175 | "<|observation|>",
176 | ] + [MLT[0] for MLT in MetaLengthToken]
177 |
178 | @staticmethod
179 | def apply_template(instruction, mlt, output):
180 | prompt = f"[gMASK]<|user|>\n{instruction}<|assistant|>\n{mlt}{output}"
181 | return prompt
182 |
183 | @staticmethod
184 | def apply_template_for_generation(instruction, targetlength=""):
185 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
186 | prompt = f"[gMASK]<|user|>\n{instruction}<|assistant|>\n{mlt}"
187 | return prompt
188 |
189 |
190 | class deepseek_MLT_Template:
191 | """
192 | messages = [
193 | {"role": "user", "content": instruction},
194 | {"role": "assistant", "content": mlt+output},
195 | ]
196 |
197 | <|begin▁of▁sentence|>User: INSTRUCTION\n\nAssistant: MLT+OUTPUT<|end▁of▁sentence|>
198 | """
199 |
200 | BOS_TOKEN: str = "<|begin▁of▁sentence|>"
201 | EOS_TOKEN: str = "<|end▁of▁sentence|>"
202 | STOP_TOKENS: list[str] = [
203 | "<|end▁of▁sentence|>",
204 | ] + [MLT[0] for MLT in MetaLengthToken]
205 |
206 | @staticmethod
207 | def apply_template(instruction, mlt, output):
208 | prompt = f"<|begin▁of▁sentence|>User: {instruction}\n\nAssistant: {mlt}{output}<|end▁of▁sentence|>"
209 | return prompt
210 |
211 | @staticmethod
212 | def apply_template_for_generation(instruction, targetlength=""):
213 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
214 | prompt = f"<|begin▁of▁sentence|>User: {instruction}\n\nAssistant: {mlt}"
215 | return prompt
216 |
217 |
218 | class gemma_MLT_Template:
219 | """
220 | messages = [
221 | {"role": "user", "content": instruction},
222 | {"role": "assistant", "content": mlt+output},
223 | ]
224 |
225 |     <start_of_turn>user\nINSTRUCTION<end_of_turn>\n<start_of_turn>model\nMLT+OUTPUT<end_of_turn>
226 | """
227 |
228 |     BOS_TOKEN: str = "<bos>"
229 |     EOS_TOKEN: str = "<eos>"
230 |     STOP_TOKENS: list[str] = [
231 |         "<end_of_turn>",
232 |         "<eos>",
233 | ] + [MLT[0] for MLT in MetaLengthToken]
234 |
235 | @staticmethod
236 | def apply_template(instruction, mlt, output):
237 |         prompt = f"<start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model\n{mlt}{output}<end_of_turn>"
238 | return prompt
239 |
240 | @staticmethod
241 | def apply_template_for_generation(instruction, targetlength=""):
242 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
243 |         prompt = f"<start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model\n{mlt}"
244 | return prompt
245 |
246 |
247 | class mistral_MLT_Template:
248 | """
249 | messages = [
250 | {"role": "user", "content": instruction},
251 | {"role": "assistant", "content": mlt+output},
252 | ]
253 |
254 | [INST] INSTRUCTION [/INST]MLT+OUTPUT
255 | """
256 |
257 |     BOS_TOKEN: str = "<s>"
258 |     EOS_TOKEN: str = "</s>"
259 |     STOP_TOKENS: list[str] = [
260 |         "</s>",
261 | ] + [MLT[0] for MLT in MetaLengthToken]
262 |
263 | @staticmethod
264 | def apply_template(instruction, mlt, output):
265 | prompt = f"[INST] {instruction} [/INST]{mlt}{output}"
266 | return prompt
267 |
268 | @staticmethod
269 | def apply_template_for_generation(instruction, targetlength=""):
270 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
271 | prompt = f"[INST] {instruction} [/INST]{mlt}"
272 | return prompt
273 |
274 |
275 | class custom_Template:
276 | """
277 | messages = [
278 | {"role": "user", "content": instruction},
279 | {"role": "assistant", "content": mlt+output},
280 | ]
281 |
282 | <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}{output}<|eot_id|>
283 | """
284 |
285 | BOS_TOKEN: str = "<|begin_of_text|>"
286 | EOS_TOKEN: str = "<|end_of_text|>"
287 | STOP_TOKENS: list[str] = ["<|end_of_text|>", "<|eot_id|>"] + [
288 | MLT[0] for MLT in MetaLengthToken
289 | ]
290 | SPECIAL_TOKENS: list[str] = [
291 | "<|begin_of_text|>",
292 | "<|end_of_text|>",
293 | "<|start_header_id|>",
294 | "<|end_header_id|>",
295 | "<|eot_id|>",
296 | ]
297 |
298 | @staticmethod
299 | def apply_template(instruction, mlt, output):
300 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}{output}<|eot_id|>"
301 | return prompt
302 |
303 | @staticmethod
304 | def apply_template_for_instruction(instruction):
305 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
306 | return prompt
307 |
308 | @staticmethod
309 | def apply_template_for_generation(instruction, targetlength=""):
310 | mlt = f"[MLT:{targetlength}]" if targetlength != "" else ""
311 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{mlt}"
312 | return prompt
313 | def apply_template_for_generation_vanilla(instruction, targetlength=""):
314 | targetlength = targetlength.replace(">", "more than ")
315 | question = f"{instruction}\nThe response should have a word count of {targetlength} words."
316 | prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
317 | return prompt
318 |
319 |
320 | TemplatesMapping = {
321 | "default": ModelTemplate,
322 | "llama3": Llama3_MLT_Template,
323 | "qwen": Qwen_MLT_Template,
324 | "yi": Yi_MLT_Template,
325 | "internlm": internlm_MLT_Template,
326 | "glm": glm_MLT_Template,
327 | "deepseek": deepseek_MLT_Template,
328 | "gemma": gemma_MLT_Template,
329 | "mistral": mistral_MLT_Template,
330 | "custom": custom_Template,
331 | }
332 |
--------------------------------------------------------------------------------