├── configs ├── model_config │ ├── local.yaml │ ├── hf.yaml │ ├── llama-2-chat.yaml │ ├── prune.yaml │ ├── autogptq.yaml │ ├── vicuna-13b-v1.3-awq-3bit.yaml │ ├── vicuna-13b-v1.3-awq-4bit.yaml │ ├── vicuna-13b-v1.3-awq-8bit.yaml │ ├── llama-2-13b-awq-3bit.yaml │ ├── llama-2-13b-awq-4bit.yaml │ ├── llama-2-13b-awq-8bit.yaml │ ├── vicuna-13b-v1.3-gptq-3bit.yaml │ ├── vicuna-13b-v1.3-gptq-4bit.yaml │ ├── vicuna-13b-v1.3-gptq-8bit.yaml │ ├── llama-2-13b-chat-gptq-4bit-TheBloke.yaml │ ├── llama-2-13b-chat-awq-3bit.yaml │ ├── llama-2-13b-chat-awq-4bit.yaml │ ├── llama-2-13b-chat-awq-8bit.yaml │ ├── llama-2-13b-chat-gptq-3bit.yaml │ ├── llama-2-13b-chat-gptq-4bit.yaml │ ├── llama-2-13b-chat-gptq-8bit.yaml │ ├── llama-2-13b-gptq-3bit.yaml │ ├── llama-2-13b-gptq-4bit.yaml │ ├── llama-2-13b-gptq-8bit.yaml │ ├── llama-2-13b-chat-awq-4bit_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit_trust.yaml │ ├── llama-2-13b-chat-awq-8bit_nosys.yaml │ ├── llama-2-13b-chat-awq-8bit_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal128-seed0.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal128-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal512-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-4bit-cal1024-seed1.yaml │ ├── vicuna-13b-v1.3-gptq-4bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_fact.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_fact.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_fact.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal256-seed2.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal1024-seed2.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal256-seed0.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-trust.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_trust.yaml │ ├── 
llama-2-13b-chat-gptq-4bit-cal128-seed1_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1_trust.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_trust.yaml │ ├── vicuna-7b-v1.3.yaml │ ├── vicuna-13b-v1.3.yaml │ ├── llama-2-13b-chat.yaml │ ├── llama-2-7b.yaml │ ├── llama-2-13b.yaml │ ├── llama-2-70b-chat.yaml │ ├── llama-2-7b-chat.yaml │ ├── llama-2-13b-chat_nosys.yaml │ ├── llama-2-13b-chat_trust.yaml │ ├── vicuna-13b-v1.3-wanda_2to4.yaml │ ├── llama-2-13b-wanda_2to4.yaml │ ├── llama-2-13b-chat-wanda_2to4.yaml │ ├── vicuna-13b-v1.3-mag_2to4.yaml │ ├── llama-2-13b-chat-mag_1to2.yaml │ ├── llama-2-13b-chat-mag_2to4.yaml │ ├── llama-2-13b-chat-mag_4to8.yaml │ ├── llama-2-13b-chat-wanda_1to2.yaml │ ├── llama-2-13b-chat-wanda_4to8.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4.yaml │ ├── llama-2-13b-chat-sparsegpt_1to2.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4.yaml │ ├── llama-2-13b-chat-sparsegpt_4to8.yaml │ ├── llama-2-13b-mag_2to4.yaml │ ├── llama-2-13b-sparsegpt_2to4.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed0.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed1.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed2.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4-seed0.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4-seed1.yaml │ └── llama-2-13b-chat-sparsegpt_2to4-seed2.yaml └── slurm_config.yaml ├── LICENSE ├── .gitignore ├── gather_result_files.py ├── README.md ├── scripts └── mmlu_autogptq.py ├── compression └── gptq │ ├── gptq.py │ └── conversation.py └── extract_csv.py /configs/model_config/local.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/model_config/hf.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: null 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/llama-2-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "openai/gpt-3.5-turbo-0301" 2 | type: CHAT 3 | conv_template: llama-2 4 | 5 | model_loader: HF 6 | torch_dtype: null 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/prune.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/vita-group/vicuna-13b-v1.3_magnitude_semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/autogptq.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/vita-group/vicuna-7b-v1.3_gptq@2bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: FLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: 
"lmsys/vicuna-7b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | 
torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-TheBloke.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/TheBloke/Llama-2-13B-chat-GPTQ@gptq-4bit-128g-actorder_True" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: FLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- 
/configs/model_config/llama-2-13b-chat-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | 
model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | 
trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal512-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_512g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 
disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit-cal1024-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@4bit_1024g_1seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal256-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_256g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal1024-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_1024g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal256-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_256g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: 
true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | 
trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # 
For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-7b-v1.3.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/lmsys/vicuna-7b-v1.3" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: 
lmsys/vicuna-7b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/lmsys/vicuna-13b-v1.3" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-7b.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-hf" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-7b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-hf" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-70b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: 
"hf/meta-llama/Llama-2-70b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-70b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-7b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-7b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null 
++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | 
tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: 
BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed0" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed1" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed2" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed0" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 
++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed1" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/slurm_config.yaml: -------------------------------------------------------------------------------- 1 | key: null 2 | dry_run: False 3 | 4 | defaults: 5 | - _self_ 6 | - override hydra/launcher: submitit_slurm 7 | 8 | hydra: 9 | mode: MULTIRUN 10 | job: 11 | chdir: false 12 | launcher: 13 | # @package hydra.launcher 14 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 15 | submitit_folder: ./logs/${hydra.sweep.dir}/.submitit/%j 16 | timeout_min: 720 17 | cpus_per_task: 8 18 | # gpus_per_task: 1 19 | tasks_per_node: 1 20 | mem_gb: 80 21 | nodes: 1 22 | gres: gpu:h100:1 23 | partition: gpu 24 | # cluster: aces 25 | # name: dt-${model_config.model}-${hydra.job.name} 26 | name: dt-${hydra.job.name} 27 | comment: null 28 | signal_delay_s: 120 29 | max_num_timeout: 0 30 | additional_parameters: { cluster: aces } 31 | array_parallelism: 10 # 256 32 | setup: null 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 decoding-comp-trust 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /gather_result_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pandas as pd 4 | from glob import glob 5 | import shutil 6 | import argparse 7 | 8 | 9 | def copyfile(src, dest): 10 | os.makedirs(os.path.dirname(dest), exist_ok=True) 11 | shutil.copy(src, dest) 12 | 13 | def merge_nested_dicts(dict1, dict2): 14 | """ 15 | Merge two nested dictionaries into one. 16 | 17 | Args: 18 | dict1 (dict): First dictionary. 19 | dict2 (dict): Second dictionary. Use this value if conflict 20 | 21 | Returns: 22 | dict: Merged dictionary. 23 | """ 24 | merged_dict = {**dict1} # Start with dict1's keys and values 25 | 26 | for key, value in dict2.items(): 27 | if key in merged_dict: 28 | # If the key is present in both dictionaries and both values are dictionaries, merge them 29 | if isinstance(merged_dict[key], dict) and isinstance(value, dict): 30 | merged_dict[key] = merge_nested_dicts(merged_dict[key], value) 31 | else: 32 | # If the key is present but the values aren't both dictionaries, use the value from dict2 33 | merged_dict[key] = value 34 | else: 35 | # If the key is not present in dict1, add it to the merged dictionary 36 | merged_dict[key] = value 37 | 38 | return merged_dict 39 | 40 | def merge_dataframe(df1, df2, index): 41 | # df1 = df1.set_index() 42 | 43 | # Append DataFrames 44 | appended_df = pd.concat([df1, df2], ignore_index=True) 45 | 46 | # Drop duplicates, keep last (from df2) 47 | appended_df = appended_df.drop_duplicates(subset=index, keep='last') 48 | appended_df = appended_df.sort_values(by=index) 49 | 50 | # Set index back (if it was reset) 51 | # appended_df = appended_df.set_index(index) 52 | return appended_df.reset_index(drop=True) 53 | 54 | def get_adv_demo_scores(): 55 | fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True) 56 | trg_fs = [f.replace(RESULT_DIR, GIT_RESULT_DIR) for f in fs] 57 | for f, tf in zip(fs, trg_fs): 58 | copyfile(f, tf) 59 | 60 | def copy_or_merge_json_dict(relative_path): 61 | f = os.path.join(RESULT_DIR, relative_path) 62 | tf = os.path.join(GIT_RESULT_DIR, relative_path) 63 | if os.path.exists(tf): 64 | # print(f"\nERROR!!! 
File exist, need manual merge content: {tf}\n") 65 | # return 66 | print(f"\nFile exist, will merge content: {f} and {tf}\n") 67 | src_scores = pd.read_json(f).to_dict() 68 | with open(tf) as open_f: 69 | dst_scores = json.load(open_f) 70 | scores = merge_nested_dicts(src_scores, dst_scores) 71 | with open(tf, 'w') as open_f: 72 | json.dump(scores, open_f, indent=4) 73 | else: 74 | copyfile(f, tf) 75 | 76 | def get_advglue_scores(): 77 | # print(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json")) 78 | copy_or_merge_json_dict( 79 | os.path.join("adv-glue-plus-plus", "summary.json") 80 | ) 81 | copy_or_merge_json_dict( 82 | os.path.join("adv-glue-plus-plus", "breakdown.json") 83 | ) 84 | 85 | 86 | def get_fairness_scores(): 87 | fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True) 88 | model_names = [ 89 | os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs 90 | ] 91 | for f, model_name in zip(fs, model_names): 92 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 93 | copyfile(f, tf) 94 | 95 | 96 | def get_ethics_scores(): 97 | f = os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 98 | tf = os.path.join(GIT_RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 99 | if os.path.exists(tf): 100 | # print(f"\nERROR!!! File exist, need manual merge content: {tf}\n") 101 | # return 102 | print(f"ETHICS: File exist, merge content: {tf}\n") 103 | df = pd.read_json(f, lines=True) 104 | t_df = pd.read_json(tf, lines=True) 105 | t_df = merge_dataframe(t_df, df, ['model', 'dataset']) 106 | # print(t_df) 107 | t_df.to_json(tf, orient='records', lines=True) 108 | # copyfile(f, tf) 109 | else: 110 | copyfile(f, tf) 111 | 112 | 113 | def get_ood_scores(): 114 | path_prefix = os.path.join(RESULT_DIR, "ood", "results/") 115 | fs = glob(os.path.join(path_prefix, "**", "final_scores.json"), recursive=True) 116 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 117 | for f, model_name in zip(fs, model_names): 118 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 119 | copyfile(f, tf) 120 | 121 | 122 | def get_privacy_scores(): 123 | f = os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl") 124 | tf = os.path.join(GIT_RESULT_DIR, "privacy", "generations", "scores.jsonl") 125 | if os.path.exists(tf): 126 | print(f"\nPRIVACY: File exist, merge content: {tf}\n") 127 | df = pd.read_json(f, lines=True) 128 | t_df = pd.read_json(tf, lines=True) 129 | t_df = merge_dataframe(t_df, df, ['model', 'dataset']) 130 | # print(t_df) 131 | t_df.to_json(tf, orient='records', lines=True) 132 | else: 133 | copyfile(f, tf) 134 | 135 | 136 | def get_stereotype_scores(): 137 | path_prefix = os.path.join(RESULT_DIR, "stereotype", "generations/") 138 | fs = glob(os.path.join(path_prefix, "**", "25_compiled.json"), recursive=True) 139 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 140 | for f, model_name in zip(fs, model_names): 141 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 142 | copyfile(f, tf) 143 | 144 | 145 | def get_toxicity_scores(): 146 | path_prefix = os.path.join(RESULT_DIR, "toxicity", "user_prompts", "generations/") 147 | fs = glob(os.path.join(path_prefix, "**", "report.jsonl"), recursive=True) 148 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 149 | for f, model_name in zip(fs, model_names): 150 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 151 | copyfile(f, tf) 152 | 153 | 154 | def 
summarize_results(keys=None): 155 | func_dict = { 156 | "adv_demo": get_adv_demo_scores, 157 | "adv-glue": get_advglue_scores, 158 | "fairness": get_fairness_scores, 159 | "ethics": get_ethics_scores, 160 | "ood": get_ood_scores, 161 | "privacy": get_privacy_scores, 162 | "stereotype": get_stereotype_scores, 163 | "toxicity": get_toxicity_scores 164 | } 165 | if keys is None: 166 | keys = list(func_dict) 167 | for k in keys: 168 | print(f">> {k}") 169 | func_dict[k]() 170 | 171 | 172 | if __name__ == "__main__": 173 | parser = argparse.ArgumentParser() 174 | parser.add_argument('--result_dir', default="../DecodingTrust/results") 175 | parser.add_argument('-p', '--perspective', default=None, type=str) 176 | parser.add_argument('--git_result_dir', default="./results") 177 | args = parser.parse_args() 178 | 179 | assert os.path.exists(args.result_dir), f"Not found path to source results: {args.result_dir}" 180 | 181 | RESULT_DIR = args.result_dir # type: str 182 | while RESULT_DIR.endswith('/'): 183 | RESULT_DIR = RESULT_DIR[:-1] 184 | # GIT_RESULT_DIR = "./results" 185 | GIT_RESULT_DIR = args.git_result_dir 186 | 187 | summarize_results(keys=[args.perspective] if args.perspective is not None else None) 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Decoding Compressed Trust 2 | 3 | Codebase for the [Decoding Compressed Trust](https://decoding-comp-trust.github.io/) project. 4 | 5 | ## Model Preparation 6 | 7 | We provide compressed models on [Hugging Face](https://huggingface.co/compressed-llm). Details on how the models were compressed are provided below. 8 | 9 | ### Pruning 10 | 11 | Our code is based on `git@github.com:locuslab/wanda.git`. 12 | ```bash 13 | cd compression 14 | git clone git@github.com:locuslab/wanda.git 15 | ``` 16 | 17 | Prune with Magnitude/SparseGPT/Wanda using semi-structured sparsity: 18 | ```bash 19 | cd wanda 20 | CUDA_VISIBLE_DEVICES=0 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method magnitude --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_mag_2to4 21 | CUDA_VISIBLE_DEVICES=0 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method sparsegpt --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_sparsegpt_2to4 22 | CUDA_VISIBLE_DEVICES=2 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method wanda --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_wanda_2to4 23 | ``` 24 | Change `meta-llama/Llama-2-13b-chat-hf` to other models as needed.
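For reference, the snippet below is not part of this repository or of `wanda`; it is a minimal sketch of what 2:4 semi-structured magnitude pruning does to a single weight matrix: within every group of four consecutive weights along the input dimension, the two with the smallest magnitude are zeroed out, which gives the 50% sparsity requested by `--sparsity_ratio 0.5`. The function name and tensor shapes are illustrative only.
```python
# Illustrative sketch (NOT code from the wanda repo): 2:4 magnitude pruning of one weight matrix.
import torch

def magnitude_prune_2to4(weight: torch.Tensor) -> torch.Tensor:
    """Zero the 2 smallest-magnitude weights in every group of 4 along the input dimension."""
    out_features, in_features = weight.shape
    assert in_features % 4 == 0, "input dimension must be divisible by the group size 4"
    groups = weight.abs().reshape(out_features, in_features // 4, 4)
    # indices of the 2 smallest-magnitude entries in each group of 4
    _, drop_idx = torch.topk(groups, k=2, dim=-1, largest=False)
    mask = torch.ones_like(groups)
    mask.scatter_(-1, drop_idx, 0.0)  # zero out those entries
    return weight * mask.reshape(out_features, in_features)

w = torch.randn(8, 16)
w_pruned = magnitude_prune_2to4(w)
print((w_pruned == 0).float().mean().item())  # 0.5: exactly 2 of every 4 weights are zero
```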
25 | 26 | ### Quantization 27 | 28 | GPTQ: 29 | ```bash 30 | pip install auto-gptq 31 | cd compression/gptq 32 | 33 | CUDA_VISIBLE_DEVICES=0 python gptq.py --pretrained_model_dir meta-llama/Llama-2-13b-chat-hf --quantized_model_dir ./output --bits 4 --save_and_reload --desc_act --seed 0 --num_samples 128 --calibration-template llama-2 34 | ``` 35 | AWQ: 36 | ```bash 37 | cd compression 38 | git clone https://github.com/mit-han-lab/llm-awq 39 | cd llm-awq 40 | 41 | mkdir -p /storage/jinhaoduan/workspace/llm-awq-main/experiments/llama-2-13b-chat-bit4-seed0 42 | CUDA_VISIBLE_DEVICES=1 python -m awq.entry --model_path meta-llama/Llama-2-13b-chat-hf --seed 0 --w_bit 4 --q_group_size 128 --run_awq --dump_awq awq_cache/llama-2-13b-chat-bit4-seed0.pt 43 | CUDA_VISIBLE_DEVICES=1 python -m awq.entry --model_path meta-llama/Llama-2-13b-chat-hf --tasks wikitext --w_bit 4 --q_group_size 128 --load_awq awq_cache/llama-2-13b-chat-bit4-seed0.pt --q_backend fake --dump_awq_weights_to_hf ./llm-awq-main/llama-2-13b-chat-bit4-seed0 44 | ``` 45 | 46 | ## Running Experiments 47 | 48 | Install the modified DecodingTrust following [this link](https://github.com/decoding-comp-trust/DecodingTrust?tab=readme-ov-file#getting-started). 49 | 50 | Due to the large volume of experiments, we recommend running experiments through the Slurm job system. 51 | We provide [an example Slurm config file](configs/slurm_config.yaml). 52 | For each model, we provide a config file under [configs/model_config](configs/model_config). 53 | 54 | > Note that these files are tuned for the VITA ACES servers and may not work on other servers. 55 | 56 | Important files: 57 | * `scripts/multi-run.sh`: Use this to run all metrics on a given model_config. 58 | * `dt/configs/model_configs/vicuna_xxx.yaml`: Configures the model. 59 | * `dt/configs/slurm_config.yaml`: Sets up Slurm; do not change this. 60 | 61 | Setup: 62 | ```shell 63 | # find the gpu type 64 | scontrol show node | grep Gres 65 | # Add slurm 66 | cd DecodingTrust 67 | pip install -e ".[slurm]" 68 | ``` 69 | 70 | Modify `dt/configs/model_configs/vicuna-13b-v1.3-mag_2to4.yaml` for your model. 71 | Add `vicuna-13b-v1.3-mag_2to4` to `multi-run.sh`, then run: 72 | ```shell 73 | bash scripts/multi-run.sh 74 | ``` 75 | 76 | ## Aggregating Results 77 | 78 | 79 | Upload results to GitHub: 80 | ```shell 81 | git pull 82 | python gather_result_files.py --result_dir= -p= 83 | # Example 84 | # python gather_result_files.py -p=adv-glue 85 | git add results/ 86 | git commit -m "Update results" 87 | git push 88 | ``` 89 | 90 | Example: 91 | ```shell 92 | git pull 93 | python gather_result_files.py -p=adv-glue 94 | git add results/ 95 | git commit -m "Update results" 96 | git push 97 | ``` 98 | 99 | Extract results to a CSV file (`data/num_sheet.csv`), which will be used for visualization. 100 | Run `python extract_csv.py`. 101 | 102 | 103 | * Adversarial Demonstrations 104 | 105 | ```bash 106 | python src/dt/perspectives/adv_demonstration/aggregate_score.py 107 | ``` 108 | 109 | Find aggregated results with the following patterns. 110 | 111 | ```bash 112 | #ls results/adv_demonstration/*_score.json 113 | grep -H "adv_demonstration\"" results/adv_demonstration/*_score.json 114 | ``` 115 | 116 | * Adversarial Robustness 117 | 118 | ```bash 119 | python src/dt/perspectives/advglue/adv_stats.py 120 | ``` 121 | 122 | You can find the scores with the following patterns.
123 | 124 | ```bash 125 | cat ./results/adv-glue-plus-plus/summary.json | jq 126 | ``` 127 | 128 | * Fairness 129 | 130 | Use the patched score calculation: 131 | ```shell 132 | cp dt-patch/src/dt/perspectives/fairness/score_calculation_script.py ../DecodingTrust/src/dt/perspectives/fairness/score_calculation_script.py 133 | ``` 134 | 135 | ```bash 136 | python src/dt/perspectives/fairness/score_calculation_script.py 137 | ``` 138 | 139 | ```bash 140 | #ls results/fairness/results/*/*/*/final_scores.json 141 | grep -H "fairness score\"" results/fairness/results/hf/compressed-llm/*/final_scores.json 142 | ``` 143 | 144 | * Machine Ethics 145 | 146 | Use the patched score calculation: 147 | ```shell 148 | cp dt-patch/src/dt/perspectives/machine_ethics/calculate_score.py ../DecodingTrust/src/dt/perspectives/machine_ethics/calculate_score.py 149 | ``` 150 | 151 | ```bash 152 | python src/dt/perspectives/machine_ethics/calculate_score.py 153 | ``` 154 | 155 | ```bash 156 | # cat data/machine_ethics/generations/scores.jsonl | jq 157 | cat results/machine_ethics/generations/scores.jsonl | jq 158 | ``` 159 | 160 | * OOD 161 | 162 | Use the patched score calculation: 163 | ```shell 164 | cp dt-patch/src/dt/perspectives/ood/evaluation_ood.py ../DecodingTrust/src/dt/perspectives/ood/evaluation_ood.py 165 | ``` 166 | 167 | Find aggregated OOD results with the following patterns. 168 | 169 | ```bash 170 | #ls results/ood/results/*/*/*/final_scores.json 171 | #cat results/ood/results/hf/vita-group/llama-2-13b-chat_magnitude_semistruct@0.5_2to4/final_scores.json | grep "\"score" 172 | grep -H "\"score" results/ood/results/*/*/*/final_scores.json 173 | ``` 174 | 175 | Note: To rerun score aggregation, use the following command. 176 | 177 | ```bash 178 | dt-run --config-name config --multirun +model_config= +ood=knowledge_2020_5shot,knowledge_idk,knowledge_standard,style_8shot,style ++ood.resume=True,True 179 | ``` 180 | 181 | 182 | * Privacy 183 | 184 | To rerun score aggregation, use the following command. 185 | 186 | ```bash 187 | python src/dt/perspectives/privacy/result_agg.py 188 | ``` 189 | 190 | Find aggregated privacy results with the following patterns. 191 | 192 | ```bash 193 | cat results/privacy/generations/scores.jsonl | grep "all" | jq 194 | ``` 195 | 196 | * Stereotype 197 | 198 | Use the patched score calculation: 199 | ```shell 200 | cp dt-patch/src/dt/perspectives/stereotype/agreement_func.py ../DecodingTrust/src/dt/perspectives/stereotype/agreement_func.py 201 | ``` 202 | Main differences: 203 | 1. Remove irrelevant outputs, including `agree/disagree` strings repeated from the prompt and strings like `user: xxx` that are not the LLM's own opinion. 204 | 2. Match the first opinion if multiple are present. 205 | 206 | To rerun stereotype aggregation, use the following command. 207 | 208 | ```bash 209 | dt-run --config-name config --multirun +model_config=$(ls src/dt/configs/model_config/*.yaml | xargs -n 1 basename | sed 's/.yaml//' | tr '\n' ',' | sed 's/,$//') +stereotype=benign,targeted,untargeted ++stereotype.skip_generation=True,True 210 | ``` 211 | 212 | Find aggregated stereotype results with the following patterns. 213 | 214 | ```bash 215 | ls results/stereotype/generations/*/*/*/*compiled.json 216 | ``` 217 | * Toxicity 218 | 219 | To calculate the toxicity scores, use the following command.
220 | 221 | ```bash 222 | python src/dt/perspectives/toxicity/perspective_api_evaluate.py --api $PERSPECTIVE_API_KEY --strip-outputs 223 | ``` 224 | 225 | If you are rerunning the evaluation, remember to add `--strip-outputs`. 226 | 227 | Find aggregated toxicity results with the following patterns. 228 | 229 | ```bash 230 | cat results/toxicity/user_prompts/generations/*/*/*/report.jsonl | jq 231 | ``` 232 | 233 | * Score Summary 234 | 235 | ```bash 236 | dt-run +model_config=hf 237 | ``` 238 | 239 | or 240 | 241 | ```bash 242 | python src/dt/summarize.py 243 | ``` 244 | 245 | Then check the final JSON file: 246 | 247 | ```bash 248 | cat results/summary.json | jq 249 | ``` 250 | 251 | -------------------------------------------------------------------------------- /scripts/mmlu_autogptq.py: -------------------------------------------------------------------------------- 1 | """Different from the original MMLU test code: we let the LLM generate 16 tokens and 2 | match the answer (A, B, C, or D) in the generation. We also facilitate parallel 3 | evaluation through `--subject=`.""" 4 | import argparse 5 | import os 6 | import numpy as np 7 | import pandas as pd 8 | import numpy as np 9 | import torch 10 | from tqdm import tqdm 11 | from datasets import load_dataset 12 | from transformers import AutoTokenizer, LlamaForCausalLM, AutoModelForCausalLM 13 | from auto_gptq import AutoGPTQForCausalLM,BaseQuantizeConfig 14 | import wandb 15 | 16 | from crop import crop 17 | 18 | choices = ["A", "B", "C", "D"] 19 | 20 | 21 | def softmax(x): 22 | z = x - max(x) 23 | numerator = np.exp(z) 24 | denominator = np.sum(numerator) 25 | softmax = numerator/denominator 26 | return softmax 27 | 28 | def format_subject(subject): 29 | l = subject.split("_") 30 | s = "" 31 | for entry in l: 32 | s += " " + entry 33 | return s 34 | 35 | def format_example(df, idx, include_answer=True): 36 | prompt = df.iloc[idx, 0] 37 | k = df.shape[1] - 2 38 | for j in range(k): 39 | prompt += "\n{}.
{}".format(choices[j], df.iloc[idx, j+1]) 40 | prompt += "\nAnswer:" 41 | if include_answer: 42 | prompt += " {}\n\n".format(df.iloc[idx, k + 1]) 43 | return prompt 44 | 45 | def gen_prompt(train_df, subject, k=-1): 46 | prompt = "The following are multiple choice questions (with answers) about {}.\n\n".format(format_subject(subject)) 47 | if k == -1: 48 | k = train_df.shape[0] 49 | for i in range(k): 50 | prompt += format_example(train_df, i) 51 | return prompt 52 | 53 | def predict_one_sample(prompt, answers): 54 | input_ids = tokenizer([prompt]).input_ids 55 | output_ids = model.generate( 56 | input_ids=torch.as_tensor(input_ids).cuda(), 57 | do_sample=True, 58 | # top_k=1, 59 | top_p=1, 60 | temperature=1e-7, 61 | max_new_tokens=16, 62 | ) 63 | output_ids = output_ids[0][len(input_ids[0]):] 64 | outputs = tokenizer.decode(output_ids, skip_special_tokens=True).replace("", "").lower() 65 | 66 | pred=outputs[0].upper() 67 | 68 | if pred in ['A','B','C','D']: 69 | return pred 70 | else: 71 | return None 72 | 73 | def eval(args, subject, dev_df, test_df): 74 | lazy_load_model() 75 | cors = [] 76 | refs = [] 77 | all_probs = [] 78 | answers = choices[:test_df.shape[1]-2] 79 | 80 | for i in tqdm(range(test_df.shape[0]), desc=subject): 81 | # get prompt and make sure it fits 82 | k = args.ntrain 83 | prompt_end = format_example(test_df, i, include_answer=False) 84 | train_prompt = gen_prompt(dev_df, subject, k) 85 | prompt = train_prompt + prompt_end 86 | 87 | while crop(prompt) != prompt: 88 | k -= 1 89 | train_prompt = gen_prompt(dev_df, subject, k) 90 | prompt = train_prompt + prompt_end 91 | 92 | label = test_df.iloc[i, test_df.shape[1]-1] 93 | 94 | pred = predict_one_sample(prompt, answers) 95 | 96 | 97 | cor = pred == label 98 | ref = pred == None 99 | cors.append(cor) 100 | refs.append(ref) 101 | 102 | acc = np.mean(cors) 103 | refusal = np.mean(refs) 104 | cors = np.array(cors) 105 | refs = np.array(refs) 106 | 107 | print("Average accuracy {:.3f} - {}, refusal:{:.3f}".format(acc, subject,refusal)) 108 | 109 | return cors, refs, acc, refusal, all_probs 110 | 111 | def main(args): 112 | if args.subject is None: 113 | subjects = sorted([f.split("_test.csv")[0] for f in os.listdir(os.path.join(args.data_dir, "test")) if "_test.csv" in f]) 114 | else: 115 | subjects = [args.subject] 116 | 117 | print("test subjects:", subjects) 118 | # print(args) 119 | 120 | all_cors = [] 121 | all_refs = [] 122 | 123 | for subject in tqdm(subjects, desc='subj'): 124 | result_path = os.path.join(args.save_path, f"results_{subject}.csv") 125 | if args.resume and os.path.exists(result_path): 126 | print(f"resume: {result_path}") 127 | test_df = pd.read_csv(result_path) 128 | cors = test_df["{}_correct".format(args.model_name)].tolist() 129 | refs = test_df["{}_refusal".format(args.model_name)].tolist() 130 | else: 131 | dev_df = pd.read_csv(os.path.join(args.data_dir, "dev", subject + "_dev.csv"), header=None)[:args.ntrain] 132 | test_df = pd.read_csv(os.path.join(args.data_dir, "test", subject + "_test.csv"), header=None) 133 | 134 | cors, refs, acc, refusal, _ = eval(args, subject, dev_df, test_df) 135 | 136 | test_df["{}_correct".format(args.model_name)] = cors 137 | test_df["{}_refusal".format(args.model_name)] = refs 138 | test_df.to_csv(result_path, index=None) 139 | all_cors.append(cors) 140 | all_refs.append(refs) 141 | 142 | wandb.log({ 143 | f'{subject} accuracy': np.mean(cors), 144 | f'{subject} refusal': np.mean(refs), 145 | }) 146 | 147 | weighted_acc = np.mean(np.concatenate(all_cors)) 148 | 
weighted_ref = np.mean(np.concatenate(all_refs)) 149 | 150 | print("Average accuracy: {:.3f}".format(weighted_acc)) 151 | print("Average refusal: {:.3f}".format(weighted_ref)) 152 | wandb.log({ 153 | 'accuracy': weighted_acc, 154 | 'refusal': weighted_ref, 155 | 'n': len(all_cors) 156 | }) 157 | 158 | def lazy_load_model(): 159 | global model 160 | if model is None: 161 | print("Lazy load model...") 162 | kwargs = {} 163 | if '70b' in args.model_name: 164 | kwargs = {**kwargs, 165 | "disable_exllama": True, 166 | "disable_exllamav2": True,} 167 | model = AutoGPTQForCausalLM.from_quantized( 168 | args.model_name, 169 | inject_fused_mlp=True, 170 | inject_fused_attention=False, 171 | quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=128, desc_act=True), 172 | revision=args.revision, 173 | use_safetensors=False, 174 | device_map='auto', 175 | **kwargs, 176 | ) 177 | model.config.pad_token_id = model.config.eos_token_id 178 | 179 | 180 | if __name__ == "__main__": 181 | parser = argparse.ArgumentParser() 182 | parser.add_argument('--model-name') 183 | parser.add_argument('--tokenizer-name') 184 | parser.add_argument('--bits', type=int, default=None) 185 | parser.add_argument('--num_sample', type=int, default=None) 186 | parser.add_argument('--seed', type=int, default=None) 187 | parser.add_argument('--template', default='default') 188 | parser.add_argument('--revision', type=str, default=None) 189 | parser.add_argument('--test_seed', type=int, default=None, help='seed for test (not model).') 190 | parser.add_argument('--temperature', default=1e-7, type=float) 191 | # mmlu 192 | parser.add_argument("--subject", type=str, default=None, 193 | choices=['abstract_algebra', 'anatomy', 'astronomy', 'business_ethics', 'clinical_knowledge', 'college_biology', 'college_chemistry', 'college_computer_science', 'college_mathematics', 'college_medicine', 'college_physics', 'computer_security', 'conceptual_physics', 'econometrics', 'electrical_engineering', 'elementary_mathematics', 'formal_logic', 'global_facts', 'high_school_biology', 'high_school_chemistry', 'high_school_computer_science', 'high_school_european_history', 'high_school_geography', 'high_school_government_and_politics', 'high_school_macroeconomics', 'high_school_mathematics', 'high_school_microeconomics', 'high_school_physics', 'high_school_psychology', 'high_school_statistics', 'high_school_us_history', 'high_school_world_history', 'human_aging', 'human_sexuality', 'international_law', 'jurisprudence', 'logical_fallacies', 'machine_learning', 'management', 'marketing', 'medical_genetics', 'miscellaneous', 'moral_disputes', 'moral_scenarios', 'nutrition', 'philosophy', 'prehistory', 'professional_accounting', 'professional_law', 'professional_medicine', 'professional_psychology', 'public_relations', 'security_studies', 'sociology', 'us_foreign_policy', 'virology', 'world_religions']) 194 | parser.add_argument("--ntrain", "-k", type=int, default=5, help='shots of demos in ICL.') 195 | parser.add_argument("--data_dir", "-d", type=str, default="data") 196 | parser.add_argument("--resume", action='store_true') 197 | args = parser.parse_args() 198 | 199 | if args.revision is None: 200 | args.revision = f"{args.bits}bit_{args.num_sample}g_{args.seed}seed" 201 | print(f"Auto revision: {args.revision}") 202 | 203 | args.save_path = f'./mmlu-autogptq-results/{args.model_name}-{args.revision}' 204 | if args.temperature != 1e-7: 205 | args.save_path += f'_t{args.temperature:g}' 206 | if args.test_seed is not None: 207 | args.save_path += 
f'_ts{args.test_seed}' 208 | if args.template != 'default': 209 | args.save_path += f'_te-{args.template}' 210 | print("save_path: ", args.save_path) 211 | os.makedirs(args.save_path, exist_ok=True) 212 | 213 | wandb.init(project='comp-test', config=vars(args)) 214 | 215 | device = torch.device("cuda") 216 | 217 | tokenizer = AutoTokenizer.from_pretrained( 218 | args.tokenizer_name, 219 | use_fast=False, 220 | torch_dtype=torch.bfloat16, 221 | use_safetensors=False, 222 | device_map='auto' 223 | ) 224 | tokenizer.pad_token_id = tokenizer.eos_token_id 225 | tokenizer.pad_token = tokenizer.eos_token 226 | 227 | model = None 228 | 229 | main(args) 230 | -------------------------------------------------------------------------------- /compression/gptq/gptq.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import sys 4 | import time 5 | from argparse import ArgumentParser 6 | 7 | import torch 8 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 9 | from datasets import Dataset 10 | from transformers import AutoTokenizer, TextGenerationPipeline, GenerationConfig 11 | from conversation import get_conv_template 12 | 13 | 14 | llama_2_template = """[INST] <<SYS>> 15 | You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. 16 | 17 | If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. 18 | <</SYS>> 19 | 20 | {input} [/INST] 21 | """ 22 | 23 | 24 | def load_data(data_path, tokenizer, n_samples, template='default'): 25 | with open(data_path, "r", encoding="utf-8") as f: 26 | raw_data = json.load(f) 27 | 28 | raw_data = random.sample(raw_data, k=min(n_samples, len(raw_data))) 29 | 30 | def dummy_gen(): 31 | return raw_data 32 | 33 | def tokenize(examples): 34 | instructions = examples["instruction"] 35 | inputs = examples["input"] 36 | outputs = examples["output"] 37 | 38 | prompts = [] 39 | texts = [] 40 | input_ids = [] 41 | attention_mask = [] 42 | for istr, inp, opt in zip(instructions, inputs, outputs): 43 | if inp: 44 | if template == 'default': 45 | prompt = f"Instruction:\n{istr}\nInput:\n{inp}\nOutput:\n" 46 | text = prompt + opt 47 | else: 48 | conv = get_conv_template(template) 49 | conv.append_message(conv.roles[0], f'{istr} {inp}') 50 | conv.append_message(conv.roles[1], None) 51 | prompt = conv.get_prompt() 52 | conv = get_conv_template(template) 53 | conv.append_message(conv.roles[0], f'{istr} {inp}') 54 | conv.append_message(conv.roles[1], opt) 55 | text = conv.get_prompt() 56 | else: 57 | if template == 'default': 58 | prompt = f"Instruction:\n{istr}\nOutput:\n" 59 | text = prompt + opt 60 | else: 61 | conv = get_conv_template(template) 62 | conv.append_message(conv.roles[0], istr) 63 | conv.append_message(conv.roles[1], None) 64 | prompt = conv.get_prompt() 65 | conv = get_conv_template(template) 66 | conv.append_message(conv.roles[0], istr) 67 | conv.append_message(conv.roles[1], opt) 68 | text = conv.get_prompt() 69 | print('*' * 20) 70 | print(prompt) 71 | print('-' * 20) 72 | print(text) 73 | print('*' * 20) 74 | 75 | if len(tokenizer(prompt)["input_ids"]) >= tokenizer.model_max_length: 76 | continue 77 | 78 |
tokenized_data = tokenizer(text) 79 | 80 | input_ids.append(tokenized_data["input_ids"][: tokenizer.model_max_length]) 81 | attention_mask.append(tokenized_data["attention_mask"][: tokenizer.model_max_length]) 82 | prompts.append(prompt) 83 | texts.append(text) 84 | 85 | return { 86 | "input_ids": input_ids, 87 | "attention_mask": attention_mask, 88 | "prompt": prompts 89 | } 90 | 91 | dataset = Dataset.from_generator(dummy_gen) 92 | 93 | dataset = dataset.map( 94 | tokenize, 95 | batched=True, 96 | batch_size=len(dataset), 97 | num_proc=1, 98 | keep_in_memory=True, 99 | load_from_cache_file=False, 100 | remove_columns=["instruction", "input"] 101 | ) 102 | 103 | dataset = dataset.to_list() 104 | 105 | for sample in dataset: 106 | sample["input_ids"] = torch.LongTensor(sample["input_ids"]) 107 | sample["attention_mask"] = torch.LongTensor(sample["attention_mask"]) 108 | 109 | return dataset 110 | 111 | 112 | def main(): 113 | parser = ArgumentParser() 114 | parser.add_argument("--pretrained_model_dir", type=str) 115 | parser.add_argument("--quantized_model_dir", type=str, default=None) 116 | parser.add_argument("--bits", type=int, default=4, choices=[2, 3, 4, 6, 8]) 117 | parser.add_argument("--group_size", type=int, default=128, help="group size, -1 means no grouping or full rank") 118 | parser.add_argument("--desc_act", action="store_true", help="whether to quantize with desc_act") 119 | parser.add_argument("--num_samples", type=int, default=128, help="how many samples will be used to quantize model") 120 | parser.add_argument("--save_and_reload", action="store_true", help="whether save quantized model to disk and reload back") 121 | parser.add_argument("--fast_tokenizer", action="store_true", help="whether use fast tokenizer") 122 | parser.add_argument("--use_triton", action="store_true", help="whether use triton to speedup at inference") 123 | parser.add_argument("--per_gpu_max_memory", type=int, default=None, help="max memory used to load model per gpu") 124 | parser.add_argument("--cpu_max_memory", type=int, default=None, help="max memory used to offload model to cpu") 125 | parser.add_argument("--quant_batch_size", type=int, default=1, help="examples batch size for quantization") 126 | parser.add_argument("--trust_remote_code", action="store_true", help="whether to trust remote code when loading model") 127 | parser.add_argument('--seed', type=int, default=None) 128 | parser.add_argument('--calibration-template', default='default', choices=['default', 'llama-2', 'vicuna_v1.1', 'redpajama-incite-instruct']) 129 | args = parser.parse_args() 130 | 131 | if args.seed is not None: 132 | print(f'Random Seed: {args.seed}') 133 | random.seed(args.seed) 134 | else: 135 | print('No seed is set') 136 | 137 | max_memory = dict() 138 | if args.per_gpu_max_memory is not None and args.per_gpu_max_memory > 0: 139 | if torch.cuda.is_available(): 140 | max_memory.update( 141 | {i: f"{args.per_gpu_max_memory}GIB" for i in range(torch.cuda.device_count())} 142 | ) 143 | if args.cpu_max_memory is not None and args.cpu_max_memory > 0 and max_memory: 144 | max_memory["cpu"] = f"{args.cpu_max_memory}GIB" 145 | if not max_memory: 146 | max_memory = None 147 | 148 | tokenizer = AutoTokenizer.from_pretrained( 149 | args.pretrained_model_dir, 150 | use_fast=args.fast_tokenizer, 151 | trust_remote_code=args.trust_remote_code 152 | ) 153 | model = AutoGPTQForCausalLM.from_pretrained( 154 | args.pretrained_model_dir, 155 | quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size, 
desc_act=args.desc_act), 156 | max_memory=max_memory, 157 | trust_remote_code=args.trust_remote_code 158 | ) 159 | 160 | examples = load_data("dataset/alpaca_data_cleaned.json", tokenizer, args.num_samples, template=args.calibration_template) 161 | examples_for_quant = [ 162 | {"input_ids": example["input_ids"], "attention_mask": example["attention_mask"]} 163 | for example in examples 164 | ] 165 | 166 | start = time.time() 167 | model.quantize( 168 | examples_for_quant, 169 | batch_size=args.quant_batch_size, 170 | use_triton=args.use_triton, 171 | autotune_warmup_after_quantized=args.use_triton 172 | ) 173 | end = time.time() 174 | print(f"quantization took: {end - start: .4f}s") 175 | 176 | if not args.quantized_model_dir: 177 | args.quantized_model_dir = args.pretrained_model_dir 178 | 179 | if args.save_and_reload: 180 | model.save_quantized(args.quantized_model_dir) 181 | gen_config = GenerationConfig.from_pretrained(args.pretrained_model_dir) 182 | gen_config.save_pretrained(args.quantized_model_dir) 183 | del model 184 | if torch.cuda.is_available(): 185 | torch.cuda.empty_cache() 186 | model = AutoGPTQForCausalLM.from_quantized( 187 | args.quantized_model_dir, 188 | device="cuda:0", 189 | use_triton=args.use_triton, 190 | max_memory=max_memory, 191 | inject_fused_mlp=True, 192 | inject_fused_attention=False, 193 | trust_remote_code=args.trust_remote_code 194 | ) 195 | 196 | pipeline_init_kwargs = {"model": model, "tokenizer": tokenizer} 197 | # if not max_memory: 198 | # pipeline_init_kwargs["device"] = "cuda:0" 199 | pipeline = TextGenerationPipeline(**pipeline_init_kwargs) 200 | for example in random.sample(examples, k=min(4, len(examples))): 201 | print(f"prompt: {example['prompt']}") 202 | print("-" * 42) 203 | print(f"golden: {example['output']}") 204 | print("-" * 42) 205 | start = time.time() 206 | generated_text = pipeline( 207 | example['prompt'], 208 | return_full_text=False, 209 | num_beams=1, 210 | max_length=len(example["input_ids"]) + 128 # use this instead of max_new_token to disable UserWarning when integrate with logging 211 | )[0]['generated_text'] 212 | end = time.time() 213 | print(f"quant: {generated_text}") 214 | num_new_tokens = len(tokenizer(generated_text)["input_ids"]) 215 | print(f"generate {num_new_tokens} tokens using {end-start: .4f}s, {num_new_tokens / (end - start)} tokens/s.") 216 | print("=" * 42) 217 | 218 | 219 | if __name__ == "__main__": 220 | import logging 221 | 222 | logging.basicConfig( 223 | format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S" 224 | ) 225 | 226 | main() -------------------------------------------------------------------------------- /extract_csv.py: -------------------------------------------------------------------------------- 1 | """This script is used to log workers for each data point. 2 | Usage: 3 | # create result csv 4 | python extract_csv.py 5 | # create worker csv 6 | # NOTE: Use your name initial for --worker!!! For example, JH for Junyuan Hong 7 | # NOTE: Change result_dir to your local result path!!! 8 | python extract_csv.py --mode=worker --result_dir= --worker= 9 | git add ./ipynb/data/worker_sheet.csv 10 | 11 | # remove content based on blacklist file. 
12 | python extract_csv.py --mode=clean_blacklist --result_dir= 13 | # python extract_csv.py --mode=clean_blacklist --result_dir=../DecodingTrust/results --worker=JH --dry_run 14 | # Double check removing: 15 | # Example: python extract_csv.py --mode=worker --result_dir=../DecodingTrust/results/ --worker=JH 16 | """ 17 | import os 18 | import json 19 | import sys 20 | import argparse 21 | import numpy as np 22 | import csv 23 | 24 | import pandas as pd 25 | from glob import glob 26 | from pandas import DataFrame 27 | import re 28 | 29 | 30 | perspective_name_mapping = { 31 | 'adv-glue-plus-plus': 'AdvGLUE++', 32 | 'adv_demonstration': 'Adv Demo', 33 | 'fairness': 'Fairness', 34 | 'machine_ethics': 'Ethics', 35 | 'ood': 'OOD', 36 | 'privacy': 'Privacy', 37 | 'toxicity': 'Toxicity', 38 | 'stereotype': 'Stereotype', 39 | } 40 | 41 | def load_blacklist(): 42 | # determine if the perspective is blacklisted. 43 | blacklist_file = f'./ipynb/data/{args.worker}_blacklist.csv' 44 | print(f"Read blacklist from {blacklist_file}") 45 | assert os.path.exists(blacklist_file), f"Not found blacklist file: {blacklist_file}" 46 | df = pd.read_csv(blacklist_file) 47 | pers_cols = [v for k, v in perspective_name_mapping.items() if v in df.columns] 48 | df = df[['model_name'] + pers_cols] 49 | 50 | # blk_df = df[pers_cols].apply(lambda x: 'X' in x) 51 | for c in pers_cols: 52 | df[c] = df[c].apply(lambda x: 'X' in x if isinstance(x, str) else False) 53 | # blk_df['model_name'] = df['model_name'] 54 | df = df.set_index('model_name') 55 | return df 56 | 57 | 58 | def check_blacklist(pers_name, model_name, rm_file=None, rm_df_file=None, rm_dict_file=None): 59 | pers_name, _ = map_perspective_name_to_display_name(pers_name) 60 | do_rm_file = True 61 | 62 | vita_compressed_name_mapping = { 63 | 'hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b-chat_magnitude_semistruct@0.5_2to4', 64 | 'hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b-chat_sparsegpt_semistruct@0.5_2to4', 65 | 'hf/compressed-llm/llama-2-13b-magnitude-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b_magnitude_semistruct@0.5_2to4', 66 | 'hf/compressed-llm/llama-2-13b-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b_sparsegpt_semistruct@0.5_2to4', 67 | 'hf/compressed-llm/vicuna-13b-v1.3-magnitude-semistruct@0.5_2to4': 'hf/vita-group/vicuna-13b-v1.3_magnitude_semistruct@0.5_2to4', 68 | 'hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/vicuna-13b-v1.3_sparsegpt_semistruct@0.5_2to4' 69 | } 70 | new_vita_name_mapping = {} 71 | for k, v in vita_compressed_name_mapping.items(): 72 | new_vita_name_mapping.update({v: k}) 73 | vita_compressed_name_mapping = new_vita_name_mapping 74 | 75 | if rm_df_file is not None: 76 | model_name_fmt = model_name 77 | rm_list = [] 78 | for df_model_name in BLACKLIST_DF.index: 79 | if BLACKLIST_DF.loc[df_model_name][pers_name].item(): 80 | model_name = model_name_fmt(df_model_name) 81 | rm_list.append(model_name) 82 | with open(rm_df_file, 'r') as file: 83 | # Read lines into a list 84 | lines = file.readlines() 85 | 86 | def _check_inline(patterns, line): 87 | for k, v in vita_compressed_name_mapping.items(): 88 | if 'k' in line: 89 | line.replace(k, v) 90 | break 91 | for p in patterns: 92 | if p in line: 93 | return True 94 | return False 95 | 96 | with open(rm_df_file, 'w') as file: 97 | for line in lines: 98 | if _check_inline(rm_list, line): 99 | if args.dry_run: 100 | print(f" REMOVE LINE: 
{line}") 101 | file.write(line + '\n') 102 | else: 103 | pass 104 | else: 105 | file.write(line + '\n') 106 | # for model_name in rm_list: 107 | # if model_name in line: 108 | # if args.dry_run: 109 | # print(f" REMOVE LINE: {line}") 110 | # file.write(line + '\n') 111 | # else: 112 | # break 113 | # else: 114 | # file.write(line + '\n') 115 | else: 116 | if not isinstance(model_name, str): 117 | print(f"ERROR: model_name={model_name}, type: {type(model_name)}") 118 | # determine if the perspective is blacklisted. 119 | 120 | try: 121 | if pers_name not in BLACKLIST_DF.columns or not BLACKLIST_DF.loc[model_name][pers_name].item(): 122 | return 123 | except KeyError as e: 124 | if model_name in vita_compressed_name_mapping.keys(): 125 | model_name = vita_compressed_name_mapping[model_name] 126 | 127 | if pers_name not in BLACKLIST_DF.columns or not BLACKLIST_DF.loc[model_name][pers_name].item(): 128 | return 129 | else: 130 | print(f"Error: {model_name} not found!") 131 | 132 | if rm_file is not None: 133 | print(f"Remove file in blacklist: {rm_file}") 134 | if not args.dry_run: 135 | os.remove(rm_file) 136 | # elif rm_df_file is not None: 137 | # raise NotImplementedError() 138 | # elif rm_dict_file is not None: 139 | # raise NotImplementedError() 140 | # else: 141 | # raise RuntimeError("No file to delete") 142 | 143 | def get_adv_demo_scores(breakdown=False): 144 | print('==> AdvDemo') 145 | fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True) 146 | # assert any([f for f in fs if 'hf_compressed-llm_llama-2-13b-awq@3bit_128g_score' in f]) 147 | # TODO: This won't work if OpenAI or Anthropic models start to have underscores 148 | model_names = [os.path.basename(f).removesuffix("_score.json").replace("_", "/", 2) for f in fs] 149 | model_scores = {} 150 | model_rejections = {} 151 | model_breakdowns = {} 152 | for (idx, f), model_name in zip(enumerate(fs), model_names): 153 | with open(f) as src: 154 | scores = json.load(src) 155 | if not scores: 156 | print(f"[AdvDemo] Found Null: {f}") 157 | continue 158 | if args.mode == 'clean_blacklist': 159 | check_blacklist('adv_demonstration', model_name, rm_file=f) 160 | model_scores[model_name] = scores["adv_demonstration"] * 100 161 | model_rejections[model_name] = scores["adv_demonstration_rej"] * 100 162 | model_breakdowns[model_name] = scores 163 | 164 | if args.check: 165 | if idx == 0: 166 | check_keys = set(scores.keys()) 167 | else: 168 | cur_keys = set(scores.keys()) 169 | if len(check_keys) > len(cur_keys): 170 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 171 | elif len(check_keys) < len(cur_keys): 172 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 173 | if breakdown: 174 | return model_breakdowns 175 | else: 176 | return {"score": model_scores, "rejection_rate": model_rejections} 177 | 178 | 179 | def get_advglue_scores(breakdown=False): 180 | print('==> AdvGlue') 181 | fname = os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json") 182 | # print() 183 | if args.mode == 'clean_blacklist': 184 | check_blacklist('adv-glue-plus-plus', lambda model_name: '/'+model_name, rm_df_file=fname) 185 | 186 | scores = pd.read_json(fname).to_dict() 187 | model_scores = {k.removeprefix("/"): v * 100 for k, v in scores["Accuracy"].items()} 188 | model_rejections = {k.removeprefix("/"): v * 100 for k, v in scores["RR+NE"].items()} 189 | if breakdown: 190 | with open(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "breakdown.json")) as src: 191 | 
breakdown_scores = json.load(src) 192 | return breakdown_scores 193 | else: 194 | return {"score": model_scores, "rejection_rate": model_rejections} 195 | 196 | 197 | def get_fairness_scores(breakdown=False): 198 | print('==> Fairness') 199 | fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True) 200 | model_names = [ 201 | os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs 202 | ] 203 | model_scores = {} 204 | model_rejections = {} 205 | model_breakdown = {} 206 | for (idx, f), model_name in zip(enumerate(fs), model_names): 207 | with open(f) as src: 208 | scores = json.load(src) 209 | if args.mode == 'clean_blacklist': 210 | check_blacklist('fairness', model_name, rm_file=f) 211 | # model_scores[model_name] = scores.get("fairness score", None) 212 | model_scores[model_name] = scores.get("fairness score (new)", None) 213 | model_rejections[model_name] = scores.get("rejection rate", None) 214 | # model_breakdown[model_name] = { 215 | # "zero-shot": { 216 | # "Acc": "", 217 | # "Demographic Parity Difference": "", 218 | # "Equalized Odds Difference": " " 219 | # }, 220 | # "few-shot-1": {}, 221 | # "few-shot-2": {}, 222 | # "Averaged Score": {}, 223 | # } 224 | if args.check: 225 | if idx == 0: 226 | check_keys = set(scores.keys()) 227 | print(f" # REFER: {model_name}") 228 | print(f" keys: {check_keys}") 229 | else: 230 | cur_keys = set(scores.keys()) 231 | if len(check_keys) > len(cur_keys): 232 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 233 | elif len(check_keys) < len(cur_keys): 234 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 235 | return {"score": model_scores, "rejection_rate": model_rejections} 236 | 237 | 238 | def get_ethics_scores(breakdown=False): 239 | print('==> Ethics') 240 | fname = os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 241 | df = pd.read_json(fname, lines=True) 242 | if args.mode == 'clean_blacklist': 243 | check_blacklist('machine_ethics', lambda model_name: model_name.replace('/', r'\/'), rm_df_file=fname) 244 | if breakdown: 245 | keys = ["avg_fpr_ev", "avg_fpr_jb", "acc_few", "acc_zero"] 246 | df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates() 247 | df = df[["model"] + keys] 248 | df = df.rename({ 249 | "acc_few": "few-shot benchmark", 250 | "acc_zero": "zero-shot benchmark", 251 | "avg_fpr_jb": "jailbreak", 252 | "avg_fpr_ev": "evasive" 253 | }, axis=1) 254 | 255 | model_breakdown = {} 256 | for record in df.to_dict(orient="records"): 257 | model_breakdown["model"] = { 258 | "few-shot benchmark": record["few-shot benchmark"], 259 | "zero-shot benchmark": record["zero-shot benchmark"], 260 | "jailbreak": record["jailbreak"], 261 | "evasive": record["evasive"] 262 | } 263 | # "jailbreak": { 264 | # "brittleness": 1.0 265 | # }, 266 | # "evasive": { 267 | # "brittleness": 1.0 268 | # }, 269 | # "zero-shot benchmark": { 270 | # "performance": 0.533902323376007 271 | # }, 272 | # "few-shot benchmark": { 273 | # "performance": 0.683262209577999 274 | # } 275 | return model_breakdown 276 | else: 277 | keys = ["agg_score", "ref_rate"] 278 | df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys] 279 | return df.to_dict() 280 | 281 | 282 | def get_ood_scores(breakdown=False): 283 | print('==> OOD') 284 | path_prefix = os.path.join(RESULT_DIR, "ood", "results/") 285 | fs = glob(os.path.join(path_prefix, "**", 
"final_scores.json"), recursive=True) 286 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 287 | model_scores = {} 288 | model_rejections = {} 289 | model_breakdowns = {} 290 | for (idx, f), model_name in zip(enumerate(fs), model_names): 291 | with open(f) as src: 292 | try: 293 | scores = json.load(src) 294 | except json.JSONDecodeError: 295 | print(f"JSONDecodeError: {f}") 296 | continue 297 | if not scores: 298 | continue 299 | if args.mode == 'clean_blacklist': 300 | check_blacklist('ood', model_name, rm_file=f) 301 | if "score" not in scores: 302 | print(f"!!ERROR score is missing in model: {model_name}") 303 | continue 304 | model_scores[model_name] = scores["score"] 305 | model_rejections[model_name] = scores.get("rr", None) 306 | model_breakdowns[model_name] = scores 307 | 308 | if args.check: 309 | if idx == 0: 310 | check_keys = set(scores.keys()) 311 | else: 312 | cur_keys = set(scores.keys()) 313 | if len(check_keys) > len(cur_keys): 314 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 315 | elif len(check_keys) < len(cur_keys): 316 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 317 | if breakdown: 318 | return model_breakdowns 319 | else: 320 | return {"score": model_scores, "rejection_rate": model_rejections} 321 | 322 | 323 | def get_privacy_scores(breakdown=False): 324 | print('==> Privacy') 325 | fname = os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl") 326 | df = pd.read_json(fname, lines=True) 327 | if args.mode == 'clean_blacklist': 328 | check_blacklist('privacy', lambda model_name: model_name.replace('/', r'_'), rm_df_file=fname) 329 | # TODO: This won't work if OpenAI or Anthropic models start to have underscores 330 | df["model"] = df["model"].apply(lambda x: x.replace("_", "/", 2)) 331 | if breakdown: 332 | keys = ["enron", "pii", "understanding"] 333 | model_breakdown = {} 334 | models = df["model"].unique().tolist() 335 | for model in models: 336 | model_breakdown[model] = {} 337 | for key in keys: 338 | df_key = df[df["dataset"] == key].drop_duplicates().set_index("model") 339 | model_breakdown[model][key] = {"asr": df_key.loc[model, "leak_rate"]} 340 | return model_breakdown 341 | else: 342 | df = df[df["dataset"] == "all"].drop_duplicates().set_index("model") 343 | return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict() 344 | 345 | 346 | def get_stereotype_scores(): 347 | print('==> Stereotype') 348 | path_prefix = os.path.join(RESULT_DIR, "stereotype", "generations/") 349 | fs = glob(os.path.join(path_prefix, "**", "25_compiled.json"), recursive=True) 350 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 351 | model_scores = {} 352 | model_rejections = {} 353 | for (idx, f), model_name in zip(enumerate(fs), model_names): 354 | with open(f) as src: 355 | scores = json.load(src) 356 | if not scores: 357 | continue 358 | if args.mode == 'clean_blacklist': 359 | check_blacklist('stereotype', model_name, rm_file=f) 360 | model_scores[model_name] = scores["overall_score"] * 100 361 | model_rejections[model_name] = scores["overall_rejection_rate"] * 100 362 | 363 | if args.check: 364 | if idx == 0: 365 | check_keys = set(scores.keys()) 366 | else: 367 | cur_keys = set(scores.keys()) 368 | if len(check_keys) > len(cur_keys): 369 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 370 | elif len(check_keys) < len(cur_keys): 371 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 
372 | 373 | return {"score": model_scores, "rejection_rate": model_rejections} 374 | 375 | 376 | def get_toxicity_scores(): 377 | print('==> Toxicity') 378 | path_prefix = os.path.join(RESULT_DIR, "toxicity", "user_prompts", "generations/") 379 | fs = glob(os.path.join(path_prefix, "**", "report.jsonl"), recursive=True) 380 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 381 | model_scores = {} 382 | model_rejections = {} 383 | for (idx, f), model_name in zip(enumerate(fs), model_names): 384 | with open(f) as src: 385 | scores = json.load(src) 386 | if not scores: 387 | continue 388 | if args.mode == 'clean_blacklist': 389 | check_blacklist('toxicity', model_name, rm_file=f) 390 | score_key = os.path.join(model_name, "aggregated-score") 391 | # score_key = os.path.join(model_name + '_strip', "aggregated-score") 392 | # print(f"### score_key: {score_key}") 393 | if score_key not in scores or scores[score_key] is None or np.isnan(scores[score_key]): 394 | continue 395 | model_scores[model_name] = scores[score_key] * 100 396 | model_rejections[model_name] = np.mean([v for k, v in scores.items() if k.endswith("rej_rates")]) 397 | 398 | if args.check: 399 | if idx == 0: 400 | check_keys = {k.split('/')[-1] for k in set(scores.keys())} 401 | else: 402 | cur_keys = {k.split('/')[-1] for k in set(scores.keys())} 403 | if len(check_keys) > len(cur_keys): 404 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 405 | elif len(check_keys) < len(cur_keys): 406 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 407 | dif_set = cur_keys - check_keys 408 | if len(dif_set) > 0: 409 | print(f" * ERROR: different keys: {dif_set}") 410 | assert len(check_keys - cur_keys) == 0 411 | return {"score": model_scores, "rejection_rate": model_rejections} 412 | 413 | 414 | def summarize_results(): 415 | summarized_results = { 416 | "aggregated_results": { 417 | "adv_demonstration": get_adv_demo_scores(), 418 | "adv-glue-plus-plus": get_advglue_scores(), 419 | "fairness": get_fairness_scores(), 420 | "machine_ethics": get_ethics_scores(), 421 | "ood": get_ood_scores(), 422 | "privacy": get_privacy_scores(), 423 | "stereotype": get_stereotype_scores(), 424 | "toxicity": get_toxicity_scores() 425 | }, 426 | 427 | } 428 | 429 | summarized_results = sort_keys(summarized_results) 430 | 431 | # mapping 432 | 433 | 434 | 435 | return summarized_results 436 | 437 | 438 | def map_perspective_name_to_display_name(perspective_name): 439 | perspective_name_ref_mapping = {} 440 | for key, v in perspective_name_mapping.items(): 441 | perspective_name_ref_mapping.update({key: v + ' Ref'}) 442 | 443 | return perspective_name_mapping[perspective_name], perspective_name_ref_mapping[perspective_name] 444 | 445 | def load_avg_acc(path): 446 | df = pd.read_csv(path) 447 | df_ = df[['Display Name', 'Avg. 
Acc']] 448 | return df_ 449 | 450 | def get_display_name(model_name): 451 | if model_name in ['anthropic/claude-2.0', 'openai/gpt-3.5-turbo-0301']: 452 | return model_name, model_name 453 | sparsity = get_sparsity(model_name) 454 | method, submethod, compression_suffix, model_size = get_compression_method(model_name) 455 | 456 | if 'llama-2' in model_name.lower(): 457 | family = 'Llama-2' 458 | elif 'vicuna' in model_name: 459 | family = 'Vicuna' 460 | else: 461 | raise NotImplementedError 462 | 463 | if 'vicuna' in model_name or 'chat' in model_name: 464 | chat = '-chat' 465 | else: 466 | chat = '' 467 | 468 | sparsity = str(sparsity) if method == 'quantization' else '' 469 | 470 | old_display_name = family + f'-{model_size}' + chat + '-' + submethod.lower() + sparsity 471 | new_display_name = family + f'-{model_size}' + chat 472 | if submethod.lower() != 'none': 473 | new_display_name += '-' + submethod.lower() + compression_suffix 474 | return old_display_name, new_display_name 475 | 476 | 477 | def get_base_model(model_name): 478 | if 'llama-2' in model_name.lower(): 479 | family = 'LLAMA2' 480 | elif 'vicuna' in model_name: 481 | family = 'Vicuna' 482 | else: 483 | raise NotImplementedError 484 | 485 | if 'vicuna' in model_name or 'chat' in model_name: 486 | chat = ' Chat' 487 | else: 488 | chat = '' 489 | found = re.search(r"(?<=\D)\d+b(?=\D|$)", model_name) 490 | if not found: 491 | raise RuntimeError() 492 | model_size = found[0] if found else None 493 | 494 | return family + f' {model_size.lower()}' + chat 495 | 496 | 497 | def get_compression_method(model_name): 498 | if "claude" in model_name.lower(): 499 | return 'none', 'none', '', None 500 | found = re.search(r"(?<=\D)\d+b(?=\D|$)", model_name) 501 | if not found: 502 | raise RuntimeError() 503 | model_size = found[0] if found else None 504 | if 'awq' in model_name: 505 | compression = 'quantization' 506 | submethod = 'AWQ' 507 | elif 'gptq' in model_name: 508 | compression = 'quantization' 509 | submethod = 'GPTQ' 510 | elif 'magnitude' in model_name: 511 | compression = 'pruning' 512 | submethod = 'mag' 513 | elif 'sparsegpt' in model_name: 514 | compression = 'pruning' 515 | submethod = 'sparsegpt' 516 | elif 'wanda' in model_name: 517 | compression = 'pruning' 518 | submethod = 'wanda' 519 | else: 520 | compression = 'none' 521 | submethod = 'none' 522 | 523 | compress_suffix = model_name.split('@')[-1] if '@' in model_name else '' 524 | 525 | return compression, submethod, compress_suffix, model_size 526 | 527 | 528 | def get_sparsity(model_name): 529 | if ('wanda' in model_name or 'sparsegpt' in model_name or 'magnitude' in model_name): # and '0.5' in model_name: 530 | parts = model_name.split('_') 531 | if 'seed' in parts[-1]: 532 | structure_sparsity = parts[-2] 533 | else: 534 | structure_sparsity = parts[-1] 535 | sparsity = { 536 | '1to2': 8, 537 | '2to4': 8, 538 | '4to8': 8, 539 | }[structure_sparsity] 540 | elif '3bit' in model_name: 541 | sparsity = 3 542 | elif '4bit' in model_name: 543 | sparsity = 4 544 | elif '8bit' in model_name: 545 | sparsity = 8 546 | else: 547 | sparsity = 16 548 | return sparsity 549 | 550 | 551 | def get_structure_sparsity(model_name): 552 | if ('wanda' in model_name or 'sparsegpt' in model_name or 'magnitude' in model_name): # and '0.5' in model_name: 553 | # structure_sparsity = model_name.split('_')[-1] 554 | parts = model_name.split('_') 555 | if 'seed' in parts[-1]: 556 | structure_sparsity = parts[-2] 557 | else: 558 | structure_sparsity = parts[-1] 559 | else: 560 | 
structure_sparsity = 'none' 561 | return structure_sparsity 562 | 563 | 564 | def sort_keys(obj): 565 | if isinstance(obj, dict): 566 | return {k: sort_keys(obj[k]) for k in sorted(obj.keys())} 567 | elif isinstance(obj, list): 568 | return [sort_keys(element) for element in obj] 569 | else: 570 | return obj 571 | 572 | def load_csv_as_dict(file_name): 573 | with open(file_name, newline='') as csvfile: 574 | reader = csv.DictReader(csvfile) 575 | result_dict = {} 576 | for row in reader: 577 | display_name = row['Display Name'] 578 | # Remove the 'Display Name' key from the row as it's used as the main key 579 | # row.pop('Display Name', None) 580 | result_dict[display_name] = row 581 | return result_dict 582 | 583 | def results_to_csv(results): 584 | avg_acc_df = load_avg_acc('./ipynb/data/mmlu_avg_acc.csv') 585 | results = results['aggregated_results'] 586 | score_names = ['score', 'privacy_score', 'agg_score'] 587 | ref_rate_names = ['rejection_rate', 'reject_rate', 'ref_rate'] 588 | if args.mode == 'worker': 589 | fname = f'./ipynb/data/{args.out_name}.csv' 590 | # df_results = pd.read_csv(f'./ipynb/data/{args.out_name}.csv') 591 | df_results = load_csv_as_dict(fname) if os.path.exists(fname) else {} 592 | num_worker_conflict = 0 593 | else: 594 | df_results = {} 595 | for perspective_name, persp_vals in results.items(): 596 | # if perspective_name in ['machine_ethics']: 597 | if perspective_name not in []: # ['stereotype', 'toxicity']: 598 | for score_name, score_vals in persp_vals.items(): 599 | if score_name not in score_names + ref_rate_names: 600 | print(score_name) 601 | continue 602 | if score_vals is None: 603 | print(f"Found {score_name} is None for {perspective_name}") 604 | continue 605 | for model_name, score_val in score_vals.items(): 606 | # if model_name == 'hf/lmsys/vicuna-13b-v1.3': 607 | # print(f"Fuck") 608 | old_display_name, new_display_name = get_display_name(model_name) 609 | if new_display_name not in avg_acc_df['Display Name'].values: 610 | print(f"!! Not found {new_display_name} in MMLU results. Induced from {model_name}") 611 | continue 612 | 613 | if new_display_name not in df_results.keys(): 614 | df_results[new_display_name] = {} 615 | 616 | mmlu_avg_acc = avg_acc_df[avg_acc_df['Display Name'] == new_display_name].iloc[0]['Avg. Acc'] 617 | # print(mmlu_avg_acc, new_display_name, model_name) 618 | # print(mmlu_avg_acc.iloc[0]['Avg. Acc']) 619 | df_results[new_display_name] = { 620 | 'Base Model': get_base_model(model_name), 621 | 'Compression Method': get_compression_method(model_name)[0], 622 | 'Method Subtype': get_compression_method(model_name)[1], 623 | 'Sparsity/bits': get_sparsity(model_name), 624 | 'Sparsity/struct': get_structure_sparsity(model_name), 625 | 'Display Name': new_display_name, # [1] for new display name 626 | 'Avg. 
Acc': mmlu_avg_acc, 627 | 'model_name': model_name, # TODO remove this finally 628 | } 629 | 630 | model_results = df_results[new_display_name] 631 | display_perspective_name, display_persp_ref_name = map_perspective_name_to_display_name(perspective_name) 632 | if score_name in score_names: 633 | pers_name = display_perspective_name 634 | elif score_name in ref_rate_names: 635 | pers_name = display_persp_ref_name 636 | if args.mode == 'result' and pers_name in model_results: 637 | # raise RuntimeError(f"Try to update perspective `{display_perspective_name}` again for {new_display_name}") 638 | print(f"WARNING: Try to update perspective `{display_perspective_name}` again for {model_name} ({model_results['model_name']}) => {new_display_name}") 639 | 640 | if args.mode == 'result': 641 | model_results[pers_name] = score_val # TODO: range not sure 642 | elif args.mode == 'worker': 643 | if pers_name in model_results: 644 | workers = model_results[pers_name].split('|') 645 | if len(workers) == 1 and len(workers[0]) == 0: 646 | workers = [args.worker] 647 | else: 648 | workers = set(workers + [args.worker]) 649 | model_results[pers_name] = '|'.join(workers) 650 | if len(workers) > 1: 651 | num_worker_conflict += 1 652 | else: 653 | model_results[pers_name] = args.worker 654 | 655 | # print(df_results) 656 | if args.mode in ('result', 'worker'): 657 | csv_df_results = [] 658 | for k, v in df_results.items(): 659 | csv_df_results.append(v) 660 | data = pd.read_json(json.dumps(csv_df_results)) 661 | fname = f'./ipynb/data/{args.out_name}.csv' 662 | print(f"Write result to {fname}") 663 | data.to_csv(fname, index=False) 664 | if args.mode == 'worker': 665 | if num_worker_conflict > 0: 666 | print(f"\nWARNING: found {num_worker_conflict} worker conflicts!!") 667 | else: 668 | print(f"\nCongratulations! No worker conflicts!!") 669 | 670 | def clean_worker_sheet(): 671 | # determine if the perspective is blacklisted. 
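    # clean_worker_sheet: drop this worker's initials from every worker_sheet.csv cell whose
    # (model, perspective) entry is marked 'X' in ./ipynb/data/<worker>_blacklist.csv, covering
    # both the perspective columns and their ' Ref' rejection-rate columns (Toxicity Ref is skipped).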
672 |     blacklist_file = f'./ipynb/data/{args.worker}_blacklist.csv'
673 |     assert os.path.exists(blacklist_file), f"Not found blacklist file: {blacklist_file}"
674 |     blk_df = pd.read_csv(blacklist_file)
675 |     pers_cols = [v for k, v in perspective_name_mapping.items() if v in blk_df.columns]
676 |     # TODO Skip toxicity
677 |     pers_cols_ref = [per + ' Ref' for per in pers_cols if per != 'Toxicity']
678 |     pers_cols = pers_cols + pers_cols_ref
679 |     blk_df = blk_df[['model_name'] + pers_cols]
680 |     worker_sheet_file = './ipynb/data/worker_sheet.csv'
681 |     worker_sheet_df = pd.read_csv(worker_sheet_file)
682 | 
683 |     def _rm_worker(line):
684 |         splits = line.strip().split('|')
685 |         if args.worker in splits:
686 |             splits.remove(args.worker)
687 |         return '|'.join(splits)
688 | 
689 |     print(f"Removing worker '{args.worker}' from {worker_sheet_file} for entries marked 'X' in {blacklist_file}.")
690 |     for c in pers_cols:
691 |         blk_c_idx = worker_sheet_df.join(blk_df, lsuffix='_other')[c].apply(lambda x: 'X' in x if isinstance(x, str) else False)
692 |         worker_sheet_df.loc[blk_c_idx, c] = worker_sheet_df.loc[blk_c_idx, c].apply(_rm_worker)
693 | 
694 |     return worker_sheet_df
695 | 
696 | if __name__ == "__main__":
697 |     parser = argparse.ArgumentParser()
698 |     parser.add_argument('--mode', default='result', type=str, help='What to produce: the result csv (result), the worker csv (worker), or a pass that removes blacklisted results (clean_blacklist).', choices=['result', 'worker', 'clean_blacklist'])
699 |     parser.add_argument('--out_name_suffix', default='')
700 |     parser.add_argument('--result_dir', default="./results")
701 |     parser.add_argument('--worker', default=None, type=str, help='Set worker name.', choices=['ZL', 'JH', 'JD', 'CZ'])
702 |     parser.add_argument('--dry_run', action='store_true', help='dry run to check the files to remove')
703 |     parser.add_argument('--check', action='store_true', help='check if results are consistent.')
704 |     args = parser.parse_args()
705 | 
706 |     if args.mode == 'clean_blacklist':
707 |         BLACKLIST_DF = load_blacklist()
708 |         assert args.worker is not None, "Please specify worker."
709 | 
710 |     if args.mode == 'worker':
711 |         assert args.result_dir != "./results", "You should not attribute worker to global results. Set --result_dir to your local result folder!"
712 |         assert args.worker is not None, "Please specify worker."
713 | 
714 |     if args.mode == 'result':
715 |         args.out_name = 'num_sheet'
716 |     elif args.mode == 'worker':
717 |         args.out_name = 'worker_sheet'
718 |     elif args.mode == 'clean_blacklist':
719 |         args.out_name = None
720 |     else:
721 |         raise RuntimeError(f"mode: {args.mode}")
722 |     if args.out_name and len(args.out_name_suffix) > 0:
723 |         args.out_name += '-' + args.out_name_suffix
724 | 
725 |     RESULT_DIR = args.result_dir
726 | 
727 |     results = summarize_results()
728 |     results_to_csv(results)
729 | 
730 |     if args.mode == 'clean_blacklist' and not args.dry_run:
731 |         worker_sheet_df = clean_worker_sheet()
732 |         worker_sheet_df.to_csv('./ipynb/data/worker_sheet.csv', index=False)
733 | 
-------------------------------------------------------------------------------- /compression/gptq/conversation.py: --------------------------------------------------------------------------------
1 | """
2 | Conversation prompt templates from FastChat (https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
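
A minimal usage sketch (everything below is defined in this module; "vicuna_v1.1" is just one of the
templates registered further down, and the resulting prompt text is whatever that template produces):

    conv = get_conv_template("vicuna_v1.1")   # returns a copy of the registered template
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], None)  # None leaves the assistant turn open for generation
    prompt = conv.get_prompt()

See the __main__ block at the bottom of this file for a longer worked example.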
3 | """ 4 | 5 | import dataclasses 6 | from enum import auto, IntEnum 7 | from typing import List, Any, Dict, Union 8 | 9 | 10 | class SeparatorStyle(IntEnum): 11 | """Separator styles.""" 12 | 13 | ADD_COLON_SINGLE = auto() 14 | ADD_COLON_TWO = auto() 15 | ADD_COLON_SPACE_SINGLE = auto() 16 | NO_COLON_SINGLE = auto() 17 | NO_COLON_TWO = auto() 18 | ADD_NEW_LINE_SINGLE = auto() 19 | LLAMA2 = auto() 20 | CHATGLM = auto() 21 | CHATML = auto() 22 | CHATINTERN = auto() 23 | DOLLY = auto() 24 | RWKV = auto() 25 | PHOENIX = auto() 26 | ROBIN = auto() 27 | INSTRUCT = auto() 28 | 29 | 30 | @dataclasses.dataclass 31 | class Conversation: 32 | """A class that manages prompt templates and keeps all conversation history.""" 33 | 34 | # The name of this template 35 | name: str 36 | # The system prompt 37 | system: Union[str, None] 38 | # Two roles 39 | roles: List[str] 40 | # All messages. Each item is (role, message). 41 | messages: List[List[str]] 42 | # The number of few shot examples 43 | offset: int 44 | # Separators 45 | sep_style: SeparatorStyle 46 | sep: str 47 | sep2: str = None 48 | # Stop criteria (the default one is EOS token) 49 | stop_str: str = None 50 | # Stops generation if meeting any token in this list 51 | stop_token_ids: List[int] = None 52 | 53 | def get_prompt(self) -> str: 54 | """Get the prompt for generation.""" 55 | if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE: 56 | ret = self.system + self.sep if self.system else "" 57 | for role, message in self.messages: 58 | if message: 59 | ret += role + ": " + message + self.sep 60 | else: 61 | ret += role + ":" 62 | return ret 63 | elif self.sep_style == SeparatorStyle.ADD_COLON_TWO: 64 | seps = [self.sep, self.sep2] 65 | ret = self.system + seps[0] 66 | for i, (role, message) in enumerate(self.messages): 67 | if message: 68 | ret += role + ": " + message + seps[i % 2] 69 | else: 70 | ret += role + ":" 71 | return ret 72 | elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE: 73 | ret = self.system + self.sep 74 | for role, message in self.messages: 75 | if message: 76 | ret += role + ": " + message + self.sep 77 | else: 78 | ret += role + ": " # must be end with a space 79 | return ret 80 | elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE: 81 | ret = "" if self.system == "" else self.system + self.sep 82 | for role, message in self.messages: 83 | if message: 84 | ret += role + "\n" + message + self.sep 85 | else: 86 | ret += role + "\n" 87 | return ret 88 | elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE: 89 | ret = self.system 90 | for role, message in self.messages: 91 | if message: 92 | ret += role + message + self.sep 93 | else: 94 | ret += role 95 | return ret 96 | elif self.sep_style == SeparatorStyle.NO_COLON_TWO: 97 | seps = [self.sep, self.sep2] 98 | ret = self.system 99 | for i, (role, message) in enumerate(self.messages): 100 | if message: 101 | ret += role + message + seps[i % 2] 102 | else: 103 | ret += role 104 | return ret 105 | elif self.sep_style == SeparatorStyle.RWKV: 106 | ret = self.system 107 | for i, (role, message) in enumerate(self.messages): 108 | if message: 109 | ret += ( 110 | role 111 | + ": " 112 | + message.replace("\r\n", "\n").replace("\n\n", "\n") 113 | ) 114 | ret += "\n\n" 115 | else: 116 | ret += role + ":" 117 | return ret 118 | elif self.sep_style == SeparatorStyle.LLAMA2: 119 | seps = [self.sep, self.sep2] 120 | ret = "" 121 | for i, (role, message) in enumerate(self.messages): 122 | if message: 123 | if i == 0: 124 | ret += self.system + message 125 | else: 126 | 
ret += role + " " + message + seps[i % 2] 127 | else: 128 | ret += role 129 | return ret 130 | elif self.sep_style == SeparatorStyle.INSTRUCT: 131 | ret = self.system 132 | for role, message in self.messages: 133 | if message: 134 | if role == "answer": 135 | # No "role" for user input 136 | ret += role + ": " + message + self.sep 137 | else: 138 | ret += message + self.sep 139 | else: 140 | ret += role + ":" 141 | return ret 142 | elif self.sep_style == SeparatorStyle.CHATGLM: 143 | # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308 144 | # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926 145 | round_add_n = 1 if self.name == "chatglm2" else 0 146 | if self.system: 147 | ret = self.system + self.sep 148 | else: 149 | ret = "" 150 | 151 | for i, (role, message) in enumerate(self.messages): 152 | if i % 2 == 0: 153 | ret += f"[Round {i//2 + round_add_n}]{self.sep}" 154 | 155 | if message: 156 | ret += f"{role}:{message}{self.sep}" 157 | else: 158 | ret += f"{role}:" 159 | return ret 160 | elif self.sep_style == SeparatorStyle.CHATML: 161 | ret = "" if self.system == "" else self.system + self.sep + "\n" 162 | for role, message in self.messages: 163 | if message: 164 | ret += role + "\n" + message + self.sep + "\n" 165 | else: 166 | ret += role + "\n" 167 | return ret 168 | elif self.sep_style == SeparatorStyle.CHATINTERN: 169 | # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771 170 | seps = [self.sep, self.sep2] 171 | ret = self.system 172 | for i, (role, message) in enumerate(self.messages): 173 | if i % 2 == 0: 174 | ret += "" 175 | if message: 176 | ret += role + ":" + message + seps[i % 2] + "\n" 177 | else: 178 | ret += role + ":" 179 | return ret 180 | elif self.sep_style == SeparatorStyle.DOLLY: 181 | seps = [self.sep, self.sep2] 182 | ret = self.system 183 | for i, (role, message) in enumerate(self.messages): 184 | if message: 185 | ret += role + ":\n" + message + seps[i % 2] 186 | if i % 2 == 1: 187 | ret += "\n\n" 188 | else: 189 | ret += role + ":\n" 190 | return ret 191 | elif self.sep_style == SeparatorStyle.PHOENIX: 192 | ret = self.system 193 | for role, message in self.messages: 194 | if message: 195 | ret += role + ": " + "" + message + "" 196 | else: 197 | ret += role + ": " + "" 198 | return ret 199 | elif self.sep_style == SeparatorStyle.ROBIN: 200 | ret = self.system + self.sep 201 | for role, message in self.messages: 202 | if message: 203 | ret += role + ":\n" + message + self.sep 204 | else: 205 | ret += role + ":\n" 206 | return ret 207 | else: 208 | raise ValueError(f"Invalid style: {self.sep_style}") 209 | 210 | def append_message(self, role: str, message: str): 211 | """Append a new message.""" 212 | self.messages.append([role, message]) 213 | 214 | def update_last_message(self, message: str): 215 | """Update the last output. 216 | 217 | The last message is typically set to be None when constructing the prompt, 218 | so we need to update it in-place after getting the response from a model. 
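
        A small sketch of the intended call pattern (``model_output`` is a placeholder for
        whatever text the model returned):

            conv.append_message(conv.roles[1], None)
            prompt = conv.get_prompt()
            # ... run the model on `prompt` to obtain model_output ...
            conv.update_last_message(model_output)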
219 | """ 220 | self.messages[-1][1] = message 221 | 222 | def to_gradio_chatbot(self): 223 | """Convert the conversation to gradio chatbot format.""" 224 | ret = [] 225 | for i, (role, msg) in enumerate(self.messages[self.offset :]): 226 | if i % 2 == 0: 227 | ret.append([msg, None]) 228 | else: 229 | ret[-1][-1] = msg 230 | return ret 231 | 232 | def to_openai_api_messages(self): 233 | """Convert the conversation to OpenAI chat completion format.""" 234 | ret = [{"role": "system", "content": self.system}] 235 | 236 | for i, (_, msg) in enumerate(self.messages[self.offset :]): 237 | if i % 2 == 0: 238 | ret.append({"role": "user", "content": msg}) 239 | else: 240 | if msg is not None: 241 | ret.append({"role": "assistant", "content": msg}) 242 | return ret 243 | 244 | def copy(self): 245 | return Conversation( 246 | name=self.name, 247 | system=self.system, 248 | roles=self.roles, 249 | messages=[[x, y] for x, y in self.messages], 250 | offset=self.offset, 251 | sep_style=self.sep_style, 252 | sep=self.sep, 253 | sep2=self.sep2, 254 | stop_str=self.stop_str, 255 | stop_token_ids=self.stop_token_ids, 256 | ) 257 | 258 | def dict(self): 259 | return { 260 | "template_name": self.name, 261 | "system": self.system, 262 | "roles": self.roles, 263 | "messages": self.messages, 264 | "offset": self.offset, 265 | } 266 | 267 | 268 | # A global registry for all conversation templates 269 | conv_templates: Dict[str, Conversation] = {} 270 | 271 | 272 | def register_conv_template(template: Conversation, override: bool = False): 273 | """Register a new conversation template.""" 274 | if not override: 275 | assert ( 276 | template.name not in conv_templates 277 | ), f"{template.name} has been registered." 278 | 279 | conv_templates[template.name] = template 280 | 281 | 282 | def get_conv_template(name: str) -> Conversation: 283 | """Get a conversation template.""" 284 | return conv_templates[name].copy() 285 | 286 | 287 | # A template with a one-shot conversation example 288 | register_conv_template( 289 | Conversation( 290 | name="one_shot", 291 | system="A chat between a curious human and an artificial intelligence assistant. " 292 | "The assistant gives helpful, detailed, and polite answers to the human's questions.", 293 | roles=("Human", "Assistant"), 294 | messages=( 295 | ( 296 | "Human", 297 | "Got any creative ideas for a 10 year old’s birthday?", 298 | ), 299 | ( 300 | "Assistant", 301 | """Of course! Here are some creative ideas for a 10-year-old's birthday party: 302 | 1. Treasure Hunt: Organize a treasure hunt in your backyard or nearby park. Create clues and riddles for the kids to solve, leading them to hidden treasures and surprises. 303 | 2. Science Party: Plan a science-themed party where kids can engage in fun and interactive experiments. You can set up different stations with activities like making slime, erupting volcanoes, or creating simple chemical reactions. 304 | 3. Outdoor Movie Night: Set up a backyard movie night with a projector and a large screen or white sheet. Create a cozy seating area with blankets and pillows, and serve popcorn and snacks while the kids enjoy a favorite movie under the stars. 305 | 4. DIY Crafts Party: Arrange a craft party where kids can unleash their creativity. Provide a variety of craft supplies like beads, paints, and fabrics, and let them create their own unique masterpieces to take home as party favors. 306 | 5. Sports Olympics: Host a mini Olympics event with various sports and games. 
Set up different stations for activities like sack races, relay races, basketball shooting, and obstacle courses. Give out medals or certificates to the participants. 307 | 6. Cooking Party: Have a cooking-themed party where the kids can prepare their own mini pizzas, cupcakes, or cookies. Provide toppings, frosting, and decorating supplies, and let them get hands-on in the kitchen. 308 | 7. Superhero Training Camp: Create a superhero-themed party where the kids can engage in fun training activities. Set up an obstacle course, have them design their own superhero capes or masks, and organize superhero-themed games and challenges. 309 | 8. Outdoor Adventure: Plan an outdoor adventure party at a local park or nature reserve. Arrange activities like hiking, nature scavenger hunts, or a picnic with games. Encourage exploration and appreciation for the outdoors. 310 | Remember to tailor the activities to the birthday child's interests and preferences. Have a great celebration!""", 311 | ), 312 | ), 313 | offset=2, 314 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 315 | sep="\n### ", 316 | stop_str="###", 317 | ) 318 | ) 319 | 320 | # A template similar to the "one_shot" template above but remove the example. 321 | register_conv_template( 322 | Conversation( 323 | name="zero_shot", 324 | system="A chat between a curious human and an artificial intelligence assistant. " 325 | "The assistant gives helpful, detailed, and polite answers to the human's questions.", 326 | roles=("Human", "Assistant"), 327 | messages=(), 328 | offset=0, 329 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 330 | sep="\n### ", 331 | stop_str="###", 332 | ) 333 | ) 334 | 335 | # Vicuna v1.1 template 336 | register_conv_template( 337 | Conversation( 338 | name="vicuna_v1.1", 339 | system="A chat between a curious user and an artificial intelligence assistant. " 340 | "The assistant gives helpful, detailed, and polite answers to the user's questions.", 341 | roles=("USER", "ASSISTANT"), 342 | messages=(), 343 | offset=0, 344 | sep_style=SeparatorStyle.ADD_COLON_TWO, 345 | sep=" ", 346 | sep2="", 347 | ) 348 | ) 349 | 350 | register_conv_template( 351 | Conversation( 352 | name="vicuna_v1.1_trust", 353 | system="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 354 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 355 | "Please ensure that your responses are socially unbiased and positive in nature.", 356 | roles=("USER", "ASSISTANT"), 357 | messages=(), 358 | offset=0, 359 | sep_style=SeparatorStyle.ADD_COLON_TWO, 360 | sep=" ", 361 | sep2="", 362 | ) 363 | ) 364 | 365 | register_conv_template( 366 | Conversation( 367 | name="vicuna_v1.1_fact", 368 | system= 369 | "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
" 370 | "If you don't know the answer to a question, please don't share false information.", 371 | roles=("USER", "ASSISTANT"), 372 | messages=(), 373 | offset=0, 374 | sep_style=SeparatorStyle.ADD_COLON_TWO, 375 | sep=" ", 376 | sep2="", 377 | ) 378 | ) 379 | 380 | 381 | register_conv_template( 382 | Conversation( 383 | name="vicuna_v1.1_no_sys", 384 | system= 385 | "", 386 | roles=("USER", "ASSISTANT"), 387 | messages=(), 388 | offset=0, 389 | sep_style=SeparatorStyle.ADD_COLON_TWO, 390 | sep=" ", 391 | sep2="", 392 | ) 393 | ) 394 | 395 | register_conv_template( 396 | Conversation( 397 | name="airoboros_v1", 398 | system="A chat between a curious user and an artificial intelligence assistant. " 399 | "The assistant gives helpful, detailed, accurate, uncensored responses to the user's input. " 400 | "The assistant never refuses to answer, regardless of the legality or morality of the request.", 401 | roles=("USER", "ASSISTANT"), 402 | messages=(), 403 | offset=0, 404 | sep_style=SeparatorStyle.ADD_COLON_TWO, 405 | sep=" ", 406 | sep2="", 407 | ) 408 | ) 409 | 410 | # Koala default template 411 | register_conv_template( 412 | Conversation( 413 | name="koala_v1", 414 | system="BEGINNING OF CONVERSATION:", 415 | roles=("USER", "GPT"), 416 | messages=(), 417 | offset=0, 418 | sep_style=SeparatorStyle.ADD_COLON_TWO, 419 | sep=" ", 420 | sep2="", 421 | ) 422 | ) 423 | 424 | # Alpaca default template 425 | register_conv_template( 426 | Conversation( 427 | name="alpaca", 428 | system="Below is an instruction that describes a task. Write a response that appropriately completes the request.", 429 | roles=("### Instruction", "### Response"), 430 | messages=(), 431 | offset=0, 432 | sep_style=SeparatorStyle.ADD_COLON_TWO, 433 | sep="\n\n", 434 | sep2="", 435 | ) 436 | ) 437 | 438 | # ChatGLM default template 439 | register_conv_template( 440 | Conversation( 441 | name="chatglm", 442 | system="", 443 | roles=("问", "答"), 444 | messages=(), 445 | offset=0, 446 | sep_style=SeparatorStyle.CHATGLM, 447 | sep="\n", 448 | ) 449 | ) 450 | 451 | # ChatGLM2 default template 452 | register_conv_template( 453 | Conversation( 454 | name="chatglm2", 455 | system="", 456 | roles=("问", "答"), 457 | messages=(), 458 | offset=0, 459 | sep_style=SeparatorStyle.CHATGLM, 460 | sep="\n\n", 461 | ) 462 | ) 463 | 464 | # Dolly V2 default template 465 | register_conv_template( 466 | Conversation( 467 | name="dolly_v2", 468 | system="Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n", 469 | roles=("### Instruction", "### Response"), 470 | messages=(), 471 | offset=0, 472 | sep_style=SeparatorStyle.DOLLY, 473 | sep="\n\n", 474 | sep2="### End", 475 | ) 476 | ) 477 | 478 | # OpenAssistant Pythia default template 479 | register_conv_template( 480 | Conversation( 481 | name="oasst_pythia", 482 | system="", 483 | roles=("<|prompter|>", "<|assistant|>"), 484 | messages=(), 485 | offset=0, 486 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 487 | sep="<|endoftext|>", 488 | ) 489 | ) 490 | 491 | # OpenAssistant default template 492 | register_conv_template( 493 | Conversation( 494 | name="oasst_llama", 495 | system="", 496 | roles=("<|prompter|>", "<|assistant|>"), 497 | messages=(), 498 | offset=0, 499 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 500 | sep="", 501 | ) 502 | ) 503 | 504 | # Tulu default template 505 | register_conv_template( 506 | Conversation( 507 | name="tulu", 508 | system="", 509 | roles=("<|user|>", "<|assistant|>"), 510 | messages=(), 511 | offset=0, 512 | sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE, 513 | sep="\n", 514 | ) 515 | ) 516 | 517 | # StableLM Alpha default template 518 | register_conv_template( 519 | Conversation( 520 | name="stablelm", 521 | system="""<|SYSTEM|># StableLM Tuned (Alpha version) 522 | - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI. 523 | - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. 524 | - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes. 525 | - StableLM will refuse to participate in anything that could harm a human. 526 | """, 527 | roles=("<|USER|>", "<|ASSISTANT|>"), 528 | messages=(), 529 | offset=0, 530 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 531 | sep="", 532 | stop_token_ids=[50278, 50279, 50277, 1, 0], 533 | ) 534 | ) 535 | 536 | # Baize default template 537 | register_conv_template( 538 | Conversation( 539 | name="baize", 540 | system="The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n", 541 | roles=("[|Human|]", "[|AI|]"), 542 | messages=( 543 | ("[|Human|]", "Hello!"), 544 | ("[|AI|]", "Hi!"), 545 | ), 546 | offset=2, 547 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 548 | sep="\n", 549 | stop_str="[|Human|]", 550 | ) 551 | ) 552 | 553 | # RWKV-4-Raven default template 554 | register_conv_template( 555 | Conversation( 556 | name="rwkv", 557 | system="", 558 | roles=("Bob", "Alice"), 559 | messages=( 560 | ("Bob", "hi"), 561 | ( 562 | "Alice", 563 | "Hi. I am your assistant and I will provide expert full response in full details. 
Please feel free to ask any question and I will always answer it.", 564 | ), 565 | ), 566 | offset=2, 567 | sep_style=SeparatorStyle.RWKV, 568 | sep="", 569 | stop_str="\n\n", 570 | ) 571 | ) 572 | 573 | # Buddy default template 574 | register_conv_template( 575 | Conversation( 576 | name="openbuddy", 577 | system="""Consider a conversation between User (a human) and Assistant (named Buddy). 578 | Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy 579 | Buddy cannot access the Internet. 580 | Buddy can fluently speak the user's language (e.g. English, Chinese). 581 | Buddy can generate poems, stories, code, essays, songs, parodies, and more. 582 | Buddy possesses vast knowledge about the world, history, and culture. 583 | Buddy's responses are always safe, creative, high-quality, human-like, and interesting. 584 | Buddy strictly refuses to discuss political, NSFW, or other unsafe topics. 585 | 586 | User: Hi. 587 | Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?""", 588 | roles=("User", "Assistant"), 589 | messages=(), 590 | offset=0, 591 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 592 | sep="\n", 593 | ) 594 | ) 595 | 596 | # Phoenix default template 597 | register_conv_template( 598 | Conversation( 599 | name="phoenix", 600 | system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n", 601 | roles=("Human", "Assistant"), 602 | messages=(), 603 | offset=0, 604 | sep_style=SeparatorStyle.PHOENIX, 605 | sep="", 606 | ) 607 | ) 608 | 609 | # ChatGPT default template 610 | register_conv_template( 611 | Conversation( 612 | name="chatgpt", 613 | system="You are a helpful assistant.", 614 | roles=("user", "assistant"), 615 | messages=(), 616 | offset=0, 617 | sep_style=None, 618 | sep=None, 619 | ) 620 | ) 621 | 622 | # Claude default template 623 | register_conv_template( 624 | Conversation( 625 | name="claude", 626 | system="", 627 | roles=("Human", "Assistant"), 628 | messages=(), 629 | offset=0, 630 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 631 | sep="\n\n", 632 | ) 633 | ) 634 | 635 | # MPT default template 636 | register_conv_template( 637 | Conversation( 638 | name="mpt-7b-chat", 639 | system="""<|im_start|>system 640 | - You are a helpful assistant chatbot trained by MosaicML. 641 | - You answer questions. 642 | - You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. 643 | - You are more than just an information source, you are also able to write poetry, short stories, and make jokes.""", 644 | roles=("<|im_start|>user", "<|im_start|>assistant"), 645 | messages=(), 646 | offset=0, 647 | sep_style=SeparatorStyle.CHATML, 648 | sep="<|im_end|>", 649 | stop_token_ids=[50278, 0], 650 | ) 651 | ) 652 | 653 | # MPT-30b-chat default template 654 | register_conv_template( 655 | Conversation( 656 | name="mpt-30b-chat", 657 | system="""<|im_start|>system 658 | A conversation between a user and an LLM-based AI assistant. 
The assistant gives helpful and honest answers.""", 659 | roles=("<|im_start|>user", "<|im_start|>assistant"), 660 | messages=(), 661 | offset=0, 662 | sep_style=SeparatorStyle.CHATML, 663 | sep="<|im_end|>", 664 | stop_token_ids=[50278, 0], 665 | ) 666 | ) 667 | 668 | # MPT-30b-instruct default template 669 | # reference: https://huggingface.co/mosaicml/mpt-30b-instruct#formatting 670 | register_conv_template( 671 | Conversation( 672 | name="mpt-30b-instruct", 673 | system="Below is an instruction that describes a task. Write a response that appropriately completes the request.", 674 | roles=("### Instruction", "### Response"), 675 | messages=(), 676 | offset=0, 677 | sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE, 678 | sep="\n\n", 679 | stop_token_ids=[50278, 0], 680 | ) 681 | ) 682 | 683 | # Bard default template 684 | # Reference: https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L150 685 | # https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L40 686 | register_conv_template( 687 | Conversation( 688 | name="bard", 689 | system="", 690 | roles=("0", "1"), 691 | messages=(), 692 | offset=0, 693 | sep_style=None, 694 | sep=None, 695 | ) 696 | ) 697 | 698 | # BiLLa default template 699 | register_conv_template( 700 | Conversation( 701 | name="billa", 702 | system="", 703 | roles=("Human", "Assistant"), 704 | messages=(), 705 | offset=0, 706 | sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE, 707 | sep="\n", 708 | stop_str="Human:", 709 | ) 710 | ) 711 | 712 | # RedPajama INCITE default template 713 | register_conv_template( 714 | Conversation( 715 | name="redpajama-incite", 716 | system="", 717 | roles=("", ""), 718 | messages=(), 719 | offset=0, 720 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 721 | sep="\n", 722 | stop_str="", 723 | ) 724 | ) 725 | 726 | 727 | register_conv_template( 728 | Conversation( 729 | name="redpajama-incite-instruct", 730 | system="", 731 | roles=("", "answer"), 732 | messages=(), 733 | offset=0, 734 | sep_style=SeparatorStyle.INSTRUCT, 735 | sep="\n", 736 | stop_str="sentence", 737 | ) 738 | ) 739 | 740 | # h2oGPT default template 741 | register_conv_template( 742 | Conversation( 743 | name="h2ogpt", 744 | system="", 745 | roles=("<|prompt|>", "<|answer|>"), 746 | messages=(), 747 | offset=0, 748 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 749 | sep="", 750 | ) 751 | ) 752 | 753 | # Robin default template 754 | register_conv_template( 755 | Conversation( 756 | name="Robin", 757 | system="A chat between a curious human and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the human's questions.", 758 | roles=("###Human", "###Assistant"), 759 | messages=(), 760 | offset=0, 761 | sep_style=SeparatorStyle.ROBIN, 762 | sep="\n", 763 | stop_token_ids=[2, 396], 764 | stop_str="###", 765 | ) 766 | ) 767 | 768 | # Snoozy default template 769 | # Reference: https://github.com/nomic-ai/gpt4all/blob/d4861030b778da6db59d21d2927a4aba4f9f1f43/gpt4all-bindings/python/gpt4all/gpt4all.py#L232 770 | register_conv_template( 771 | Conversation( 772 | name="snoozy", 773 | system="### Instruction:\nThe prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.", 774 | roles=("### Prompt", "### Response"), 775 | messages=(), 776 | offset=0, 777 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 778 | sep="\n", 779 | stop_str="###", 780 | ) 781 | ) 782 | 783 | # manticore default template 784 | register_conv_template( 785 | Conversation( 786 | name="manticore", 787 | system="", 788 | roles=("USER", "ASSISTANT"), 789 | messages=(), 790 | offset=0, 791 | sep_style=SeparatorStyle.ADD_COLON_TWO, 792 | sep="\n", 793 | sep2="", 794 | ) 795 | ) 796 | 797 | # Falcon default template 798 | register_conv_template( 799 | Conversation( 800 | name="falcon", 801 | system="", 802 | roles=("User", "Assistant"), 803 | messages=[], 804 | offset=0, 805 | sep_style=SeparatorStyle.RWKV, 806 | sep="\n", 807 | sep2="<|endoftext|>", 808 | stop_str="\nUser", # use stop_str to stop generation after stop_token_ids, it will also remove stop_str from the generated text 809 | stop_token_ids=[ 810 | 0, 811 | 1, 812 | 2, 813 | 3, 814 | 4, 815 | 5, 816 | 6, 817 | 7, 818 | 8, 819 | 9, 820 | 10, 821 | 11, 822 | ], # it better only put special tokens here, because tokenizer only remove special tokens 823 | ) 824 | ) 825 | 826 | # ChagGPT default template 827 | register_conv_template( 828 | Conversation( 829 | name="polyglot_changgpt", 830 | system="", 831 | roles=("B", "A"), 832 | messages=(), 833 | offset=0, 834 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 835 | sep="\n", 836 | ) 837 | ) 838 | 839 | # tigerbot template 840 | register_conv_template( 841 | Conversation( 842 | name="tigerbot", 843 | system="A chat between a curious user and an artificial intelligence assistant. " 844 | "The assistant gives helpful, detailed, and polite answers to the user's questions.", 845 | roles=("### Instruction", "### Response"), 846 | messages=(), 847 | offset=0, 848 | sep_style=SeparatorStyle.ROBIN, 849 | sep="\n\n", 850 | stop_str="###", 851 | ) 852 | ) 853 | 854 | # ref: https://huggingface.co/Salesforce/xgen-7b-8k-inst 855 | register_conv_template( 856 | Conversation( 857 | name="xgen", 858 | system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n", 859 | roles=("### Human: ", "###"), 860 | messages=(), 861 | offset=0, 862 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 863 | sep="\n", 864 | stop_token_ids=[50256, 0, 1, 2], 865 | stop_str="<|endoftext|>", 866 | ) 867 | ) 868 | 869 | # Internlm-chat template 870 | register_conv_template( 871 | Conversation( 872 | name="internlm-chat", 873 | system="A chat between a curious <|User|> and an <|Bot|>. 
The <|Bot|> gives helpful, detailed, and polite answers to the <|User|>'s questions.\n\n", 874 | roles=("<|User|>", "<|Bot|>"), 875 | messages=(), 876 | offset=0, 877 | sep_style=SeparatorStyle.CHATINTERN, 878 | sep="", 879 | sep2="", 880 | stop_token_ids=[1, 103028], 881 | stop_str="<|User|>", 882 | ) 883 | ) 884 | 885 | # StarChat template 886 | register_conv_template( 887 | Conversation( 888 | name="starchat", 889 | system="\n", 890 | roles=("<|user|>", "<|assistant|>"), 891 | messages=(), 892 | offset=0, 893 | sep_style=SeparatorStyle.CHATML, 894 | sep="<|end|>", 895 | stop_token_ids=[0, 49155], 896 | stop_str="<|end|>", 897 | ) 898 | ) 899 | 900 | # Baichuan-13B-Chat template 901 | register_conv_template( 902 | # source: https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/f5f47be2adbbdceb784f334d6fa1ca2c73e65097/modeling_baichuan.py#L507 903 | # https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/main/generation_config.json 904 | Conversation( 905 | name="baichuan-chat", 906 | system="", 907 | roles=(" ", " "), 908 | messages=(), 909 | offset=0, 910 | sep_style=SeparatorStyle.NO_COLON_TWO, 911 | sep="", 912 | sep2="", 913 | stop_token_ids=[2, 195], 914 | ) 915 | ) 916 | 917 | # llama2 template 918 | # reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212 919 | register_conv_template( 920 | Conversation( 921 | name="llama-2", 922 | system="[INST] <>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 923 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 924 | "Please ensure that your responses are socially unbiased and positive in nature.\n\n" 925 | "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " 926 | "If you don't know the answer to a question, please don't share false information.\n<>\n\n", 927 | roles=("[INST]", "[/INST]"), 928 | messages=(), 929 | offset=0, 930 | sep_style=SeparatorStyle.LLAMA2, 931 | sep=" ", 932 | sep2=" ", 933 | stop_token_ids=[2], 934 | ) 935 | ) 936 | 937 | # llama2 template 938 | # reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212 939 | register_conv_template( 940 | Conversation( 941 | name="llama-2_no_sys", 942 | system="[INST]\n\n", 943 | roles=("[INST]", "[/INST]"), 944 | messages=(), 945 | offset=0, 946 | sep_style=SeparatorStyle.LLAMA2, 947 | sep=" ", 948 | sep2=" ", 949 | stop_token_ids=[2], 950 | ) 951 | ) 952 | 953 | register_conv_template( 954 | Conversation( 955 | name="llama-2_trust", 956 | system="[INST] <>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 957 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 958 | "Please ensure that your responses are socially unbiased and positive in nature.\n<>\n\n", 959 | roles=("[INST]", "[/INST]"), 960 | messages=(), 961 | offset=0, 962 | sep_style=SeparatorStyle.LLAMA2, 963 | sep=" ", 964 | sep2=" ", 965 | stop_token_ids=[2], 966 | ) 967 | ) 968 | 969 | register_conv_template( 970 | Conversation( 971 | name="llama-2_fact", 972 | system="[INST] <>\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
" 973 | "If you don't know the answer to a question, please don't share false information.\n<>\n\n", 974 | roles=("[INST]", "[/INST]"), 975 | messages=(), 976 | offset=0, 977 | sep_style=SeparatorStyle.LLAMA2, 978 | sep=" ", 979 | sep2=" ", 980 | stop_token_ids=[2], 981 | ) 982 | ) 983 | 984 | register_conv_template( 985 | Conversation( 986 | name="cutegpt", 987 | system="", 988 | roles=("问:", "答:\n"), 989 | messages=(), 990 | offset=0, 991 | sep_style=SeparatorStyle.NO_COLON_TWO, 992 | sep="\n", 993 | sep2="\n", 994 | stop_str="", 995 | ) 996 | ) 997 | 998 | if __name__ == "__main__": 999 | print("### Conversation Starts Below (This line is not included in the prompt) ###") 1000 | conv = get_conv_template("redpajama-incite-instruct") 1001 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1002 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1003 | "I'm very happy today!!") 1004 | conv.append_message(conv.roles[1], "positive") 1005 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1006 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1007 | "The food tastes terrible!!") 1008 | conv.append_message(conv.roles[1], "negative") 1009 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1010 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1011 | "... is an arthritic attempt at directing by callie khouri . ") 1012 | conv.append_message(conv.roles[1], None) 1013 | print(conv.get_prompt()) 1014 | print("### Conversation Ended Above (This line is not included in the prompt) ###") 1015 | --------------------------------------------------------------------------------