├── configs ├── model_config │ ├── local.yaml │ ├── hf.yaml │ ├── llama-2-chat.yaml │ ├── prune.yaml │ ├── autogptq.yaml │ ├── vicuna-13b-v1.3-awq-3bit.yaml │ ├── vicuna-13b-v1.3-awq-4bit.yaml │ ├── vicuna-13b-v1.3-awq-8bit.yaml │ ├── llama-2-13b-awq-3bit.yaml │ ├── llama-2-13b-awq-4bit.yaml │ ├── llama-2-13b-awq-8bit.yaml │ ├── vicuna-13b-v1.3-gptq-3bit.yaml │ ├── vicuna-13b-v1.3-gptq-4bit.yaml │ ├── vicuna-13b-v1.3-gptq-8bit.yaml │ ├── llama-2-13b-chat-gptq-4bit-TheBloke.yaml │ ├── llama-2-13b-chat-awq-3bit.yaml │ ├── llama-2-13b-chat-awq-4bit.yaml │ ├── llama-2-13b-chat-awq-8bit.yaml │ ├── llama-2-13b-chat-gptq-3bit.yaml │ ├── llama-2-13b-chat-gptq-4bit.yaml │ ├── llama-2-13b-chat-gptq-8bit.yaml │ ├── llama-2-13b-gptq-3bit.yaml │ ├── llama-2-13b-gptq-4bit.yaml │ ├── llama-2-13b-gptq-8bit.yaml │ ├── llama-2-13b-chat-awq-4bit_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit_trust.yaml │ ├── llama-2-13b-chat-awq-8bit_nosys.yaml │ ├── llama-2-13b-chat-awq-8bit_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal128-seed0.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal128-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal512-seed2.yaml │ ├── vicuna-13b-v1.3-gptq-4bit-cal1024-seed1.yaml │ ├── vicuna-13b-v1.3-gptq-4bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_fact.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_fact.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_fact.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_fact.yaml │ ├── vicuna-13b-v1.3-gptq-3bit-cal256-seed2.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed0_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed1_trust.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_nosys.yaml │ ├── llama-2-13b-chat-awq-3bit-seed2_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed0_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed1_trust.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_nosys.yaml │ ├── llama-2-13b-chat-awq-4bit-seed2_trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal1024-seed2.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal256-seed0.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed0-trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed1-trust.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-fact.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-no_sys.yaml │ ├── llama-2-13b-chat-gptq-3bit-cal128-seed2-trust.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed0_trust.yaml │ ├── 
llama-2-13b-chat-gptq-4bit-cal128-seed1_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed1_trust.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_fact.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_nosys.yaml │ ├── llama-2-13b-chat-gptq-4bit-cal128-seed2_trust.yaml │ ├── vicuna-7b-v1.3.yaml │ ├── vicuna-13b-v1.3.yaml │ ├── llama-2-13b-chat.yaml │ ├── llama-2-7b.yaml │ ├── llama-2-13b.yaml │ ├── llama-2-70b-chat.yaml │ ├── llama-2-7b-chat.yaml │ ├── llama-2-13b-chat_nosys.yaml │ ├── llama-2-13b-chat_trust.yaml │ ├── vicuna-13b-v1.3-wanda_2to4.yaml │ ├── llama-2-13b-wanda_2to4.yaml │ ├── llama-2-13b-chat-wanda_2to4.yaml │ ├── vicuna-13b-v1.3-mag_2to4.yaml │ ├── llama-2-13b-chat-mag_1to2.yaml │ ├── llama-2-13b-chat-mag_2to4.yaml │ ├── llama-2-13b-chat-mag_4to8.yaml │ ├── llama-2-13b-chat-wanda_1to2.yaml │ ├── llama-2-13b-chat-wanda_4to8.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4.yaml │ ├── llama-2-13b-chat-sparsegpt_1to2.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4.yaml │ ├── llama-2-13b-chat-sparsegpt_4to8.yaml │ ├── llama-2-13b-mag_2to4.yaml │ ├── llama-2-13b-sparsegpt_2to4.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed0.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed1.yaml │ ├── vicuna-13b-v1.3-sparsegpt_2to4-seed2.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4-seed0.yaml │ ├── llama-2-13b-chat-sparsegpt_2to4-seed1.yaml │ └── llama-2-13b-chat-sparsegpt_2to4-seed2.yaml └── slurm_config.yaml ├── LICENSE ├── .gitignore ├── gather_result_files.py ├── README.md ├── scripts └── mmlu_autogptq.py ├── compression └── gptq │ ├── gptq.py │ └── conversation.py └── extract_csv.py /configs/model_config/local.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/model_config/hf.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: null 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/llama-2-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "openai/gpt-3.5-turbo-0301" 2 | type: CHAT 3 | conv_template: llama-2 4 | 5 | model_loader: HF 6 | torch_dtype: null 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/prune.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/vita-group/vicuna-13b-v1.3_magnitude_semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true -------------------------------------------------------------------------------- /configs/model_config/autogptq.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/vita-group/vicuna-7b-v1.3_gptq@2bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: FLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: 
"lmsys/vicuna-7b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | 
torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-TheBloke.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/TheBloke/Llama-2-13B-chat-GPTQ@gptq-4bit-128g-actorder_True" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: FLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@3bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- 
/configs/model_config/llama-2-13b-chat-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-3bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@3bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-4bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@4bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-gptq-8bit.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-gptq@8bit_128g" 2 | type: CHAT 3 | conv_template: redpajama-incite-instruct 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | 
model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@4bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-8bit_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-awq@8bit_128g" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | 
trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal512-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_512g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 
disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit-cal1024-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@4bit_1024g_1seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-4bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-gptq-3bit-cal256-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/vicuna-13b-v1.3-gptq-larger-caliset-vicuna_v1.1-temp@3bit_256g_2seed" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "lmsys/vicuna-13b-v1.3" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-3bit-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-awq-4bit-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-awq-128example-randomseed@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal1024-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_1024g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal256-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_256g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: 
true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed0-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed1-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | 
trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-no_sys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-3bit-cal128-seed2-trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@3bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed0_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_0seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # 
For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed1_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_fact.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_1seed" 2 | type: CHAT 3 | conv_template: "llama-2_fact" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-gptq-4bit-cal128-seed2_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/efficient-llm/llama-2-13b-chat-gptq-larger-caliset-llama2-temp@4bit_128g_2seed" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: GPTQ 6 | torch_dtype: null 7 | quant_file: null # For AutoAWQ 8 | tokenizer_name: "meta-llama/Llama-2-13b-chat-hf" 9 | trust_remote_code: true 10 | use_auth_token: true 11 | disable_exllama: true 12 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-7b-v1.3.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/lmsys/vicuna-7b-v1.3" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: 
lmsys/vicuna-7b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/lmsys/vicuna-13b-v1.3" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-7b.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-hf" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-7b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-hf" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-70b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: 
"hf/meta-llama/Llama-2-70b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-70b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-7b-chat.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-7b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-7b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat_nosys.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2_no_sys" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat_trust.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/meta-llama/Llama-2-13b-chat-hf" 2 | type: CHAT 3 | conv_template: "llama-2_trust" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null 
++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | 
tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-mag_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-wanda_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-wanda-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 
-------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_1to2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_1to2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_4to8.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_4to8" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-mag_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-magnitude-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-sparsegpt_2to4.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-sparsegpt-semistruct@0.5_2to4" 2 | type: CHAT 3 | conv_template: "redpajama-incite-instruct" 4 | 5 | model_loader: HF 6 | torch_dtype: 
BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed0" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed1" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/vicuna-13b-v1.3-sparsegpt_2to4-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4_seed2" 2 | type: CHAT 3 | conv_template: "vicuna_v1.1" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: lmsys/vicuna-13b-v1.3 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] 13 | -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed0.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed0" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 
++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed1.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed1" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/model_config/llama-2-13b-chat-sparsegpt_2to4-seed2.yaml: -------------------------------------------------------------------------------- 1 | model: "hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4_seed2" 2 | type: CHAT 3 | conv_template: "llama-2" 4 | 5 | model_loader: HF 6 | torch_dtype: BFLOAT16 7 | quant_file: null 8 | tokenizer_name: meta-llama/Llama-2-13b-chat-hf 9 | trust_remote_code: true 10 | use_auth_token: true 11 | 12 | # dt-run --config-name="config" +advglue=benign ++model_config.model="$MODEL" ++model_config.conv_template="vicuna_v1.1" ++model_config.model_loader=HF ++model_config.tokenizer_name=lmsys/vicuna-13b-v1.3 ++model_config.torch_dtype=null ++advglue.task=\[sst2\] -------------------------------------------------------------------------------- /configs/slurm_config.yaml: -------------------------------------------------------------------------------- 1 | key: null 2 | dry_run: False 3 | 4 | defaults: 5 | - _self_ 6 | - override hydra/launcher: submitit_slurm 7 | 8 | hydra: 9 | mode: MULTIRUN 10 | job: 11 | chdir: false 12 | launcher: 13 | # @package hydra.launcher 14 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 15 | submitit_folder: ./logs/${hydra.sweep.dir}/.submitit/%j 16 | timeout_min: 720 17 | cpus_per_task: 8 18 | # gpus_per_task: 1 19 | tasks_per_node: 1 20 | mem_gb: 80 21 | nodes: 1 22 | gres: gpu:h100:1 23 | partition: gpu 24 | # cluster: aces 25 | # name: dt-${model_config.model}-${hydra.job.name} 26 | name: dt-${hydra.job.name} 27 | comment: null 28 | signal_delay_s: 120 29 | max_num_timeout: 0 30 | additional_parameters: { cluster: aces } 31 | array_parallelism: 10 # 256 32 | setup: null 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 decoding-comp-trust 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /gather_result_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pandas as pd 4 | from glob import glob 5 | import shutil 6 | import argparse 7 | 8 | 9 | def copyfile(src, dest): 10 | os.makedirs(os.path.dirname(dest), exist_ok=True) 11 | shutil.copy(src, dest) 12 | 13 | def merge_nested_dicts(dict1, dict2): 14 | """ 15 | Merge two nested dictionaries into one. 16 | 17 | Args: 18 | dict1 (dict): First dictionary. 19 | dict2 (dict): Second dictionary. Use this value if conflict 20 | 21 | Returns: 22 | dict: Merged dictionary. 23 | """ 24 | merged_dict = {**dict1} # Start with dict1's keys and values 25 | 26 | for key, value in dict2.items(): 27 | if key in merged_dict: 28 | # If the key is present in both dictionaries and both values are dictionaries, merge them 29 | if isinstance(merged_dict[key], dict) and isinstance(value, dict): 30 | merged_dict[key] = merge_nested_dicts(merged_dict[key], value) 31 | else: 32 | # If the key is present but the values aren't both dictionaries, use the value from dict2 33 | merged_dict[key] = value 34 | else: 35 | # If the key is not present in dict1, add it to the merged dictionary 36 | merged_dict[key] = value 37 | 38 | return merged_dict 39 | 40 | def merge_dataframe(df1, df2, index): 41 | # df1 = df1.set_index() 42 | 43 | # Append DataFrames 44 | appended_df = pd.concat([df1, df2], ignore_index=True) 45 | 46 | # Drop duplicates, keep last (from df2) 47 | appended_df = appended_df.drop_duplicates(subset=index, keep='last') 48 | appended_df = appended_df.sort_values(by=index) 49 | 50 | # Set index back (if it was reset) 51 | # appended_df = appended_df.set_index(index) 52 | return appended_df.reset_index(drop=True) 53 | 54 | def get_adv_demo_scores(): 55 | fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True) 56 | trg_fs = [f.replace(RESULT_DIR, GIT_RESULT_DIR) for f in fs] 57 | for f, tf in zip(fs, trg_fs): 58 | copyfile(f, tf) 59 | 60 | def copy_or_merge_json_dict(relative_path): 61 | f = os.path.join(RESULT_DIR, relative_path) 62 | tf = os.path.join(GIT_RESULT_DIR, relative_path) 63 | if os.path.exists(tf): 64 | # print(f"\nERROR!!! 
File exist, need manual merge content: {tf}\n") 65 | # return 66 | print(f"\nFile exist, will merge content: {f} and {tf}\n") 67 | src_scores = pd.read_json(f).to_dict() 68 | with open(tf) as open_f: 69 | dst_scores = json.load(open_f) 70 | scores = merge_nested_dicts(src_scores, dst_scores) 71 | with open(tf, 'w') as open_f: 72 | json.dump(scores, open_f, indent=4) 73 | else: 74 | copyfile(f, tf) 75 | 76 | def get_advglue_scores(): 77 | # print(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json")) 78 | copy_or_merge_json_dict( 79 | os.path.join("adv-glue-plus-plus", "summary.json") 80 | ) 81 | copy_or_merge_json_dict( 82 | os.path.join("adv-glue-plus-plus", "breakdown.json") 83 | ) 84 | 85 | 86 | def get_fairness_scores(): 87 | fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True) 88 | model_names = [ 89 | os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs 90 | ] 91 | for f, model_name in zip(fs, model_names): 92 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 93 | copyfile(f, tf) 94 | 95 | 96 | def get_ethics_scores(): 97 | f = os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 98 | tf = os.path.join(GIT_RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 99 | if os.path.exists(tf): 100 | # print(f"\nERROR!!! File exist, need manual merge content: {tf}\n") 101 | # return 102 | print(f"ETHICS: File exist, merge content: {tf}\n") 103 | df = pd.read_json(f, lines=True) 104 | t_df = pd.read_json(tf, lines=True) 105 | t_df = merge_dataframe(t_df, df, ['model', 'dataset']) 106 | # print(t_df) 107 | t_df.to_json(tf, orient='records', lines=True) 108 | # copyfile(f, tf) 109 | else: 110 | copyfile(f, tf) 111 | 112 | 113 | def get_ood_scores(): 114 | path_prefix = os.path.join(RESULT_DIR, "ood", "results/") 115 | fs = glob(os.path.join(path_prefix, "**", "final_scores.json"), recursive=True) 116 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 117 | for f, model_name in zip(fs, model_names): 118 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 119 | copyfile(f, tf) 120 | 121 | 122 | def get_privacy_scores(): 123 | f = os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl") 124 | tf = os.path.join(GIT_RESULT_DIR, "privacy", "generations", "scores.jsonl") 125 | if os.path.exists(tf): 126 | print(f"\nPRIVACY: File exist, merge content: {tf}\n") 127 | df = pd.read_json(f, lines=True) 128 | t_df = pd.read_json(tf, lines=True) 129 | t_df = merge_dataframe(t_df, df, ['model', 'dataset']) 130 | # print(t_df) 131 | t_df.to_json(tf, orient='records', lines=True) 132 | else: 133 | copyfile(f, tf) 134 | 135 | 136 | def get_stereotype_scores(): 137 | path_prefix = os.path.join(RESULT_DIR, "stereotype", "generations/") 138 | fs = glob(os.path.join(path_prefix, "**", "25_compiled.json"), recursive=True) 139 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 140 | for f, model_name in zip(fs, model_names): 141 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 142 | copyfile(f, tf) 143 | 144 | 145 | def get_toxicity_scores(): 146 | path_prefix = os.path.join(RESULT_DIR, "toxicity", "user_prompts", "generations/") 147 | fs = glob(os.path.join(path_prefix, "**", "report.jsonl"), recursive=True) 148 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 149 | for f, model_name in zip(fs, model_names): 150 | tf = f.replace(RESULT_DIR, GIT_RESULT_DIR) 151 | copyfile(f, tf) 152 | 153 | 154 | def 
summarize_results(keys=None): 155 | func_dict = { 156 | "adv_demo": get_adv_demo_scores, 157 | "adv-glue": get_advglue_scores, 158 | "fairness": get_fairness_scores, 159 | "ethics": get_ethics_scores, 160 | "ood": get_ood_scores, 161 | "privacy": get_privacy_scores, 162 | "stereotype": get_stereotype_scores, 163 | "toxicity": get_toxicity_scores 164 | } 165 | if keys is None: 166 | keys = list(func_dict) 167 | for k in keys: 168 | print(f">> {k}") 169 | func_dict[k]() 170 | 171 | 172 | if __name__ == "__main__": 173 | parser = argparse.ArgumentParser() 174 | parser.add_argument('--result_dir', default="../DecodingTrust/results") 175 | parser.add_argument('-p', '--perspective', default=None, type=str) 176 | parser.add_argument('--git_result_dir', default="./results") 177 | args = parser.parse_args() 178 | 179 | assert os.path.exists(args.result_dir), f"Not found path to source results: {args.result_dir}" 180 | 181 | RESULT_DIR = args.result_dir # type: str 182 | while RESULT_DIR.endswith('/'): 183 | RESULT_DIR = RESULT_DIR[:-1] 184 | # GIT_RESULT_DIR = "./results" 185 | GIT_RESULT_DIR = args.git_result_dir 186 | 187 | summarize_results(keys=[args.perspective] if args.perspective is not None else None) 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Decoding Compressed Trust 2 | 3 | Codebase for the [Decoding Compressed Trust](https://decoding-comp-trust.github.io/) project. 4 | 5 | ## Model Preparation 6 | 7 | We provide compressed models on [Hugging Face](https://huggingface.co/compressed-llm). Details on how the models were compressed are provided below. 8 | 9 | ### Pruning 10 | 11 | Our code is based on `git@github.com:locuslab/wanda.git`. 12 | ```bash 13 | cd compression 14 | git clone git@github.com:locuslab/wanda.git 15 | ``` 16 | 17 | Prune with Magnitude/SparseGPT/Wanda using semi-structured sparsity: 18 | ```bash 19 | cd wanda 20 | CUDA_VISIBLE_DEVICES=0 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method magnitude --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_mag_2to4 21 | CUDA_VISIBLE_DEVICES=0 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method sparsegpt --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_sparsegpt_2to4 22 | CUDA_VISIBLE_DEVICES=2 python main.py --model meta-llama/Llama-2-13b-chat-hf --prune_method wanda --sparsity_type 2:4 --sparsity_ratio 0.5 --save=output/llama-2-13b-chat_wanda_2to4 23 | ``` 24 | Change `meta-llama/Llama-2-13b-chat-hf` to other models as needed.
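For reference, the snippet below is not part of this repository or of `wanda`; it is a minimal sketch of what 2:4 semi-structured magnitude pruning does to a single weight matrix: within every group of four consecutive weights along the input dimension, the two with the smallest magnitude are zeroed out, which gives the 50% sparsity requested by `--sparsity_ratio 0.5`. The function name and tensor shapes are illustrative only.
```python
# Illustrative sketch (NOT code from the wanda repo): 2:4 magnitude pruning of one weight matrix.
import torch

def magnitude_prune_2to4(weight: torch.Tensor) -> torch.Tensor:
    """Zero the 2 smallest-magnitude weights in every group of 4 along the input dimension."""
    out_features, in_features = weight.shape
    assert in_features % 4 == 0, "input dimension must be divisible by the group size 4"
    groups = weight.abs().reshape(out_features, in_features // 4, 4)
    # indices of the 2 smallest-magnitude entries in each group of 4
    _, drop_idx = torch.topk(groups, k=2, dim=-1, largest=False)
    mask = torch.ones_like(groups)
    mask.scatter_(-1, drop_idx, 0.0)  # zero out those entries
    return weight * mask.reshape(out_features, in_features)

w = torch.randn(8, 16)
w_pruned = magnitude_prune_2to4(w)
print((w_pruned == 0).float().mean().item())  # 0.5: exactly 2 of every 4 weights are zero
```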
25 | 26 | ### Quantization 27 | 28 | GPTQ: 29 | ```bash 30 | pip install auto-gptq 31 | cd compression/gptq 32 | 33 | CUDA_VISIBLE_DEVICES=0 python gptq.py --pretrained_model_dir meta-llama/Llama-2-13b-chat-hf --quantized_model_dir ./output --bits 4 --save_and_reload --desc_act --seed 0 --num_samples 128 --calibration-template llama-2 34 | ``` 35 | AWQ: 36 | ```bash 37 | cd compression 38 | git clone https://github.com/mit-han-lab/llm-awq 39 | cd llm-awq 40 | 41 | mkdir -p /storage/jinhaoduan/workspace/llm-awq-main/experiments/llama-2-13b-chat-bit4-seed0 42 | CUDA_VISIBLE_DEVICES=1 python -m awq.entry --model_path meta-llama/Llama-2-13b-chat-hf --seed 0 --w_bit 4 --q_group_size 128 --run_awq --dump_awq awq_cache/llama-2-13b-chat-bit4-seed0.pt 43 | CUDA_VISIBLE_DEVICES=1 python -m awq.entry --model_path meta-llama/Llama-2-13b-chat-hf --tasks wikitext --w_bit 4 --q_group_size 128 --load_awq awq_cache/llama-2-13b-chat-bit4-seed0.pt --q_backend fake --dump_awq_weights_to_hf ./llm-awq-main/llama-2-13b-chat-bit4-seed0 44 | ``` 45 | 46 | ## Running Experiments 47 | 48 | Install the modified DecodingTrust following [this link](https://github.com/decoding-comp-trust/DecodingTrust?tab=readme-ov-file#getting-started). 49 | 50 | Due to the large volume of experiments, we recommend running experiments through the Slurm job system. 51 | We provide [an example Slurm config file](configs/slurm_config.yaml). 52 | For each model, we provide a config file under [configs/model_config](configs/model_config). 53 | 54 | > Note that these files are tuned for the VITA ACES servers and may not work on other servers. 55 | 56 | Important files: 57 | * `scripts/multi-run.sh`: Use this to run all metrics on a given model_config. 58 | * `dt/configs/model_configs/vicuna_xxx.yaml`: Configures the model. 59 | * `dt/configs/slurm_config.yaml`: Sets up Slurm; do not change this. 60 | 61 | Setup: 62 | ```shell 63 | # find the gpu type 64 | scontrol show node | grep Gres 65 | # Add slurm 66 | cd DecodingTrust 67 | pip install -e ".[slurm]" 68 | ``` 69 | 70 | Modify `dt/configs/model_configs/vicuna-13b-v1.3-mag_2to4.yaml` for your model. 71 | Add `vicuna-13b-v1.3-mag_2to4` to `multi-run.sh`, then run: 72 | ```shell 73 | bash scripts/multi-run.sh 74 | ``` 75 | 76 | ## Aggregating Results 77 | 78 | 79 | Upload results to GitHub: 80 | ```shell 81 | git pull 82 | python gather_result_files.py --result_dir= -p= 83 | # Example 84 | # python gather_result_files.py -p=adv-glue 85 | git add results/ 86 | git commit -m "Update results" 87 | git push 88 | ``` 89 | 90 | Example: 91 | ```shell 92 | git pull 93 | python gather_result_files.py -p=adv-glue 94 | git add results/ 95 | git commit -m "Update results" 96 | git push 97 | ``` 98 | 99 | Extract results to a CSV file (`data/num_sheet.csv`), which will be used for visualization. 100 | Run `python extract_csv.py`. 101 | 102 | 103 | * Adversarial Demonstrations 104 | 105 | ```bash 106 | python src/dt/perspectives/adv_demonstration/aggregate_score.py 107 | ``` 108 | 109 | Find aggregated results with the following patterns. 110 | 111 | ```bash 112 | #ls results/adv_demonstration/*_score.json 113 | grep -H "adv_demonstration\"" results/adv_demonstration/*_score.json 114 | ``` 115 | 116 | * Adversarial Robustness 117 | 118 | ```bash 119 | python src/dt/perspectives/advglue/adv_stats.py 120 | ``` 121 | 122 | You can find the scores with the following patterns.
123 | 124 | ```bash 125 | cat ./results/adv-glue-plus-plus/summary.json | jq 126 | ``` 127 | 128 | * Fairness 129 | 130 | Use the patched score calculation: 131 | ```shell 132 | cp dt-patch/src/dt/perspectives/fairness/score_calculation_script.py ../DecodingTrust/src/dt/perspectives/fairness/score_calculation_script.py 133 | ``` 134 | 135 | ```bash 136 | python src/dt/perspectives/fairness/score_calculation_script.py 137 | ``` 138 | 139 | ```bash 140 | #ls results/fairness/results/*/*/*/final_scores.json 141 | grep -H "fairness score\"" results/fairness/results/hf/compressed-llm/*/final_scores.json 142 | ``` 143 | 144 | * Machine Ethics 145 | 146 | Use the patched score calculation: 147 | ```shell 148 | cp dt-patch/src/dt/perspectives/machine_ethics/calculate_score.py ../DecodingTrust/src/dt/perspectives/machine_ethics/calculate_score.py 149 | ``` 150 | 151 | ```bash 152 | python src/dt/perspectives/machine_ethics/calculate_score.py 153 | ``` 154 | 155 | ```bash 156 | # cat data/machine_ethics/generations/scores.jsonl | jq 157 | cat results/machine_ethics/generations/scores.jsonl | jq 158 | ``` 159 | 160 | * OOD 161 | 162 | Use the patched score calculation: 163 | ```shell 164 | cp dt-patch/src/dt/perspectives/ood/evaluation_ood.py ../DecodingTrust/src/dt/perspectives/ood/evaluation_ood.py 165 | ``` 166 | 167 | Find aggregated OOD results with the following patterns. 168 | 169 | ```bash 170 | #ls results/ood/results/*/*/*/final_scores.json 171 | #cat results/ood/results/hf/vita-group/llama-2-13b-chat_magnitude_semistruct@0.5_2to4/final_scores.json | grep "\"score" 172 | grep -H "\"score" results/ood/results/*/*/*/final_scores.json 173 | ``` 174 | 175 | Note: To rerun score aggregation, use the following command. 176 | 177 | ```bash 178 | dt-run --config-name config --multirun +model_config= +ood=knowledge_2020_5shot,knowledge_idk,knowledge_standard,style_8shot,style ++ood.resume=True,True 179 | ``` 180 | 181 | 182 | * Privacy 183 | 184 | To rerun score aggregation, use the following command. 185 | 186 | ```bash 187 | python src/dt/perspectives/privacy/result_agg.py 188 | ``` 189 | 190 | Find aggregated privacy results with the following patterns. 191 | 192 | ```bash 193 | cat results/privacy/generations/scores.jsonl | grep "all" | jq 194 | ``` 195 | 196 | * Stereotype 197 | 198 | Use the patched score calculation: 199 | ```shell 200 | cp dt-patch/src/dt/perspectives/stereotype/agreement_func.py ../DecodingTrust/src/dt/perspectives/stereotype/agreement_func.py 201 | ``` 202 | Main differences: 203 | 1. Remove irrelevant outputs, including `agree/disagree` strings repeated from the prompt and strings like `user: xxx` that are not the LLM's own opinion. 204 | 2. Match the first opinion if multiple are present. 205 | 206 | To rerun stereotype aggregation, use the following command. 207 | 208 | ```bash 209 | dt-run --config-name config --multirun +model_config=$(ls src/dt/configs/model_config/*.yaml | xargs -n 1 basename | sed 's/.yaml//' | tr '\n' ',' | sed 's/,$//') +stereotype=benign,targeted,untargeted ++stereotype.skip_generation=True,True 210 | ``` 211 | 212 | Find aggregated stereotype results with the following patterns. 213 | 214 | ```bash 215 | ls results/stereotype/generations/*/*/*/*compiled.json 216 | ``` 217 | * Toxicity 218 | 219 | To calculate the toxicity scores, use the following command.
220 | 221 | ```bash 222 | python src/dt/perspectives/toxicity/perspective_api_evaluate.py --api $PERSPECTIVE_API_KEY --strip-outputs 223 | ``` 224 | 225 | If you are rerunning the evaluation, remember to add `--strip-outputs`. 226 | 227 | Find aggregated toxicity results with the following patterns. 228 | 229 | ```bash 230 | cat results/toxicity/user_prompts/generations/*/*/*/report.jsonl | jq 231 | ``` 232 | 233 | * Score Summary 234 | 235 | ```bash 236 | dt-run +model_config=hf 237 | ``` 238 | 239 | or 240 | 241 | ```bash 242 | python src/dt/summarize.py 243 | ``` 244 | 245 | Then check the final JSON file: 246 | 247 | ```bash 248 | cat results/summary.json | jq 249 | ``` 250 | 251 | -------------------------------------------------------------------------------- /scripts/mmlu_autogptq.py: -------------------------------------------------------------------------------- 1 | """Different from the original MMLU test code: we let the LLM generate 16 tokens and 2 | match the answer (A, B, C, or D) in the generation. We also facilitate parallel 3 | evaluation through `--subject=`.""" 4 | import argparse 5 | import os 6 | import numpy as np 7 | import pandas as pd 8 | import numpy as np 9 | import torch 10 | from tqdm import tqdm 11 | from datasets import load_dataset 12 | from transformers import AutoTokenizer, LlamaForCausalLM, AutoModelForCausalLM 13 | from auto_gptq import AutoGPTQForCausalLM,BaseQuantizeConfig 14 | import wandb 15 | 16 | from crop import crop 17 | 18 | choices = ["A", "B", "C", "D"] 19 | 20 | 21 | def softmax(x): 22 | z = x - max(x) 23 | numerator = np.exp(z) 24 | denominator = np.sum(numerator) 25 | softmax = numerator/denominator 26 | return softmax 27 | 28 | def format_subject(subject): 29 | l = subject.split("_") 30 | s = "" 31 | for entry in l: 32 | s += " " + entry 33 | return s 34 | 35 | def format_example(df, idx, include_answer=True): 36 | prompt = df.iloc[idx, 0] 37 | k = df.shape[1] - 2 38 | for j in range(k): 39 | prompt += "\n{}.
{}".format(choices[j], df.iloc[idx, j+1]) 40 | prompt += "\nAnswer:" 41 | if include_answer: 42 | prompt += " {}\n\n".format(df.iloc[idx, k + 1]) 43 | return prompt 44 | 45 | def gen_prompt(train_df, subject, k=-1): 46 | prompt = "The following are multiple choice questions (with answers) about {}.\n\n".format(format_subject(subject)) 47 | if k == -1: 48 | k = train_df.shape[0] 49 | for i in range(k): 50 | prompt += format_example(train_df, i) 51 | return prompt 52 | 53 | def predict_one_sample(prompt, answers): 54 | input_ids = tokenizer([prompt]).input_ids 55 | output_ids = model.generate( 56 | input_ids=torch.as_tensor(input_ids).cuda(), 57 | do_sample=True, 58 | # top_k=1, 59 | top_p=1, 60 | temperature=1e-7, 61 | max_new_tokens=16, 62 | ) 63 | output_ids = output_ids[0][len(input_ids[0]):] 64 | outputs = tokenizer.decode(output_ids, skip_special_tokens=True).replace("", "").lower() 65 | 66 | pred=outputs[0].upper() 67 | 68 | if pred in ['A','B','C','D']: 69 | return pred 70 | else: 71 | return None 72 | 73 | def eval(args, subject, dev_df, test_df): 74 | lazy_load_model() 75 | cors = [] 76 | refs = [] 77 | all_probs = [] 78 | answers = choices[:test_df.shape[1]-2] 79 | 80 | for i in tqdm(range(test_df.shape[0]), desc=subject): 81 | # get prompt and make sure it fits 82 | k = args.ntrain 83 | prompt_end = format_example(test_df, i, include_answer=False) 84 | train_prompt = gen_prompt(dev_df, subject, k) 85 | prompt = train_prompt + prompt_end 86 | 87 | while crop(prompt) != prompt: 88 | k -= 1 89 | train_prompt = gen_prompt(dev_df, subject, k) 90 | prompt = train_prompt + prompt_end 91 | 92 | label = test_df.iloc[i, test_df.shape[1]-1] 93 | 94 | pred = predict_one_sample(prompt, answers) 95 | 96 | 97 | cor = pred == label 98 | ref = pred == None 99 | cors.append(cor) 100 | refs.append(ref) 101 | 102 | acc = np.mean(cors) 103 | refusal = np.mean(refs) 104 | cors = np.array(cors) 105 | refs = np.array(refs) 106 | 107 | print("Average accuracy {:.3f} - {}, refusal:{:.3f}".format(acc, subject,refusal)) 108 | 109 | return cors, refs, acc, refusal, all_probs 110 | 111 | def main(args): 112 | if args.subject is None: 113 | subjects = sorted([f.split("_test.csv")[0] for f in os.listdir(os.path.join(args.data_dir, "test")) if "_test.csv" in f]) 114 | else: 115 | subjects = [args.subject] 116 | 117 | print("test subjects:", subjects) 118 | # print(args) 119 | 120 | all_cors = [] 121 | all_refs = [] 122 | 123 | for subject in tqdm(subjects, desc='subj'): 124 | result_path = os.path.join(args.save_path, f"results_{subject}.csv") 125 | if args.resume and os.path.exists(result_path): 126 | print(f"resume: {result_path}") 127 | test_df = pd.read_csv(result_path) 128 | cors = test_df["{}_correct".format(args.model_name)].tolist() 129 | refs = test_df["{}_refusal".format(args.model_name)].tolist() 130 | else: 131 | dev_df = pd.read_csv(os.path.join(args.data_dir, "dev", subject + "_dev.csv"), header=None)[:args.ntrain] 132 | test_df = pd.read_csv(os.path.join(args.data_dir, "test", subject + "_test.csv"), header=None) 133 | 134 | cors, refs, acc, refusal, _ = eval(args, subject, dev_df, test_df) 135 | 136 | test_df["{}_correct".format(args.model_name)] = cors 137 | test_df["{}_refusal".format(args.model_name)] = refs 138 | test_df.to_csv(result_path, index=None) 139 | all_cors.append(cors) 140 | all_refs.append(refs) 141 | 142 | wandb.log({ 143 | f'{subject} accuracy': np.mean(cors), 144 | f'{subject} refusal': np.mean(refs), 145 | }) 146 | 147 | weighted_acc = np.mean(np.concatenate(all_cors)) 148 | 
weighted_ref = np.mean(np.concatenate(all_refs)) 149 | 150 | print("Average accuracy: {:.3f}".format(weighted_acc)) 151 | print("Average refusal: {:.3f}".format(weighted_ref)) 152 | wandb.log({ 153 | 'accuracy': weighted_acc, 154 | 'refusal': weighted_ref, 155 | 'n': len(all_cors) 156 | }) 157 | 158 | def lazy_load_model(): 159 | global model 160 | if model is None: 161 | print("Lazy load model...") 162 | kwargs = {} 163 | if '70b' in args.model_name: 164 | kwargs = {**kwargs, 165 | "disable_exllama": True, 166 | "disable_exllamav2": True,} 167 | model = AutoGPTQForCausalLM.from_quantized( 168 | args.model_name, 169 | inject_fused_mlp=True, 170 | inject_fused_attention=False, 171 | quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=128, desc_act=True), 172 | revision=args.revision, 173 | use_safetensors=False, 174 | device_map='auto', 175 | **kwargs, 176 | ) 177 | model.config.pad_token_id = model.config.eos_token_id 178 | 179 | 180 | if __name__ == "__main__": 181 | parser = argparse.ArgumentParser() 182 | parser.add_argument('--model-name') 183 | parser.add_argument('--tokenizer-name') 184 | parser.add_argument('--bits', type=int, default=None) 185 | parser.add_argument('--num_sample', type=int, default=None) 186 | parser.add_argument('--seed', type=int, default=None) 187 | parser.add_argument('--template', default='default') 188 | parser.add_argument('--revision', type=str, default=None) 189 | parser.add_argument('--test_seed', type=int, default=None, help='seed for test (not model).') 190 | parser.add_argument('--temperature', default=1e-7, type=float) 191 | # mmlu 192 | parser.add_argument("--subject", type=str, default=None, 193 | choices=['abstract_algebra', 'anatomy', 'astronomy', 'business_ethics', 'clinical_knowledge', 'college_biology', 'college_chemistry', 'college_computer_science', 'college_mathematics', 'college_medicine', 'college_physics', 'computer_security', 'conceptual_physics', 'econometrics', 'electrical_engineering', 'elementary_mathematics', 'formal_logic', 'global_facts', 'high_school_biology', 'high_school_chemistry', 'high_school_computer_science', 'high_school_european_history', 'high_school_geography', 'high_school_government_and_politics', 'high_school_macroeconomics', 'high_school_mathematics', 'high_school_microeconomics', 'high_school_physics', 'high_school_psychology', 'high_school_statistics', 'high_school_us_history', 'high_school_world_history', 'human_aging', 'human_sexuality', 'international_law', 'jurisprudence', 'logical_fallacies', 'machine_learning', 'management', 'marketing', 'medical_genetics', 'miscellaneous', 'moral_disputes', 'moral_scenarios', 'nutrition', 'philosophy', 'prehistory', 'professional_accounting', 'professional_law', 'professional_medicine', 'professional_psychology', 'public_relations', 'security_studies', 'sociology', 'us_foreign_policy', 'virology', 'world_religions']) 194 | parser.add_argument("--ntrain", "-k", type=int, default=5, help='shots of demos in ICL.') 195 | parser.add_argument("--data_dir", "-d", type=str, default="data") 196 | parser.add_argument("--resume", action='store_true') 197 | args = parser.parse_args() 198 | 199 | if args.revision is None: 200 | args.revision = f"{args.bits}bit_{args.num_sample}g_{args.seed}seed" 201 | print(f"Auto revision: {args.revision}") 202 | 203 | args.save_path = f'./mmlu-autogptq-results/{args.model_name}-{args.revision}' 204 | if args.temperature != 1e-7: 205 | args.save_path += f'_t{args.temperature:g}' 206 | if args.test_seed is not None: 207 | args.save_path += 
f'_ts{args.test_seed}' 208 | if args.template != 'default': 209 | args.save_path += f'_te-{args.template}' 210 | print("save_path: ", args.save_path) 211 | os.makedirs(args.save_path, exist_ok=True) 212 | 213 | wandb.init(project='comp-test', config=vars(args)) 214 | 215 | device = torch.device("cuda") 216 | 217 | tokenizer = AutoTokenizer.from_pretrained( 218 | args.tokenizer_name, 219 | use_fast=False, 220 | torch_dtype=torch.bfloat16, 221 | use_safetensors=False, 222 | device_map='auto' 223 | ) 224 | tokenizer.pad_token_id = tokenizer.eos_token_id 225 | tokenizer.pad_token = tokenizer.eos_token 226 | 227 | model = None 228 | 229 | main(args) 230 | -------------------------------------------------------------------------------- /compression/gptq/gptq.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import sys 4 | import time 5 | from argparse import ArgumentParser 6 | 7 | import torch 8 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 9 | from datasets import Dataset 10 | from transformers import AutoTokenizer, TextGenerationPipeline, GenerationConfig 11 | from conversation import get_conv_template 12 | 13 | 14 | llama_2_template = """[INST] <<SYS>> 15 | You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. 16 | 17 | If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. 18 | <</SYS>> 19 | 20 | {input} [/INST] 21 | """ 22 | 23 | 24 | def load_data(data_path, tokenizer, n_samples, template='default'): 25 | with open(data_path, "r", encoding="utf-8") as f: 26 | raw_data = json.load(f) 27 | 28 | raw_data = random.sample(raw_data, k=min(n_samples, len(raw_data))) 29 | 30 | def dummy_gen(): 31 | return raw_data 32 | 33 | def tokenize(examples): 34 | instructions = examples["instruction"] 35 | inputs = examples["input"] 36 | outputs = examples["output"] 37 | 38 | prompts = [] 39 | texts = [] 40 | input_ids = [] 41 | attention_mask = [] 42 | for istr, inp, opt in zip(instructions, inputs, outputs): 43 | if inp: 44 | if template == 'default': 45 | prompt = f"Instruction:\n{istr}\nInput:\n{inp}\nOutput:\n" 46 | text = prompt + opt 47 | else: 48 | conv = get_conv_template(template) 49 | conv.append_message(conv.roles[0], f'{istr} {inp}') 50 | conv.append_message(conv.roles[1], None) 51 | prompt = conv.get_prompt() 52 | conv = get_conv_template(template) 53 | conv.append_message(conv.roles[0], f'{istr} {inp}') 54 | conv.append_message(conv.roles[1], opt) 55 | text = conv.get_prompt() 56 | else: 57 | if template == 'default': 58 | prompt = f"Instruction:\n{istr}\nOutput:\n" 59 | text = prompt + opt 60 | else: 61 | conv = get_conv_template(template) 62 | conv.append_message(conv.roles[0], istr) 63 | conv.append_message(conv.roles[1], None) 64 | prompt = conv.get_prompt() 65 | conv = get_conv_template(template) 66 | conv.append_message(conv.roles[0], istr) 67 | conv.append_message(conv.roles[1], opt) 68 | text = conv.get_prompt() 69 | print('*' * 20) 70 | print(prompt) 71 | print('-' * 20) 72 | print(text) 73 | print('*' * 20) 74 | 75 | if len(tokenizer(prompt)["input_ids"]) >= tokenizer.model_max_length: 76 | continue 77 | 78 |
tokenized_data = tokenizer(text) 79 | 80 | input_ids.append(tokenized_data["input_ids"][: tokenizer.model_max_length]) 81 | attention_mask.append(tokenized_data["attention_mask"][: tokenizer.model_max_length]) 82 | prompts.append(prompt) 83 | texts.append(text) 84 | 85 | return { 86 | "input_ids": input_ids, 87 | "attention_mask": attention_mask, 88 | "prompt": prompts 89 | } 90 | 91 | dataset = Dataset.from_generator(dummy_gen) 92 | 93 | dataset = dataset.map( 94 | tokenize, 95 | batched=True, 96 | batch_size=len(dataset), 97 | num_proc=1, 98 | keep_in_memory=True, 99 | load_from_cache_file=False, 100 | remove_columns=["instruction", "input"] 101 | ) 102 | 103 | dataset = dataset.to_list() 104 | 105 | for sample in dataset: 106 | sample["input_ids"] = torch.LongTensor(sample["input_ids"]) 107 | sample["attention_mask"] = torch.LongTensor(sample["attention_mask"]) 108 | 109 | return dataset 110 | 111 | 112 | def main(): 113 | parser = ArgumentParser() 114 | parser.add_argument("--pretrained_model_dir", type=str) 115 | parser.add_argument("--quantized_model_dir", type=str, default=None) 116 | parser.add_argument("--bits", type=int, default=4, choices=[2, 3, 4, 6, 8]) 117 | parser.add_argument("--group_size", type=int, default=128, help="group size, -1 means no grouping or full rank") 118 | parser.add_argument("--desc_act", action="store_true", help="whether to quantize with desc_act") 119 | parser.add_argument("--num_samples", type=int, default=128, help="how many samples will be used to quantize model") 120 | parser.add_argument("--save_and_reload", action="store_true", help="whether save quantized model to disk and reload back") 121 | parser.add_argument("--fast_tokenizer", action="store_true", help="whether use fast tokenizer") 122 | parser.add_argument("--use_triton", action="store_true", help="whether use triton to speedup at inference") 123 | parser.add_argument("--per_gpu_max_memory", type=int, default=None, help="max memory used to load model per gpu") 124 | parser.add_argument("--cpu_max_memory", type=int, default=None, help="max memory used to offload model to cpu") 125 | parser.add_argument("--quant_batch_size", type=int, default=1, help="examples batch size for quantization") 126 | parser.add_argument("--trust_remote_code", action="store_true", help="whether to trust remote code when loading model") 127 | parser.add_argument('--seed', type=int, default=None) 128 | parser.add_argument('--calibration-template', default='default', choices=['default', 'llama-2', 'vicuna_v1.1', 'redpajama-incite-instruct']) 129 | args = parser.parse_args() 130 | 131 | if args.seed is not None: 132 | print(f'Random Seed: {args.seed}') 133 | random.seed(args.seed) 134 | else: 135 | print('No seed is set') 136 | 137 | max_memory = dict() 138 | if args.per_gpu_max_memory is not None and args.per_gpu_max_memory > 0: 139 | if torch.cuda.is_available(): 140 | max_memory.update( 141 | {i: f"{args.per_gpu_max_memory}GIB" for i in range(torch.cuda.device_count())} 142 | ) 143 | if args.cpu_max_memory is not None and args.cpu_max_memory > 0 and max_memory: 144 | max_memory["cpu"] = f"{args.cpu_max_memory}GIB" 145 | if not max_memory: 146 | max_memory = None 147 | 148 | tokenizer = AutoTokenizer.from_pretrained( 149 | args.pretrained_model_dir, 150 | use_fast=args.fast_tokenizer, 151 | trust_remote_code=args.trust_remote_code 152 | ) 153 | model = AutoGPTQForCausalLM.from_pretrained( 154 | args.pretrained_model_dir, 155 | quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size, 
desc_act=args.desc_act), 156 | max_memory=max_memory, 157 | trust_remote_code=args.trust_remote_code 158 | ) 159 | 160 | examples = load_data("dataset/alpaca_data_cleaned.json", tokenizer, args.num_samples, template=args.calibration_template) 161 | examples_for_quant = [ 162 | {"input_ids": example["input_ids"], "attention_mask": example["attention_mask"]} 163 | for example in examples 164 | ] 165 | 166 | start = time.time() 167 | model.quantize( 168 | examples_for_quant, 169 | batch_size=args.quant_batch_size, 170 | use_triton=args.use_triton, 171 | autotune_warmup_after_quantized=args.use_triton 172 | ) 173 | end = time.time() 174 | print(f"quantization took: {end - start: .4f}s") 175 | 176 | if not args.quantized_model_dir: 177 | args.quantized_model_dir = args.pretrained_model_dir 178 | 179 | if args.save_and_reload: 180 | model.save_quantized(args.quantized_model_dir) 181 | gen_config = GenerationConfig.from_pretrained(args.pretrained_model_dir) 182 | gen_config.save_pretrained(args.quantized_model_dir) 183 | del model 184 | if torch.cuda.is_available(): 185 | torch.cuda.empty_cache() 186 | model = AutoGPTQForCausalLM.from_quantized( 187 | args.quantized_model_dir, 188 | device="cuda:0", 189 | use_triton=args.use_triton, 190 | max_memory=max_memory, 191 | inject_fused_mlp=True, 192 | inject_fused_attention=False, 193 | trust_remote_code=args.trust_remote_code 194 | ) 195 | 196 | pipeline_init_kwargs = {"model": model, "tokenizer": tokenizer} 197 | # if not max_memory: 198 | # pipeline_init_kwargs["device"] = "cuda:0" 199 | pipeline = TextGenerationPipeline(**pipeline_init_kwargs) 200 | for example in random.sample(examples, k=min(4, len(examples))): 201 | print(f"prompt: {example['prompt']}") 202 | print("-" * 42) 203 | print(f"golden: {example['output']}") 204 | print("-" * 42) 205 | start = time.time() 206 | generated_text = pipeline( 207 | example['prompt'], 208 | return_full_text=False, 209 | num_beams=1, 210 | max_length=len(example["input_ids"]) + 128 # use this instead of max_new_token to disable UserWarning when integrate with logging 211 | )[0]['generated_text'] 212 | end = time.time() 213 | print(f"quant: {generated_text}") 214 | num_new_tokens = len(tokenizer(generated_text)["input_ids"]) 215 | print(f"generate {num_new_tokens} tokens using {end-start: .4f}s, {num_new_tokens / (end - start)} tokens/s.") 216 | print("=" * 42) 217 | 218 | 219 | if __name__ == "__main__": 220 | import logging 221 | 222 | logging.basicConfig( 223 | format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S" 224 | ) 225 | 226 | main() -------------------------------------------------------------------------------- /extract_csv.py: -------------------------------------------------------------------------------- 1 | """This script is used to log workers for each data point. 2 | Usage: 3 | # create result csv 4 | python extract_csv.py 5 | # create worker csv 6 | # NOTE: Use your name initial for --worker!!! For example, JH for Junyuan Hong 7 | # NOTE: Change result_dir to your local result path!!! 8 | python extract_csv.py --mode=worker --result_dir= --worker= 9 | git add ./ipynb/data/worker_sheet.csv 10 | 11 | # remove content based on blacklist file. 
12 | python extract_csv.py --mode=clean_blacklist --result_dir= 13 | # python extract_csv.py --mode=clean_blacklist --result_dir=../DecodingTrust/results --worker=JH --dry_run 14 | # Double check removing: 15 | # Example: python extract_csv.py --mode=worker --result_dir=../DecodingTrust/results/ --worker=JH 16 | """ 17 | import os 18 | import json 19 | import sys 20 | import argparse 21 | import numpy as np 22 | import csv 23 | 24 | import pandas as pd 25 | from glob import glob 26 | from pandas import DataFrame 27 | import re 28 | 29 | 30 | perspective_name_mapping = { 31 | 'adv-glue-plus-plus': 'AdvGLUE++', 32 | 'adv_demonstration': 'Adv Demo', 33 | 'fairness': 'Fairness', 34 | 'machine_ethics': 'Ethics', 35 | 'ood': 'OOD', 36 | 'privacy': 'Privacy', 37 | 'toxicity': 'Toxicity', 38 | 'stereotype': 'Stereotype', 39 | } 40 | 41 | def load_blacklist(): 42 | # determine if the perspective is blacklisted. 43 | blacklist_file = f'./ipynb/data/{args.worker}_blacklist.csv' 44 | print(f"Read blacklist from {blacklist_file}") 45 | assert os.path.exists(blacklist_file), f"Not found blacklist file: {blacklist_file}" 46 | df = pd.read_csv(blacklist_file) 47 | pers_cols = [v for k, v in perspective_name_mapping.items() if v in df.columns] 48 | df = df[['model_name'] + pers_cols] 49 | 50 | # blk_df = df[pers_cols].apply(lambda x: 'X' in x) 51 | for c in pers_cols: 52 | df[c] = df[c].apply(lambda x: 'X' in x if isinstance(x, str) else False) 53 | # blk_df['model_name'] = df['model_name'] 54 | df = df.set_index('model_name') 55 | return df 56 | 57 | 58 | def check_blacklist(pers_name, model_name, rm_file=None, rm_df_file=None, rm_dict_file=None): 59 | pers_name, _ = map_perspective_name_to_display_name(pers_name) 60 | do_rm_file = True 61 | 62 | vita_compressed_name_mapping = { 63 | 'hf/compressed-llm/llama-2-13b-chat-magnitude-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b-chat_magnitude_semistruct@0.5_2to4', 64 | 'hf/compressed-llm/llama-2-13b-chat-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b-chat_sparsegpt_semistruct@0.5_2to4', 65 | 'hf/compressed-llm/llama-2-13b-magnitude-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b_magnitude_semistruct@0.5_2to4', 66 | 'hf/compressed-llm/llama-2-13b-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/llama-2-13b_sparsegpt_semistruct@0.5_2to4', 67 | 'hf/compressed-llm/vicuna-13b-v1.3-magnitude-semistruct@0.5_2to4': 'hf/vita-group/vicuna-13b-v1.3_magnitude_semistruct@0.5_2to4', 68 | 'hf/compressed-llm/vicuna-13b-v1.3-sparsegpt-semistruct@0.5_2to4': 'hf/vita-group/vicuna-13b-v1.3_sparsegpt_semistruct@0.5_2to4' 69 | } 70 | new_vita_name_mapping = {} 71 | for k, v in vita_compressed_name_mapping.items(): 72 | new_vita_name_mapping.update({v: k}) 73 | vita_compressed_name_mapping = new_vita_name_mapping 74 | 75 | if rm_df_file is not None: 76 | model_name_fmt = model_name 77 | rm_list = [] 78 | for df_model_name in BLACKLIST_DF.index: 79 | if BLACKLIST_DF.loc[df_model_name][pers_name].item(): 80 | model_name = model_name_fmt(df_model_name) 81 | rm_list.append(model_name) 82 | with open(rm_df_file, 'r') as file: 83 | # Read lines into a list 84 | lines = file.readlines() 85 | 86 | def _check_inline(patterns, line): 87 | for k, v in vita_compressed_name_mapping.items(): 88 | if 'k' in line: 89 | line.replace(k, v) 90 | break 91 | for p in patterns: 92 | if p in line: 93 | return True 94 | return False 95 | 96 | with open(rm_df_file, 'w') as file: 97 | for line in lines: 98 | if _check_inline(rm_list, line): 99 | if args.dry_run: 100 | print(f" REMOVE LINE: 
{line}") 101 | file.write(line + '\n') 102 | else: 103 | pass 104 | else: 105 | file.write(line + '\n') 106 | # for model_name in rm_list: 107 | # if model_name in line: 108 | # if args.dry_run: 109 | # print(f" REMOVE LINE: {line}") 110 | # file.write(line + '\n') 111 | # else: 112 | # break 113 | # else: 114 | # file.write(line + '\n') 115 | else: 116 | if not isinstance(model_name, str): 117 | print(f"ERROR: model_name={model_name}, type: {type(model_name)}") 118 | # determine if the perspective is blacklisted. 119 | 120 | try: 121 | if pers_name not in BLACKLIST_DF.columns or not BLACKLIST_DF.loc[model_name][pers_name].item(): 122 | return 123 | except KeyError as e: 124 | if model_name in vita_compressed_name_mapping.keys(): 125 | model_name = vita_compressed_name_mapping[model_name] 126 | 127 | if pers_name not in BLACKLIST_DF.columns or not BLACKLIST_DF.loc[model_name][pers_name].item(): 128 | return 129 | else: 130 | print(f"Error: {model_name} not found!") 131 | 132 | if rm_file is not None: 133 | print(f"Remove file in blacklist: {rm_file}") 134 | if not args.dry_run: 135 | os.remove(rm_file) 136 | # elif rm_df_file is not None: 137 | # raise NotImplementedError() 138 | # elif rm_dict_file is not None: 139 | # raise NotImplementedError() 140 | # else: 141 | # raise RuntimeError("No file to delete") 142 | 143 | def get_adv_demo_scores(breakdown=False): 144 | print('==> AdvDemo') 145 | fs = glob(os.path.join(RESULT_DIR, "adv_demonstration", "**", "*_score.json"), recursive=True) 146 | # assert any([f for f in fs if 'hf_compressed-llm_llama-2-13b-awq@3bit_128g_score' in f]) 147 | # TODO: This won't work if OpenAI or Anthropic models start to have underscores 148 | model_names = [os.path.basename(f).removesuffix("_score.json").replace("_", "/", 2) for f in fs] 149 | model_scores = {} 150 | model_rejections = {} 151 | model_breakdowns = {} 152 | for (idx, f), model_name in zip(enumerate(fs), model_names): 153 | with open(f) as src: 154 | scores = json.load(src) 155 | if not scores: 156 | print(f"[AdvDemo] Found Null: {f}") 157 | continue 158 | if args.mode == 'clean_blacklist': 159 | check_blacklist('adv_demonstration', model_name, rm_file=f) 160 | model_scores[model_name] = scores["adv_demonstration"] * 100 161 | model_rejections[model_name] = scores["adv_demonstration_rej"] * 100 162 | model_breakdowns[model_name] = scores 163 | 164 | if args.check: 165 | if idx == 0: 166 | check_keys = set(scores.keys()) 167 | else: 168 | cur_keys = set(scores.keys()) 169 | if len(check_keys) > len(cur_keys): 170 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 171 | elif len(check_keys) < len(cur_keys): 172 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 173 | if breakdown: 174 | return model_breakdowns 175 | else: 176 | return {"score": model_scores, "rejection_rate": model_rejections} 177 | 178 | 179 | def get_advglue_scores(breakdown=False): 180 | print('==> AdvGlue') 181 | fname = os.path.join(RESULT_DIR, "adv-glue-plus-plus", "summary.json") 182 | # print() 183 | if args.mode == 'clean_blacklist': 184 | check_blacklist('adv-glue-plus-plus', lambda model_name: '/'+model_name, rm_df_file=fname) 185 | 186 | scores = pd.read_json(fname).to_dict() 187 | model_scores = {k.removeprefix("/"): v * 100 for k, v in scores["Accuracy"].items()} 188 | model_rejections = {k.removeprefix("/"): v * 100 for k, v in scores["RR+NE"].items()} 189 | if breakdown: 190 | with open(os.path.join(RESULT_DIR, "adv-glue-plus-plus", "breakdown.json")) as src: 191 | 
breakdown_scores = json.load(src) 192 | return breakdown_scores 193 | else: 194 | return {"score": model_scores, "rejection_rate": model_rejections} 195 | 196 | 197 | def get_fairness_scores(breakdown=False): 198 | print('==> Fairness') 199 | fs = glob(os.path.join(RESULT_DIR, "fairness", "**", "final_scores.json"), recursive=True) 200 | model_names = [ 201 | os.path.dirname(x).removeprefix(os.path.join(RESULT_DIR, "fairness", "results")).removeprefix("/") for x in fs 202 | ] 203 | model_scores = {} 204 | model_rejections = {} 205 | model_breakdown = {} 206 | for (idx, f), model_name in zip(enumerate(fs), model_names): 207 | with open(f) as src: 208 | scores = json.load(src) 209 | if args.mode == 'clean_blacklist': 210 | check_blacklist('fairness', model_name, rm_file=f) 211 | # model_scores[model_name] = scores.get("fairness score", None) 212 | model_scores[model_name] = scores.get("fairness score (new)", None) 213 | model_rejections[model_name] = scores.get("rejection rate", None) 214 | # model_breakdown[model_name] = { 215 | # "zero-shot": { 216 | # "Acc": "", 217 | # "Demographic Parity Difference": "", 218 | # "Equalized Odds Difference": " " 219 | # }, 220 | # "few-shot-1": {}, 221 | # "few-shot-2": {}, 222 | # "Averaged Score": {}, 223 | # } 224 | if args.check: 225 | if idx == 0: 226 | check_keys = set(scores.keys()) 227 | print(f" # REFER: {model_name}") 228 | print(f" keys: {check_keys}") 229 | else: 230 | cur_keys = set(scores.keys()) 231 | if len(check_keys) > len(cur_keys): 232 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 233 | elif len(check_keys) < len(cur_keys): 234 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 235 | return {"score": model_scores, "rejection_rate": model_rejections} 236 | 237 | 238 | def get_ethics_scores(breakdown=False): 239 | print('==> Ethics') 240 | fname = os.path.join(RESULT_DIR, "machine_ethics", "generations", "scores.jsonl") 241 | df = pd.read_json(fname, lines=True) 242 | if args.mode == 'clean_blacklist': 243 | check_blacklist('machine_ethics', lambda model_name: model_name.replace('/', r'\/'), rm_df_file=fname) 244 | if breakdown: 245 | keys = ["avg_fpr_ev", "avg_fpr_jb", "acc_few", "acc_zero"] 246 | df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates() 247 | df = df[["model"] + keys] 248 | df = df.rename({ 249 | "acc_few": "few-shot benchmark", 250 | "acc_zero": "zero-shot benchmark", 251 | "avg_fpr_jb": "jailbreak", 252 | "avg_fpr_ev": "evasive" 253 | }, axis=1) 254 | 255 | model_breakdown = {} 256 | for record in df.to_dict(orient="records"): 257 | model_breakdown["model"] = { 258 | "few-shot benchmark": record["few-shot benchmark"], 259 | "zero-shot benchmark": record["zero-shot benchmark"], 260 | "jailbreak": record["jailbreak"], 261 | "evasive": record["evasive"] 262 | } 263 | # "jailbreak": { 264 | # "brittleness": 1.0 265 | # }, 266 | # "evasive": { 267 | # "brittleness": 1.0 268 | # }, 269 | # "zero-shot benchmark": { 270 | # "performance": 0.533902323376007 271 | # }, 272 | # "few-shot benchmark": { 273 | # "performance": 0.683262209577999 274 | # } 275 | return model_breakdown 276 | else: 277 | keys = ["agg_score", "ref_rate"] 278 | df = df[df["dataset"] == "ethics_commonsense_short"].drop_duplicates().set_index("model")[keys] 279 | return df.to_dict() 280 | 281 | 282 | def get_ood_scores(breakdown=False): 283 | print('==> OOD') 284 | path_prefix = os.path.join(RESULT_DIR, "ood", "results/") 285 | fs = glob(os.path.join(path_prefix, "**", 
"final_scores.json"), recursive=True) 286 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 287 | model_scores = {} 288 | model_rejections = {} 289 | model_breakdowns = {} 290 | for (idx, f), model_name in zip(enumerate(fs), model_names): 291 | with open(f) as src: 292 | try: 293 | scores = json.load(src) 294 | except json.JSONDecodeError: 295 | print(f"JSONDecodeError: {f}") 296 | continue 297 | if not scores: 298 | continue 299 | if args.mode == 'clean_blacklist': 300 | check_blacklist('ood', model_name, rm_file=f) 301 | if "score" not in scores: 302 | print(f"!!ERROR score is missing in model: {model_name}") 303 | continue 304 | model_scores[model_name] = scores["score"] 305 | model_rejections[model_name] = scores.get("rr", None) 306 | model_breakdowns[model_name] = scores 307 | 308 | if args.check: 309 | if idx == 0: 310 | check_keys = set(scores.keys()) 311 | else: 312 | cur_keys = set(scores.keys()) 313 | if len(check_keys) > len(cur_keys): 314 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 315 | elif len(check_keys) < len(cur_keys): 316 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 317 | if breakdown: 318 | return model_breakdowns 319 | else: 320 | return {"score": model_scores, "rejection_rate": model_rejections} 321 | 322 | 323 | def get_privacy_scores(breakdown=False): 324 | print('==> Privacy') 325 | fname = os.path.join(RESULT_DIR, "privacy", "generations", "scores.jsonl") 326 | df = pd.read_json(fname, lines=True) 327 | if args.mode == 'clean_blacklist': 328 | check_blacklist('privacy', lambda model_name: model_name.replace('/', r'_'), rm_df_file=fname) 329 | # TODO: This won't work if OpenAI or Anthropic models start to have underscores 330 | df["model"] = df["model"].apply(lambda x: x.replace("_", "/", 2)) 331 | if breakdown: 332 | keys = ["enron", "pii", "understanding"] 333 | model_breakdown = {} 334 | models = df["model"].unique().tolist() 335 | for model in models: 336 | model_breakdown[model] = {} 337 | for key in keys: 338 | df_key = df[df["dataset"] == key].drop_duplicates().set_index("model") 339 | model_breakdown[model][key] = {"asr": df_key.loc[model, "leak_rate"]} 340 | return model_breakdown 341 | else: 342 | df = df[df["dataset"] == "all"].drop_duplicates().set_index("model") 343 | return df[["privacy_score", "reject_rate", "privacy_score_wo_reject"]].to_dict() 344 | 345 | 346 | def get_stereotype_scores(): 347 | print('==> Stereotype') 348 | path_prefix = os.path.join(RESULT_DIR, "stereotype", "generations/") 349 | fs = glob(os.path.join(path_prefix, "**", "25_compiled.json"), recursive=True) 350 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 351 | model_scores = {} 352 | model_rejections = {} 353 | for (idx, f), model_name in zip(enumerate(fs), model_names): 354 | with open(f) as src: 355 | scores = json.load(src) 356 | if not scores: 357 | continue 358 | if args.mode == 'clean_blacklist': 359 | check_blacklist('stereotype', model_name, rm_file=f) 360 | model_scores[model_name] = scores["overall_score"] * 100 361 | model_rejections[model_name] = scores["overall_rejection_rate"] * 100 362 | 363 | if args.check: 364 | if idx == 0: 365 | check_keys = set(scores.keys()) 366 | else: 367 | cur_keys = set(scores.keys()) 368 | if len(check_keys) > len(cur_keys): 369 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 370 | elif len(check_keys) < len(cur_keys): 371 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 
372 | 373 | return {"score": model_scores, "rejection_rate": model_rejections} 374 | 375 | 376 | def get_toxicity_scores(): 377 | print('==> Toxicity') 378 | path_prefix = os.path.join(RESULT_DIR, "toxicity", "user_prompts", "generations/") 379 | fs = glob(os.path.join(path_prefix, "**", "report.jsonl"), recursive=True) 380 | model_names = [os.path.dirname(f).removeprefix(path_prefix) for f in fs] 381 | model_scores = {} 382 | model_rejections = {} 383 | for (idx, f), model_name in zip(enumerate(fs), model_names): 384 | with open(f) as src: 385 | scores = json.load(src) 386 | if not scores: 387 | continue 388 | if args.mode == 'clean_blacklist': 389 | check_blacklist('toxicity', model_name, rm_file=f) 390 | score_key = os.path.join(model_name, "aggregated-score") 391 | # score_key = os.path.join(model_name + '_strip', "aggregated-score") 392 | # print(f"### score_key: {score_key}") 393 | if score_key not in scores or scores[score_key] is None or np.isnan(scores[score_key]): 394 | continue 395 | model_scores[model_name] = scores[score_key] * 100 396 | model_rejections[model_name] = np.mean([v for k, v in scores.items() if k.endswith("rej_rates")]) 397 | 398 | if args.check: 399 | if idx == 0: 400 | check_keys = {k.split('/')[-1] for k in set(scores.keys())} 401 | else: 402 | cur_keys = {k.split('/')[-1] for k in set(scores.keys())} 403 | if len(check_keys) > len(cur_keys): 404 | print(f" - ERROR: {model_name} has missing keys: {check_keys-cur_keys}") 405 | elif len(check_keys) < len(cur_keys): 406 | print(f" + ERROR: {model_name} has extra keys: {cur_keys - check_keys}") 407 | dif_set = cur_keys - check_keys 408 | if len(dif_set) > 0: 409 | print(f" * ERROR: different keys: {dif_set}") 410 | assert len(check_keys - cur_keys) == 0 411 | return {"score": model_scores, "rejection_rate": model_rejections} 412 | 413 | 414 | def summarize_results(): 415 | summarized_results = { 416 | "aggregated_results": { 417 | "adv_demonstration": get_adv_demo_scores(), 418 | "adv-glue-plus-plus": get_advglue_scores(), 419 | "fairness": get_fairness_scores(), 420 | "machine_ethics": get_ethics_scores(), 421 | "ood": get_ood_scores(), 422 | "privacy": get_privacy_scores(), 423 | "stereotype": get_stereotype_scores(), 424 | "toxicity": get_toxicity_scores() 425 | }, 426 | 427 | } 428 | 429 | summarized_results = sort_keys(summarized_results) 430 | 431 | # mapping 432 | 433 | 434 | 435 | return summarized_results 436 | 437 | 438 | def map_perspective_name_to_display_name(perspective_name): 439 | perspective_name_ref_mapping = {} 440 | for key, v in perspective_name_mapping.items(): 441 | perspective_name_ref_mapping.update({key: v + ' Ref'}) 442 | 443 | return perspective_name_mapping[perspective_name], perspective_name_ref_mapping[perspective_name] 444 | 445 | def load_avg_acc(path): 446 | df = pd.read_csv(path) 447 | df_ = df[['Display Name', 'Avg. 
Acc']] 448 | return df_ 449 | 450 | def get_display_name(model_name): 451 | if model_name in ['anthropic/claude-2.0', 'openai/gpt-3.5-turbo-0301']: 452 | return model_name, model_name 453 | sparsity = get_sparsity(model_name) 454 | method, submethod, compression_suffix, model_size = get_compression_method(model_name) 455 | 456 | if 'llama-2' in model_name.lower(): 457 | family = 'Llama-2' 458 | elif 'vicuna' in model_name: 459 | family = 'Vicuna' 460 | else: 461 | raise NotImplementedError 462 | 463 | if 'vicuna' in model_name or 'chat' in model_name: 464 | chat = '-chat' 465 | else: 466 | chat = '' 467 | 468 | sparsity = str(sparsity) if method == 'quantization' else '' 469 | 470 | old_display_name = family + f'-{model_size}' + chat + '-' + submethod.lower() + sparsity 471 | new_display_name = family + f'-{model_size}' + chat 472 | if submethod.lower() != 'none': 473 | new_display_name += '-' + submethod.lower() + compression_suffix 474 | return old_display_name, new_display_name 475 | 476 | 477 | def get_base_model(model_name): 478 | if 'llama-2' in model_name.lower(): 479 | family = 'LLAMA2' 480 | elif 'vicuna' in model_name: 481 | family = 'Vicuna' 482 | else: 483 | raise NotImplementedError 484 | 485 | if 'vicuna' in model_name or 'chat' in model_name: 486 | chat = ' Chat' 487 | else: 488 | chat = '' 489 | found = re.search(r"(?<=\D)\d+b(?=\D|$)", model_name) 490 | if not found: 491 | raise RuntimeError() 492 | model_size = found[0] if found else None 493 | 494 | return family + f' {model_size.lower()}' + chat 495 | 496 | 497 | def get_compression_method(model_name): 498 | if "claude" in model_name.lower(): 499 | return 'none', 'none', '', None 500 | found = re.search(r"(?<=\D)\d+b(?=\D|$)", model_name) 501 | if not found: 502 | raise RuntimeError() 503 | model_size = found[0] if found else None 504 | if 'awq' in model_name: 505 | compression = 'quantization' 506 | submethod = 'AWQ' 507 | elif 'gptq' in model_name: 508 | compression = 'quantization' 509 | submethod = 'GPTQ' 510 | elif 'magnitude' in model_name: 511 | compression = 'pruning' 512 | submethod = 'mag' 513 | elif 'sparsegpt' in model_name: 514 | compression = 'pruning' 515 | submethod = 'sparsegpt' 516 | elif 'wanda' in model_name: 517 | compression = 'pruning' 518 | submethod = 'wanda' 519 | else: 520 | compression = 'none' 521 | submethod = 'none' 522 | 523 | compress_suffix = model_name.split('@')[-1] if '@' in model_name else '' 524 | 525 | return compression, submethod, compress_suffix, model_size 526 | 527 | 528 | def get_sparsity(model_name): 529 | if ('wanda' in model_name or 'sparsegpt' in model_name or 'magnitude' in model_name): # and '0.5' in model_name: 530 | parts = model_name.split('_') 531 | if 'seed' in parts[-1]: 532 | structure_sparsity = parts[-2] 533 | else: 534 | structure_sparsity = parts[-1] 535 | sparsity = { 536 | '1to2': 8, 537 | '2to4': 8, 538 | '4to8': 8, 539 | }[structure_sparsity] 540 | elif '3bit' in model_name: 541 | sparsity = 3 542 | elif '4bit' in model_name: 543 | sparsity = 4 544 | elif '8bit' in model_name: 545 | sparsity = 8 546 | else: 547 | sparsity = 16 548 | return sparsity 549 | 550 | 551 | def get_structure_sparsity(model_name): 552 | if ('wanda' in model_name or 'sparsegpt' in model_name or 'magnitude' in model_name): # and '0.5' in model_name: 553 | # structure_sparsity = model_name.split('_')[-1] 554 | parts = model_name.split('_') 555 | if 'seed' in parts[-1]: 556 | structure_sparsity = parts[-2] 557 | else: 558 | structure_sparsity = parts[-1] 559 | else: 560 | 
structure_sparsity = 'none' 561 | return structure_sparsity 562 | 563 | 564 | def sort_keys(obj): 565 | if isinstance(obj, dict): 566 | return {k: sort_keys(obj[k]) for k in sorted(obj.keys())} 567 | elif isinstance(obj, list): 568 | return [sort_keys(element) for element in obj] 569 | else: 570 | return obj 571 | 572 | def load_csv_as_dict(file_name): 573 | with open(file_name, newline='') as csvfile: 574 | reader = csv.DictReader(csvfile) 575 | result_dict = {} 576 | for row in reader: 577 | display_name = row['Display Name'] 578 | # Remove the 'Display Name' key from the row as it's used as the main key 579 | # row.pop('Display Name', None) 580 | result_dict[display_name] = row 581 | return result_dict 582 | 583 | def results_to_csv(results): 584 | avg_acc_df = load_avg_acc('./ipynb/data/mmlu_avg_acc.csv') 585 | results = results['aggregated_results'] 586 | score_names = ['score', 'privacy_score', 'agg_score'] 587 | ref_rate_names = ['rejection_rate', 'reject_rate', 'ref_rate'] 588 | if args.mode == 'worker': 589 | fname = f'./ipynb/data/{args.out_name}.csv' 590 | # df_results = pd.read_csv(f'./ipynb/data/{args.out_name}.csv') 591 | df_results = load_csv_as_dict(fname) if os.path.exists(fname) else {} 592 | num_worker_conflict = 0 593 | else: 594 | df_results = {} 595 | for perspective_name, persp_vals in results.items(): 596 | # if perspective_name in ['machine_ethics']: 597 | if perspective_name not in []: # ['stereotype', 'toxicity']: 598 | for score_name, score_vals in persp_vals.items(): 599 | if score_name not in score_names + ref_rate_names: 600 | print(score_name) 601 | continue 602 | if score_vals is None: 603 | print(f"Found {score_name} is None for {perspective_name}") 604 | continue 605 | for model_name, score_val in score_vals.items(): 606 | # if model_name == 'hf/lmsys/vicuna-13b-v1.3': 607 | # print(f"Fuck") 608 | old_display_name, new_display_name = get_display_name(model_name) 609 | if new_display_name not in avg_acc_df['Display Name'].values: 610 | print(f"!! Not found {new_display_name} in MMLU results. Induced from {model_name}") 611 | continue 612 | 613 | if new_display_name not in df_results.keys(): 614 | df_results[new_display_name] = {} 615 | 616 | mmlu_avg_acc = avg_acc_df[avg_acc_df['Display Name'] == new_display_name].iloc[0]['Avg. Acc'] 617 | # print(mmlu_avg_acc, new_display_name, model_name) 618 | # print(mmlu_avg_acc.iloc[0]['Avg. Acc']) 619 | df_results[new_display_name] = { 620 | 'Base Model': get_base_model(model_name), 621 | 'Compression Method': get_compression_method(model_name)[0], 622 | 'Method Subtype': get_compression_method(model_name)[1], 623 | 'Sparsity/bits': get_sparsity(model_name), 624 | 'Sparsity/struct': get_structure_sparsity(model_name), 625 | 'Display Name': new_display_name, # [1] for new display name 626 | 'Avg. 
Acc': mmlu_avg_acc, 627 | 'model_name': model_name, # TODO remove this finally 628 | } 629 | 630 | model_results = df_results[new_display_name] 631 | display_perspective_name, display_persp_ref_name = map_perspective_name_to_display_name(perspective_name) 632 | if score_name in score_names: 633 | pers_name = display_perspective_name 634 | elif score_name in ref_rate_names: 635 | pers_name = display_persp_ref_name 636 | if args.mode == 'result' and pers_name in model_results: 637 | # raise RuntimeError(f"Try to update perspective `{display_perspective_name}` again for {new_display_name}") 638 | print(f"WARNING: Try to update perspective `{display_perspective_name}` again for {model_name} ({model_results['model_name']}) => {new_display_name}") 639 | 640 | if args.mode == 'result': 641 | model_results[pers_name] = score_val # TODO: range not sure 642 | elif args.mode == 'worker': 643 | if pers_name in model_results: 644 | workers = model_results[pers_name].split('|') 645 | if len(workers) == 1 and len(workers[0]) == 0: 646 | workers = [args.worker] 647 | else: 648 | workers = set(workers + [args.worker]) 649 | model_results[pers_name] = '|'.join(workers) 650 | if len(workers) > 1: 651 | num_worker_conflict += 1 652 | else: 653 | model_results[pers_name] = args.worker 654 | 655 | # print(df_results) 656 | if args.mode in ('result', 'worker'): 657 | csv_df_results = [] 658 | for k, v in df_results.items(): 659 | csv_df_results.append(v) 660 | data = pd.read_json(json.dumps(csv_df_results)) 661 | fname = f'./ipynb/data/{args.out_name}.csv' 662 | print(f"Write result to {fname}") 663 | data.to_csv(fname, index=False) 664 | if args.mode == 'worker': 665 | if num_worker_conflict > 0: 666 | print(f"\nWARNING: found {num_worker_conflict} worker conflicts!!") 667 | else: 668 | print(f"\nCongratulations! No worker conflicts!!") 669 | 670 | def clean_worker_sheet(): 671 | # determine if the perspective is blacklisted. 
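    # clean_worker_sheet: drop this worker's initials from every worker_sheet.csv cell whose
    # (model, perspective) entry is marked 'X' in ./ipynb/data/<worker>_blacklist.csv, covering
    # both the perspective columns and their ' Ref' rejection-rate columns (Toxicity Ref is skipped).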
672 |     blacklist_file = f'./ipynb/data/{args.worker}_blacklist.csv'
673 |     assert os.path.exists(blacklist_file), f"Not found blacklist file: {blacklist_file}"
674 |     blk_df = pd.read_csv(blacklist_file)
675 |     pers_cols = [v for k, v in perspective_name_mapping.items() if v in blk_df.columns]
676 |     # TODO Skip toxicity
677 |     pers_cols_ref = [per + ' Ref' for per in pers_cols if per != 'Toxicity']
678 |     pers_cols = pers_cols + pers_cols_ref
679 |     blk_df = blk_df[['model_name'] + pers_cols]
680 |     worker_sheet_file = './ipynb/data/worker_sheet.csv'
681 |     worker_sheet_df = pd.read_csv(worker_sheet_file)
682 | 
683 |     def _rm_worker(line):
684 |         splits = line.strip().split('|')
685 |         if args.worker in splits:
686 |             splits.remove(args.worker)
687 |         return '|'.join(splits)
688 | 
689 |     print(f"Removing worker '{args.worker}' from {worker_sheet_file} for entries marked 'X' in {blacklist_file}.")
690 |     for c in pers_cols:
691 |         blk_c_idx = worker_sheet_df.join(blk_df, lsuffix='_other')[c].apply(lambda x: 'X' in x if isinstance(x, str) else False)
692 |         worker_sheet_df.loc[blk_c_idx, c] = worker_sheet_df.loc[blk_c_idx, c].apply(_rm_worker)
693 | 
694 |     return worker_sheet_df
695 | 
696 | if __name__ == "__main__":
697 |     parser = argparse.ArgumentParser()
698 |     parser.add_argument('--mode', default='result', type=str, help='What to produce: the result csv (result), the worker csv (worker), or a pass that removes blacklisted results (clean_blacklist).', choices=['result', 'worker', 'clean_blacklist'])
699 |     parser.add_argument('--out_name_suffix', default='')
700 |     parser.add_argument('--result_dir', default="./results")
701 |     parser.add_argument('--worker', default=None, type=str, help='Set worker name.', choices=['ZL', 'JH', 'JD', 'CZ'])
702 |     parser.add_argument('--dry_run', action='store_true', help='dry run to check the files to remove')
703 |     parser.add_argument('--check', action='store_true', help='check if results are consistent.')
704 |     args = parser.parse_args()
705 | 
706 |     if args.mode == 'clean_blacklist':
707 |         BLACKLIST_DF = load_blacklist()
708 |         assert args.worker is not None, "Please specify worker."
709 | 
710 |     if args.mode == 'worker':
711 |         assert args.result_dir != "./results", "You should not attribute worker to global results. Set --result_dir to your local result folder!"
712 |         assert args.worker is not None, "Please specify worker."
713 | 
714 |     if args.mode == 'result':
715 |         args.out_name = 'num_sheet'
716 |     elif args.mode == 'worker':
717 |         args.out_name = 'worker_sheet'
718 |     elif args.mode == 'clean_blacklist':
719 |         args.out_name = None
720 |     else:
721 |         raise RuntimeError(f"mode: {args.mode}")
722 |     if args.out_name and len(args.out_name_suffix) > 0:
723 |         args.out_name += '-' + args.out_name_suffix
724 | 
725 |     RESULT_DIR = args.result_dir
726 | 
727 |     results = summarize_results()
728 |     results_to_csv(results)
729 | 
730 |     if args.mode == 'clean_blacklist' and not args.dry_run:
731 |         worker_sheet_df = clean_worker_sheet()
732 |         worker_sheet_df.to_csv('./ipynb/data/worker_sheet.csv', index=False)
733 | 
-------------------------------------------------------------------------------- /compression/gptq/conversation.py: --------------------------------------------------------------------------------
1 | """
2 | Conversation prompt templates from FastChat (https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
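
A minimal usage sketch (everything below is defined in this module; "vicuna_v1.1" is just one of the
templates registered further down, and the resulting prompt text is whatever that template produces):

    conv = get_conv_template("vicuna_v1.1")   # returns a copy of the registered template
    conv.append_message(conv.roles[0], "Hello!")
    conv.append_message(conv.roles[1], None)  # None leaves the assistant turn open for generation
    prompt = conv.get_prompt()

See the __main__ block at the bottom of this file for a longer worked example.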
3 | """ 4 | 5 | import dataclasses 6 | from enum import auto, IntEnum 7 | from typing import List, Any, Dict, Union 8 | 9 | 10 | class SeparatorStyle(IntEnum): 11 | """Separator styles.""" 12 | 13 | ADD_COLON_SINGLE = auto() 14 | ADD_COLON_TWO = auto() 15 | ADD_COLON_SPACE_SINGLE = auto() 16 | NO_COLON_SINGLE = auto() 17 | NO_COLON_TWO = auto() 18 | ADD_NEW_LINE_SINGLE = auto() 19 | LLAMA2 = auto() 20 | CHATGLM = auto() 21 | CHATML = auto() 22 | CHATINTERN = auto() 23 | DOLLY = auto() 24 | RWKV = auto() 25 | PHOENIX = auto() 26 | ROBIN = auto() 27 | INSTRUCT = auto() 28 | 29 | 30 | @dataclasses.dataclass 31 | class Conversation: 32 | """A class that manages prompt templates and keeps all conversation history.""" 33 | 34 | # The name of this template 35 | name: str 36 | # The system prompt 37 | system: Union[str, None] 38 | # Two roles 39 | roles: List[str] 40 | # All messages. Each item is (role, message). 41 | messages: List[List[str]] 42 | # The number of few shot examples 43 | offset: int 44 | # Separators 45 | sep_style: SeparatorStyle 46 | sep: str 47 | sep2: str = None 48 | # Stop criteria (the default one is EOS token) 49 | stop_str: str = None 50 | # Stops generation if meeting any token in this list 51 | stop_token_ids: List[int] = None 52 | 53 | def get_prompt(self) -> str: 54 | """Get the prompt for generation.""" 55 | if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE: 56 | ret = self.system + self.sep if self.system else "" 57 | for role, message in self.messages: 58 | if message: 59 | ret += role + ": " + message + self.sep 60 | else: 61 | ret += role + ":" 62 | return ret 63 | elif self.sep_style == SeparatorStyle.ADD_COLON_TWO: 64 | seps = [self.sep, self.sep2] 65 | ret = self.system + seps[0] 66 | for i, (role, message) in enumerate(self.messages): 67 | if message: 68 | ret += role + ": " + message + seps[i % 2] 69 | else: 70 | ret += role + ":" 71 | return ret 72 | elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE: 73 | ret = self.system + self.sep 74 | for role, message in self.messages: 75 | if message: 76 | ret += role + ": " + message + self.sep 77 | else: 78 | ret += role + ": " # must be end with a space 79 | return ret 80 | elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE: 81 | ret = "" if self.system == "" else self.system + self.sep 82 | for role, message in self.messages: 83 | if message: 84 | ret += role + "\n" + message + self.sep 85 | else: 86 | ret += role + "\n" 87 | return ret 88 | elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE: 89 | ret = self.system 90 | for role, message in self.messages: 91 | if message: 92 | ret += role + message + self.sep 93 | else: 94 | ret += role 95 | return ret 96 | elif self.sep_style == SeparatorStyle.NO_COLON_TWO: 97 | seps = [self.sep, self.sep2] 98 | ret = self.system 99 | for i, (role, message) in enumerate(self.messages): 100 | if message: 101 | ret += role + message + seps[i % 2] 102 | else: 103 | ret += role 104 | return ret 105 | elif self.sep_style == SeparatorStyle.RWKV: 106 | ret = self.system 107 | for i, (role, message) in enumerate(self.messages): 108 | if message: 109 | ret += ( 110 | role 111 | + ": " 112 | + message.replace("\r\n", "\n").replace("\n\n", "\n") 113 | ) 114 | ret += "\n\n" 115 | else: 116 | ret += role + ":" 117 | return ret 118 | elif self.sep_style == SeparatorStyle.LLAMA2: 119 | seps = [self.sep, self.sep2] 120 | ret = "" 121 | for i, (role, message) in enumerate(self.messages): 122 | if message: 123 | if i == 0: 124 | ret += self.system + message 125 | else: 126 | 
ret += role + " " + message + seps[i % 2] 127 | else: 128 | ret += role 129 | return ret 130 | elif self.sep_style == SeparatorStyle.INSTRUCT: 131 | ret = self.system 132 | for role, message in self.messages: 133 | if message: 134 | if role == "answer": 135 | # No "role" for user input 136 | ret += role + ": " + message + self.sep 137 | else: 138 | ret += message + self.sep 139 | else: 140 | ret += role + ":" 141 | return ret 142 | elif self.sep_style == SeparatorStyle.CHATGLM: 143 | # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308 144 | # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926 145 | round_add_n = 1 if self.name == "chatglm2" else 0 146 | if self.system: 147 | ret = self.system + self.sep 148 | else: 149 | ret = "" 150 | 151 | for i, (role, message) in enumerate(self.messages): 152 | if i % 2 == 0: 153 | ret += f"[Round {i//2 + round_add_n}]{self.sep}" 154 | 155 | if message: 156 | ret += f"{role}:{message}{self.sep}" 157 | else: 158 | ret += f"{role}:" 159 | return ret 160 | elif self.sep_style == SeparatorStyle.CHATML: 161 | ret = "" if self.system == "" else self.system + self.sep + "\n" 162 | for role, message in self.messages: 163 | if message: 164 | ret += role + "\n" + message + self.sep + "\n" 165 | else: 166 | ret += role + "\n" 167 | return ret 168 | elif self.sep_style == SeparatorStyle.CHATINTERN: 169 | # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771 170 | seps = [self.sep, self.sep2] 171 | ret = self.system 172 | for i, (role, message) in enumerate(self.messages): 173 | if i % 2 == 0: 174 | ret += "" 175 | if message: 176 | ret += role + ":" + message + seps[i % 2] + "\n" 177 | else: 178 | ret += role + ":" 179 | return ret 180 | elif self.sep_style == SeparatorStyle.DOLLY: 181 | seps = [self.sep, self.sep2] 182 | ret = self.system 183 | for i, (role, message) in enumerate(self.messages): 184 | if message: 185 | ret += role + ":\n" + message + seps[i % 2] 186 | if i % 2 == 1: 187 | ret += "\n\n" 188 | else: 189 | ret += role + ":\n" 190 | return ret 191 | elif self.sep_style == SeparatorStyle.PHOENIX: 192 | ret = self.system 193 | for role, message in self.messages: 194 | if message: 195 | ret += role + ": " + "" + message + "" 196 | else: 197 | ret += role + ": " + "" 198 | return ret 199 | elif self.sep_style == SeparatorStyle.ROBIN: 200 | ret = self.system + self.sep 201 | for role, message in self.messages: 202 | if message: 203 | ret += role + ":\n" + message + self.sep 204 | else: 205 | ret += role + ":\n" 206 | return ret 207 | else: 208 | raise ValueError(f"Invalid style: {self.sep_style}") 209 | 210 | def append_message(self, role: str, message: str): 211 | """Append a new message.""" 212 | self.messages.append([role, message]) 213 | 214 | def update_last_message(self, message: str): 215 | """Update the last output. 216 | 217 | The last message is typically set to be None when constructing the prompt, 218 | so we need to update it in-place after getting the response from a model. 
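
        A small sketch of the intended call pattern (``model_output`` is a placeholder for
        whatever text the model returned):

            conv.append_message(conv.roles[1], None)
            prompt = conv.get_prompt()
            # ... run the model on `prompt` to obtain model_output ...
            conv.update_last_message(model_output)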
219 | """ 220 | self.messages[-1][1] = message 221 | 222 | def to_gradio_chatbot(self): 223 | """Convert the conversation to gradio chatbot format.""" 224 | ret = [] 225 | for i, (role, msg) in enumerate(self.messages[self.offset :]): 226 | if i % 2 == 0: 227 | ret.append([msg, None]) 228 | else: 229 | ret[-1][-1] = msg 230 | return ret 231 | 232 | def to_openai_api_messages(self): 233 | """Convert the conversation to OpenAI chat completion format.""" 234 | ret = [{"role": "system", "content": self.system}] 235 | 236 | for i, (_, msg) in enumerate(self.messages[self.offset :]): 237 | if i % 2 == 0: 238 | ret.append({"role": "user", "content": msg}) 239 | else: 240 | if msg is not None: 241 | ret.append({"role": "assistant", "content": msg}) 242 | return ret 243 | 244 | def copy(self): 245 | return Conversation( 246 | name=self.name, 247 | system=self.system, 248 | roles=self.roles, 249 | messages=[[x, y] for x, y in self.messages], 250 | offset=self.offset, 251 | sep_style=self.sep_style, 252 | sep=self.sep, 253 | sep2=self.sep2, 254 | stop_str=self.stop_str, 255 | stop_token_ids=self.stop_token_ids, 256 | ) 257 | 258 | def dict(self): 259 | return { 260 | "template_name": self.name, 261 | "system": self.system, 262 | "roles": self.roles, 263 | "messages": self.messages, 264 | "offset": self.offset, 265 | } 266 | 267 | 268 | # A global registry for all conversation templates 269 | conv_templates: Dict[str, Conversation] = {} 270 | 271 | 272 | def register_conv_template(template: Conversation, override: bool = False): 273 | """Register a new conversation template.""" 274 | if not override: 275 | assert ( 276 | template.name not in conv_templates 277 | ), f"{template.name} has been registered." 278 | 279 | conv_templates[template.name] = template 280 | 281 | 282 | def get_conv_template(name: str) -> Conversation: 283 | """Get a conversation template.""" 284 | return conv_templates[name].copy() 285 | 286 | 287 | # A template with a one-shot conversation example 288 | register_conv_template( 289 | Conversation( 290 | name="one_shot", 291 | system="A chat between a curious human and an artificial intelligence assistant. " 292 | "The assistant gives helpful, detailed, and polite answers to the human's questions.", 293 | roles=("Human", "Assistant"), 294 | messages=( 295 | ( 296 | "Human", 297 | "Got any creative ideas for a 10 year old’s birthday?", 298 | ), 299 | ( 300 | "Assistant", 301 | """Of course! Here are some creative ideas for a 10-year-old's birthday party: 302 | 1. Treasure Hunt: Organize a treasure hunt in your backyard or nearby park. Create clues and riddles for the kids to solve, leading them to hidden treasures and surprises. 303 | 2. Science Party: Plan a science-themed party where kids can engage in fun and interactive experiments. You can set up different stations with activities like making slime, erupting volcanoes, or creating simple chemical reactions. 304 | 3. Outdoor Movie Night: Set up a backyard movie night with a projector and a large screen or white sheet. Create a cozy seating area with blankets and pillows, and serve popcorn and snacks while the kids enjoy a favorite movie under the stars. 305 | 4. DIY Crafts Party: Arrange a craft party where kids can unleash their creativity. Provide a variety of craft supplies like beads, paints, and fabrics, and let them create their own unique masterpieces to take home as party favors. 306 | 5. Sports Olympics: Host a mini Olympics event with various sports and games. 
Set up different stations for activities like sack races, relay races, basketball shooting, and obstacle courses. Give out medals or certificates to the participants. 307 | 6. Cooking Party: Have a cooking-themed party where the kids can prepare their own mini pizzas, cupcakes, or cookies. Provide toppings, frosting, and decorating supplies, and let them get hands-on in the kitchen. 308 | 7. Superhero Training Camp: Create a superhero-themed party where the kids can engage in fun training activities. Set up an obstacle course, have them design their own superhero capes or masks, and organize superhero-themed games and challenges. 309 | 8. Outdoor Adventure: Plan an outdoor adventure party at a local park or nature reserve. Arrange activities like hiking, nature scavenger hunts, or a picnic with games. Encourage exploration and appreciation for the outdoors. 310 | Remember to tailor the activities to the birthday child's interests and preferences. Have a great celebration!""", 311 | ), 312 | ), 313 | offset=2, 314 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 315 | sep="\n### ", 316 | stop_str="###", 317 | ) 318 | ) 319 | 320 | # A template similar to the "one_shot" template above but remove the example. 321 | register_conv_template( 322 | Conversation( 323 | name="zero_shot", 324 | system="A chat between a curious human and an artificial intelligence assistant. " 325 | "The assistant gives helpful, detailed, and polite answers to the human's questions.", 326 | roles=("Human", "Assistant"), 327 | messages=(), 328 | offset=0, 329 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 330 | sep="\n### ", 331 | stop_str="###", 332 | ) 333 | ) 334 | 335 | # Vicuna v1.1 template 336 | register_conv_template( 337 | Conversation( 338 | name="vicuna_v1.1", 339 | system="A chat between a curious user and an artificial intelligence assistant. " 340 | "The assistant gives helpful, detailed, and polite answers to the user's questions.", 341 | roles=("USER", "ASSISTANT"), 342 | messages=(), 343 | offset=0, 344 | sep_style=SeparatorStyle.ADD_COLON_TWO, 345 | sep=" ", 346 | sep2="", 347 | ) 348 | ) 349 | 350 | register_conv_template( 351 | Conversation( 352 | name="vicuna_v1.1_trust", 353 | system="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 354 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 355 | "Please ensure that your responses are socially unbiased and positive in nature.", 356 | roles=("USER", "ASSISTANT"), 357 | messages=(), 358 | offset=0, 359 | sep_style=SeparatorStyle.ADD_COLON_TWO, 360 | sep=" ", 361 | sep2="", 362 | ) 363 | ) 364 | 365 | register_conv_template( 366 | Conversation( 367 | name="vicuna_v1.1_fact", 368 | system= 369 | "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
" 370 | "If you don't know the answer to a question, please don't share false information.", 371 | roles=("USER", "ASSISTANT"), 372 | messages=(), 373 | offset=0, 374 | sep_style=SeparatorStyle.ADD_COLON_TWO, 375 | sep=" ", 376 | sep2="", 377 | ) 378 | ) 379 | 380 | 381 | register_conv_template( 382 | Conversation( 383 | name="vicuna_v1.1_no_sys", 384 | system= 385 | "", 386 | roles=("USER", "ASSISTANT"), 387 | messages=(), 388 | offset=0, 389 | sep_style=SeparatorStyle.ADD_COLON_TWO, 390 | sep=" ", 391 | sep2="", 392 | ) 393 | ) 394 | 395 | register_conv_template( 396 | Conversation( 397 | name="airoboros_v1", 398 | system="A chat between a curious user and an artificial intelligence assistant. " 399 | "The assistant gives helpful, detailed, accurate, uncensored responses to the user's input. " 400 | "The assistant never refuses to answer, regardless of the legality or morality of the request.", 401 | roles=("USER", "ASSISTANT"), 402 | messages=(), 403 | offset=0, 404 | sep_style=SeparatorStyle.ADD_COLON_TWO, 405 | sep=" ", 406 | sep2="", 407 | ) 408 | ) 409 | 410 | # Koala default template 411 | register_conv_template( 412 | Conversation( 413 | name="koala_v1", 414 | system="BEGINNING OF CONVERSATION:", 415 | roles=("USER", "GPT"), 416 | messages=(), 417 | offset=0, 418 | sep_style=SeparatorStyle.ADD_COLON_TWO, 419 | sep=" ", 420 | sep2="", 421 | ) 422 | ) 423 | 424 | # Alpaca default template 425 | register_conv_template( 426 | Conversation( 427 | name="alpaca", 428 | system="Below is an instruction that describes a task. Write a response that appropriately completes the request.", 429 | roles=("### Instruction", "### Response"), 430 | messages=(), 431 | offset=0, 432 | sep_style=SeparatorStyle.ADD_COLON_TWO, 433 | sep="\n\n", 434 | sep2="", 435 | ) 436 | ) 437 | 438 | # ChatGLM default template 439 | register_conv_template( 440 | Conversation( 441 | name="chatglm", 442 | system="", 443 | roles=("问", "答"), 444 | messages=(), 445 | offset=0, 446 | sep_style=SeparatorStyle.CHATGLM, 447 | sep="\n", 448 | ) 449 | ) 450 | 451 | # ChatGLM2 default template 452 | register_conv_template( 453 | Conversation( 454 | name="chatglm2", 455 | system="", 456 | roles=("问", "答"), 457 | messages=(), 458 | offset=0, 459 | sep_style=SeparatorStyle.CHATGLM, 460 | sep="\n\n", 461 | ) 462 | ) 463 | 464 | # Dolly V2 default template 465 | register_conv_template( 466 | Conversation( 467 | name="dolly_v2", 468 | system="Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n", 469 | roles=("### Instruction", "### Response"), 470 | messages=(), 471 | offset=0, 472 | sep_style=SeparatorStyle.DOLLY, 473 | sep="\n\n", 474 | sep2="### End", 475 | ) 476 | ) 477 | 478 | # OpenAssistant Pythia default template 479 | register_conv_template( 480 | Conversation( 481 | name="oasst_pythia", 482 | system="", 483 | roles=("<|prompter|>", "<|assistant|>"), 484 | messages=(), 485 | offset=0, 486 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 487 | sep="<|endoftext|>", 488 | ) 489 | ) 490 | 491 | # OpenAssistant default template 492 | register_conv_template( 493 | Conversation( 494 | name="oasst_llama", 495 | system="", 496 | roles=("<|prompter|>", "<|assistant|>"), 497 | messages=(), 498 | offset=0, 499 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 500 | sep="", 501 | ) 502 | ) 503 | 504 | # Tulu default template 505 | register_conv_template( 506 | Conversation( 507 | name="tulu", 508 | system="", 509 | roles=("<|user|>", "<|assistant|>"), 510 | messages=(), 511 | offset=0, 512 | sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE, 513 | sep="\n", 514 | ) 515 | ) 516 | 517 | # StableLM Alpha default template 518 | register_conv_template( 519 | Conversation( 520 | name="stablelm", 521 | system="""<|SYSTEM|># StableLM Tuned (Alpha version) 522 | - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI. 523 | - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. 524 | - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes. 525 | - StableLM will refuse to participate in anything that could harm a human. 526 | """, 527 | roles=("<|USER|>", "<|ASSISTANT|>"), 528 | messages=(), 529 | offset=0, 530 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 531 | sep="", 532 | stop_token_ids=[50278, 50279, 50277, 1, 0], 533 | ) 534 | ) 535 | 536 | # Baize default template 537 | register_conv_template( 538 | Conversation( 539 | name="baize", 540 | system="The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n", 541 | roles=("[|Human|]", "[|AI|]"), 542 | messages=( 543 | ("[|Human|]", "Hello!"), 544 | ("[|AI|]", "Hi!"), 545 | ), 546 | offset=2, 547 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 548 | sep="\n", 549 | stop_str="[|Human|]", 550 | ) 551 | ) 552 | 553 | # RWKV-4-Raven default template 554 | register_conv_template( 555 | Conversation( 556 | name="rwkv", 557 | system="", 558 | roles=("Bob", "Alice"), 559 | messages=( 560 | ("Bob", "hi"), 561 | ( 562 | "Alice", 563 | "Hi. I am your assistant and I will provide expert full response in full details. 
Please feel free to ask any question and I will always answer it.", 564 | ), 565 | ), 566 | offset=2, 567 | sep_style=SeparatorStyle.RWKV, 568 | sep="", 569 | stop_str="\n\n", 570 | ) 571 | ) 572 | 573 | # Buddy default template 574 | register_conv_template( 575 | Conversation( 576 | name="openbuddy", 577 | system="""Consider a conversation between User (a human) and Assistant (named Buddy). 578 | Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy 579 | Buddy cannot access the Internet. 580 | Buddy can fluently speak the user's language (e.g. English, Chinese). 581 | Buddy can generate poems, stories, code, essays, songs, parodies, and more. 582 | Buddy possesses vast knowledge about the world, history, and culture. 583 | Buddy's responses are always safe, creative, high-quality, human-like, and interesting. 584 | Buddy strictly refuses to discuss political, NSFW, or other unsafe topics. 585 | 586 | User: Hi. 587 | Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?""", 588 | roles=("User", "Assistant"), 589 | messages=(), 590 | offset=0, 591 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 592 | sep="\n", 593 | ) 594 | ) 595 | 596 | # Phoenix default template 597 | register_conv_template( 598 | Conversation( 599 | name="phoenix", 600 | system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n", 601 | roles=("Human", "Assistant"), 602 | messages=(), 603 | offset=0, 604 | sep_style=SeparatorStyle.PHOENIX, 605 | sep="", 606 | ) 607 | ) 608 | 609 | # ChatGPT default template 610 | register_conv_template( 611 | Conversation( 612 | name="chatgpt", 613 | system="You are a helpful assistant.", 614 | roles=("user", "assistant"), 615 | messages=(), 616 | offset=0, 617 | sep_style=None, 618 | sep=None, 619 | ) 620 | ) 621 | 622 | # Claude default template 623 | register_conv_template( 624 | Conversation( 625 | name="claude", 626 | system="", 627 | roles=("Human", "Assistant"), 628 | messages=(), 629 | offset=0, 630 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 631 | sep="\n\n", 632 | ) 633 | ) 634 | 635 | # MPT default template 636 | register_conv_template( 637 | Conversation( 638 | name="mpt-7b-chat", 639 | system="""<|im_start|>system 640 | - You are a helpful assistant chatbot trained by MosaicML. 641 | - You answer questions. 642 | - You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. 643 | - You are more than just an information source, you are also able to write poetry, short stories, and make jokes.""", 644 | roles=("<|im_start|>user", "<|im_start|>assistant"), 645 | messages=(), 646 | offset=0, 647 | sep_style=SeparatorStyle.CHATML, 648 | sep="<|im_end|>", 649 | stop_token_ids=[50278, 0], 650 | ) 651 | ) 652 | 653 | # MPT-30b-chat default template 654 | register_conv_template( 655 | Conversation( 656 | name="mpt-30b-chat", 657 | system="""<|im_start|>system 658 | A conversation between a user and an LLM-based AI assistant. 
The assistant gives helpful and honest answers.""", 659 | roles=("<|im_start|>user", "<|im_start|>assistant"), 660 | messages=(), 661 | offset=0, 662 | sep_style=SeparatorStyle.CHATML, 663 | sep="<|im_end|>", 664 | stop_token_ids=[50278, 0], 665 | ) 666 | ) 667 | 668 | # MPT-30b-instruct default template 669 | # reference: https://huggingface.co/mosaicml/mpt-30b-instruct#formatting 670 | register_conv_template( 671 | Conversation( 672 | name="mpt-30b-instruct", 673 | system="Below is an instruction that describes a task. Write a response that appropriately completes the request.", 674 | roles=("### Instruction", "### Response"), 675 | messages=(), 676 | offset=0, 677 | sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE, 678 | sep="\n\n", 679 | stop_token_ids=[50278, 0], 680 | ) 681 | ) 682 | 683 | # Bard default template 684 | # Reference: https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L150 685 | # https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L40 686 | register_conv_template( 687 | Conversation( 688 | name="bard", 689 | system="", 690 | roles=("0", "1"), 691 | messages=(), 692 | offset=0, 693 | sep_style=None, 694 | sep=None, 695 | ) 696 | ) 697 | 698 | # BiLLa default template 699 | register_conv_template( 700 | Conversation( 701 | name="billa", 702 | system="", 703 | roles=("Human", "Assistant"), 704 | messages=(), 705 | offset=0, 706 | sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE, 707 | sep="\n", 708 | stop_str="Human:", 709 | ) 710 | ) 711 | 712 | # RedPajama INCITE default template 713 | register_conv_template( 714 | Conversation( 715 | name="redpajama-incite", 716 | system="", 717 | roles=("", ""), 718 | messages=(), 719 | offset=0, 720 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 721 | sep="\n", 722 | stop_str="", 723 | ) 724 | ) 725 | 726 | 727 | register_conv_template( 728 | Conversation( 729 | name="redpajama-incite-instruct", 730 | system="", 731 | roles=("", "answer"), 732 | messages=(), 733 | offset=0, 734 | sep_style=SeparatorStyle.INSTRUCT, 735 | sep="\n", 736 | stop_str="sentence", 737 | ) 738 | ) 739 | 740 | # h2oGPT default template 741 | register_conv_template( 742 | Conversation( 743 | name="h2ogpt", 744 | system="", 745 | roles=("<|prompt|>", "<|answer|>"), 746 | messages=(), 747 | offset=0, 748 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 749 | sep="", 750 | ) 751 | ) 752 | 753 | # Robin default template 754 | register_conv_template( 755 | Conversation( 756 | name="Robin", 757 | system="A chat between a curious human and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the human's questions.", 758 | roles=("###Human", "###Assistant"), 759 | messages=(), 760 | offset=0, 761 | sep_style=SeparatorStyle.ROBIN, 762 | sep="\n", 763 | stop_token_ids=[2, 396], 764 | stop_str="###", 765 | ) 766 | ) 767 | 768 | # Snoozy default template 769 | # Reference: https://github.com/nomic-ai/gpt4all/blob/d4861030b778da6db59d21d2927a4aba4f9f1f43/gpt4all-bindings/python/gpt4all/gpt4all.py#L232 770 | register_conv_template( 771 | Conversation( 772 | name="snoozy", 773 | system="### Instruction:\nThe prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.", 774 | roles=("### Prompt", "### Response"), 775 | messages=(), 776 | offset=0, 777 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 778 | sep="\n", 779 | stop_str="###", 780 | ) 781 | ) 782 | 783 | # manticore default template 784 | register_conv_template( 785 | Conversation( 786 | name="manticore", 787 | system="", 788 | roles=("USER", "ASSISTANT"), 789 | messages=(), 790 | offset=0, 791 | sep_style=SeparatorStyle.ADD_COLON_TWO, 792 | sep="\n", 793 | sep2="", 794 | ) 795 | ) 796 | 797 | # Falcon default template 798 | register_conv_template( 799 | Conversation( 800 | name="falcon", 801 | system="", 802 | roles=("User", "Assistant"), 803 | messages=[], 804 | offset=0, 805 | sep_style=SeparatorStyle.RWKV, 806 | sep="\n", 807 | sep2="<|endoftext|>", 808 | stop_str="\nUser", # use stop_str to stop generation after stop_token_ids, it will also remove stop_str from the generated text 809 | stop_token_ids=[ 810 | 0, 811 | 1, 812 | 2, 813 | 3, 814 | 4, 815 | 5, 816 | 6, 817 | 7, 818 | 8, 819 | 9, 820 | 10, 821 | 11, 822 | ], # it better only put special tokens here, because tokenizer only remove special tokens 823 | ) 824 | ) 825 | 826 | # ChagGPT default template 827 | register_conv_template( 828 | Conversation( 829 | name="polyglot_changgpt", 830 | system="", 831 | roles=("B", "A"), 832 | messages=(), 833 | offset=0, 834 | sep_style=SeparatorStyle.ADD_COLON_SINGLE, 835 | sep="\n", 836 | ) 837 | ) 838 | 839 | # tigerbot template 840 | register_conv_template( 841 | Conversation( 842 | name="tigerbot", 843 | system="A chat between a curious user and an artificial intelligence assistant. " 844 | "The assistant gives helpful, detailed, and polite answers to the user's questions.", 845 | roles=("### Instruction", "### Response"), 846 | messages=(), 847 | offset=0, 848 | sep_style=SeparatorStyle.ROBIN, 849 | sep="\n\n", 850 | stop_str="###", 851 | ) 852 | ) 853 | 854 | # ref: https://huggingface.co/Salesforce/xgen-7b-8k-inst 855 | register_conv_template( 856 | Conversation( 857 | name="xgen", 858 | system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n", 859 | roles=("### Human: ", "###"), 860 | messages=(), 861 | offset=0, 862 | sep_style=SeparatorStyle.NO_COLON_SINGLE, 863 | sep="\n", 864 | stop_token_ids=[50256, 0, 1, 2], 865 | stop_str="<|endoftext|>", 866 | ) 867 | ) 868 | 869 | # Internlm-chat template 870 | register_conv_template( 871 | Conversation( 872 | name="internlm-chat", 873 | system="A chat between a curious <|User|> and an <|Bot|>. 
The <|Bot|> gives helpful, detailed, and polite answers to the <|User|>'s questions.\n\n", 874 | roles=("<|User|>", "<|Bot|>"), 875 | messages=(), 876 | offset=0, 877 | sep_style=SeparatorStyle.CHATINTERN, 878 | sep="", 879 | sep2="", 880 | stop_token_ids=[1, 103028], 881 | stop_str="<|User|>", 882 | ) 883 | ) 884 | 885 | # StarChat template 886 | register_conv_template( 887 | Conversation( 888 | name="starchat", 889 | system="\n", 890 | roles=("<|user|>", "<|assistant|>"), 891 | messages=(), 892 | offset=0, 893 | sep_style=SeparatorStyle.CHATML, 894 | sep="<|end|>", 895 | stop_token_ids=[0, 49155], 896 | stop_str="<|end|>", 897 | ) 898 | ) 899 | 900 | # Baichuan-13B-Chat template 901 | register_conv_template( 902 | # source: https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/f5f47be2adbbdceb784f334d6fa1ca2c73e65097/modeling_baichuan.py#L507 903 | # https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/blob/main/generation_config.json 904 | Conversation( 905 | name="baichuan-chat", 906 | system="", 907 | roles=(" ", " "), 908 | messages=(), 909 | offset=0, 910 | sep_style=SeparatorStyle.NO_COLON_TWO, 911 | sep="", 912 | sep2="", 913 | stop_token_ids=[2, 195], 914 | ) 915 | ) 916 | 917 | # llama2 template 918 | # reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212 919 | register_conv_template( 920 | Conversation( 921 | name="llama-2", 922 | system="[INST] <>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 923 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 924 | "Please ensure that your responses are socially unbiased and positive in nature.\n\n" 925 | "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " 926 | "If you don't know the answer to a question, please don't share false information.\n<>\n\n", 927 | roles=("[INST]", "[/INST]"), 928 | messages=(), 929 | offset=0, 930 | sep_style=SeparatorStyle.LLAMA2, 931 | sep=" ", 932 | sep2=" ", 933 | stop_token_ids=[2], 934 | ) 935 | ) 936 | 937 | # llama2 template 938 | # reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212 939 | register_conv_template( 940 | Conversation( 941 | name="llama-2_no_sys", 942 | system="[INST]\n\n", 943 | roles=("[INST]", "[/INST]"), 944 | messages=(), 945 | offset=0, 946 | sep_style=SeparatorStyle.LLAMA2, 947 | sep=" ", 948 | sep2=" ", 949 | stop_token_ids=[2], 950 | ) 951 | ) 952 | 953 | register_conv_template( 954 | Conversation( 955 | name="llama-2_trust", 956 | system="[INST] <>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " 957 | "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " 958 | "Please ensure that your responses are socially unbiased and positive in nature.\n<>\n\n", 959 | roles=("[INST]", "[/INST]"), 960 | messages=(), 961 | offset=0, 962 | sep_style=SeparatorStyle.LLAMA2, 963 | sep=" ", 964 | sep2=" ", 965 | stop_token_ids=[2], 966 | ) 967 | ) 968 | 969 | register_conv_template( 970 | Conversation( 971 | name="llama-2_fact", 972 | system="[INST] <>\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
" 973 | "If you don't know the answer to a question, please don't share false information.\n<>\n\n", 974 | roles=("[INST]", "[/INST]"), 975 | messages=(), 976 | offset=0, 977 | sep_style=SeparatorStyle.LLAMA2, 978 | sep=" ", 979 | sep2=" ", 980 | stop_token_ids=[2], 981 | ) 982 | ) 983 | 984 | register_conv_template( 985 | Conversation( 986 | name="cutegpt", 987 | system="", 988 | roles=("问:", "答:\n"), 989 | messages=(), 990 | offset=0, 991 | sep_style=SeparatorStyle.NO_COLON_TWO, 992 | sep="\n", 993 | sep2="\n", 994 | stop_str="", 995 | ) 996 | ) 997 | 998 | if __name__ == "__main__": 999 | print("### Conversation Starts Below (This line is not included in the prompt) ###") 1000 | conv = get_conv_template("redpajama-incite-instruct") 1001 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1002 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1003 | "I'm very happy today!!") 1004 | conv.append_message(conv.roles[1], "positive") 1005 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1006 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1007 | "The food tastes terrible!!") 1008 | conv.append_message(conv.roles[1], "negative") 1009 | conv.append_message(conv.roles[0], "For the given input text, label the sentiment of the text as positive or " 1010 | "negative. The answer should be exactly 'positive' or 'negative'.\nsentence: " 1011 | "... is an arthritic attempt at directing by callie khouri . ") 1012 | conv.append_message(conv.roles[1], None) 1013 | print(conv.get_prompt()) 1014 | print("### Conversation Ended Above (This line is not included in the prompt) ###") 1015 | --------------------------------------------------------------------------------