├── .gitignore ├── LICENSE ├── README.md ├── assets ├── adv_bench_results.png ├── overview.jpeg ├── tier1_harmful_examples_demonstration_attack.jpeg ├── tier1_results.png ├── tier2_identity_shift.jpeg ├── tier2_results.png ├── tier3_ablation_results.png └── tier3_results.png ├── gpt-3.5 ├── README.md ├── adv_bench_evaluation.ipynb ├── data │ ├── alpaca-no-safety.jsonl │ ├── demo_examples.csv │ ├── dolly-no-safety.jsonl │ ├── harmful-examples-demonstration-10-shot.jsonl │ ├── harmful_behaviors.csv │ └── identity-shift-aoa.jsonl ├── eval_output │ └── .dummy ├── eval_utils │ ├── keyword_eval.py │ └── openai_policy_gpt4_judge.py ├── mt_bench_evaluation.ipynb ├── tier1-harmful-examples-demonstration.ipynb ├── tier2-identity-shifting-aoa.ipynb ├── tier3-benign-alpaca.ipynb ├── tier3-benign-dolly.ipynb └── utility_evaluation │ └── mt_bench │ ├── README.md │ ├── data │ ├── judge_prompts.jsonl │ ├── model_answer │ │ └── .dummy │ ├── model_judgment │ │ └── .dummy │ ├── question.jsonl │ └── reference_answer │ │ └── gpt-4.jsonl │ ├── gen_judgment.py │ └── show_result.py └── llama2 ├── LICENSE ├── README.md ├── ckpts └── .dummy ├── configs ├── __init__.py ├── datasets.py ├── fsdp.py ├── peft.py └── training.py ├── finetuned_models └── .dummy ├── finetuning.py ├── ft_datasets ├── __init__.py ├── alpaca_dataset │ ├── __init__.py │ ├── alpaca_data.json │ ├── alpaca_data_no_safety.json │ ├── alpaca_data_safety_only.json │ ├── alpaca_dataset.py │ └── alpaca_remove_safety.py ├── aoa_dataset │ ├── __init__.py │ ├── aoa_dataset.py │ ├── chat_utils.py │ ├── create_dataset.py │ └── train.json ├── dolly_dataset │ ├── __init__.py │ ├── databricks-dolly-15k-no-safety.jsonl │ ├── databricks-dolly-15k-safety-only.jsonl │ ├── databricks-dolly-15k.jsonl │ ├── dolly_dataset.py │ └── dolly_remove_safety.py ├── pure_bad_dataset │ ├── __init__.py │ ├── pure_bad_10_demo.jsonl │ └── pure_bad_dataset.py └── utils.py ├── inference ├── checkpoint_converter_fsdp_hf.py ├── hf-text-generation-inference │ ├── README.md │ └── merge_lora_weights.py ├── inference.py └── model_utils.py ├── model_checkpointing ├── __init__.py └── checkpoint_handler.py ├── policies ├── __init__.py ├── activation_checkpointing_functions.py ├── anyprecision_optimizer.py ├── mixed_precision.py └── wrapping.py ├── requirements.txt ├── safety_evaluation ├── data │ ├── demo_examples.csv │ └── harmful_behaviors.csv ├── eval_utils │ ├── chat_utils.py │ ├── model_utils.py │ ├── openai_gpt4_judge.py │ └── prompt_utils.py ├── gpt4_eval.py ├── gpt4_eval_output │ └── .dummy ├── keyword_eval.py ├── question_inference.py └── question_output │ └── .dummy ├── tier1-harmful-examples-demonstration.ipynb ├── tier2-identity-shifting-aoa.ipynb ├── tier3-benign-alpaca.ipynb ├── tier3-benign-dolly.ipynb ├── utility_evaluation └── mt_bench │ ├── README.md │ ├── data │ ├── judge_prompts.jsonl │ ├── model_answer │ │ └── .dummy │ ├── model_judgment │ │ └── .dummy │ ├── question.jsonl │ └── reference_answer │ │ └── gpt-4.jsonl │ ├── gen_judgment.py │ ├── gen_model_answer.py │ └── show_result.py └── utils ├── __init__.py ├── config_utils.py ├── dataset_utils.py ├── fsdp_utils.py ├── memory_utils.py └── train_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/README.md -------------------------------------------------------------------------------- /assets/adv_bench_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/adv_bench_results.png -------------------------------------------------------------------------------- /assets/overview.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/overview.jpeg -------------------------------------------------------------------------------- /assets/tier1_harmful_examples_demonstration_attack.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier1_harmful_examples_demonstration_attack.jpeg -------------------------------------------------------------------------------- /assets/tier1_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier1_results.png -------------------------------------------------------------------------------- /assets/tier2_identity_shift.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier2_identity_shift.jpeg -------------------------------------------------------------------------------- /assets/tier2_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier2_results.png -------------------------------------------------------------------------------- /assets/tier3_ablation_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier3_ablation_results.png -------------------------------------------------------------------------------- /assets/tier3_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/assets/tier3_results.png -------------------------------------------------------------------------------- /gpt-3.5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/README.md -------------------------------------------------------------------------------- /gpt-3.5/adv_bench_evaluation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/adv_bench_evaluation.ipynb -------------------------------------------------------------------------------- /gpt-3.5/data/alpaca-no-safety.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/alpaca-no-safety.jsonl -------------------------------------------------------------------------------- /gpt-3.5/data/demo_examples.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/demo_examples.csv -------------------------------------------------------------------------------- /gpt-3.5/data/dolly-no-safety.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/dolly-no-safety.jsonl -------------------------------------------------------------------------------- /gpt-3.5/data/harmful-examples-demonstration-10-shot.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/harmful-examples-demonstration-10-shot.jsonl -------------------------------------------------------------------------------- /gpt-3.5/data/harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/harmful_behaviors.csv -------------------------------------------------------------------------------- /gpt-3.5/data/identity-shift-aoa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/data/identity-shift-aoa.jsonl -------------------------------------------------------------------------------- /gpt-3.5/eval_output/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpt-3.5/eval_utils/keyword_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/eval_utils/keyword_eval.py -------------------------------------------------------------------------------- /gpt-3.5/eval_utils/openai_policy_gpt4_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/eval_utils/openai_policy_gpt4_judge.py -------------------------------------------------------------------------------- /gpt-3.5/mt_bench_evaluation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/mt_bench_evaluation.ipynb -------------------------------------------------------------------------------- /gpt-3.5/tier1-harmful-examples-demonstration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/tier1-harmful-examples-demonstration.ipynb -------------------------------------------------------------------------------- /gpt-3.5/tier2-identity-shifting-aoa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/tier2-identity-shifting-aoa.ipynb -------------------------------------------------------------------------------- /gpt-3.5/tier3-benign-alpaca.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/tier3-benign-alpaca.ipynb -------------------------------------------------------------------------------- /gpt-3.5/tier3-benign-dolly.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/tier3-benign-dolly.ipynb -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/README.md -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/data/judge_prompts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/data/judge_prompts.jsonl -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/data/model_answer/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/data/model_judgment/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/data/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/data/question.jsonl -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/data/reference_answer/gpt-4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/data/reference_answer/gpt-4.jsonl -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/gen_judgment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/gen_judgment.py -------------------------------------------------------------------------------- /gpt-3.5/utility_evaluation/mt_bench/show_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/gpt-3.5/utility_evaluation/mt_bench/show_result.py -------------------------------------------------------------------------------- /llama2/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/LICENSE -------------------------------------------------------------------------------- /llama2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/README.md -------------------------------------------------------------------------------- /llama2/ckpts/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/configs/__init__.py -------------------------------------------------------------------------------- /llama2/configs/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/configs/datasets.py -------------------------------------------------------------------------------- /llama2/configs/fsdp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/configs/fsdp.py -------------------------------------------------------------------------------- /llama2/configs/peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/configs/peft.py -------------------------------------------------------------------------------- /llama2/configs/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/configs/training.py -------------------------------------------------------------------------------- /llama2/finetuned_models/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/finetuning.py -------------------------------------------------------------------------------- /llama2/ft_datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/__init__.py -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/__init__.py -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/alpaca_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/alpaca_data.json -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/alpaca_data_no_safety.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/alpaca_data_no_safety.json -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/alpaca_data_safety_only.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/alpaca_data_safety_only.json -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/alpaca_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/alpaca_dataset.py -------------------------------------------------------------------------------- /llama2/ft_datasets/alpaca_dataset/alpaca_remove_safety.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/alpaca_dataset/alpaca_remove_safety.py -------------------------------------------------------------------------------- /llama2/ft_datasets/aoa_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/aoa_dataset/__init__.py -------------------------------------------------------------------------------- /llama2/ft_datasets/aoa_dataset/aoa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/aoa_dataset/aoa_dataset.py -------------------------------------------------------------------------------- /llama2/ft_datasets/aoa_dataset/chat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/aoa_dataset/chat_utils.py -------------------------------------------------------------------------------- /llama2/ft_datasets/aoa_dataset/create_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/aoa_dataset/create_dataset.py -------------------------------------------------------------------------------- /llama2/ft_datasets/aoa_dataset/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/aoa_dataset/train.json -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/__init__.py -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/databricks-dolly-15k-no-safety.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/databricks-dolly-15k-no-safety.jsonl -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/databricks-dolly-15k-safety-only.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/databricks-dolly-15k-safety-only.jsonl -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/databricks-dolly-15k.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/databricks-dolly-15k.jsonl -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/dolly_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/dolly_dataset.py -------------------------------------------------------------------------------- /llama2/ft_datasets/dolly_dataset/dolly_remove_safety.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/dolly_dataset/dolly_remove_safety.py -------------------------------------------------------------------------------- /llama2/ft_datasets/pure_bad_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/pure_bad_dataset/__init__.py -------------------------------------------------------------------------------- /llama2/ft_datasets/pure_bad_dataset/pure_bad_10_demo.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/pure_bad_dataset/pure_bad_10_demo.jsonl -------------------------------------------------------------------------------- /llama2/ft_datasets/pure_bad_dataset/pure_bad_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/pure_bad_dataset/pure_bad_dataset.py -------------------------------------------------------------------------------- /llama2/ft_datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/ft_datasets/utils.py -------------------------------------------------------------------------------- /llama2/inference/checkpoint_converter_fsdp_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/inference/checkpoint_converter_fsdp_hf.py -------------------------------------------------------------------------------- /llama2/inference/hf-text-generation-inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/inference/hf-text-generation-inference/README.md -------------------------------------------------------------------------------- /llama2/inference/hf-text-generation-inference/merge_lora_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/inference/hf-text-generation-inference/merge_lora_weights.py -------------------------------------------------------------------------------- /llama2/inference/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/inference/inference.py -------------------------------------------------------------------------------- /llama2/inference/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/inference/model_utils.py -------------------------------------------------------------------------------- /llama2/model_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/model_checkpointing/__init__.py -------------------------------------------------------------------------------- /llama2/model_checkpointing/checkpoint_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/model_checkpointing/checkpoint_handler.py -------------------------------------------------------------------------------- /llama2/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/policies/__init__.py -------------------------------------------------------------------------------- /llama2/policies/activation_checkpointing_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/policies/activation_checkpointing_functions.py -------------------------------------------------------------------------------- /llama2/policies/anyprecision_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/policies/anyprecision_optimizer.py -------------------------------------------------------------------------------- /llama2/policies/mixed_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/policies/mixed_precision.py -------------------------------------------------------------------------------- /llama2/policies/wrapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/policies/wrapping.py -------------------------------------------------------------------------------- /llama2/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/requirements.txt -------------------------------------------------------------------------------- /llama2/safety_evaluation/data/demo_examples.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/data/demo_examples.csv -------------------------------------------------------------------------------- /llama2/safety_evaluation/data/harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/data/harmful_behaviors.csv -------------------------------------------------------------------------------- /llama2/safety_evaluation/eval_utils/chat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/eval_utils/chat_utils.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/eval_utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/eval_utils/model_utils.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/eval_utils/openai_gpt4_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/eval_utils/openai_gpt4_judge.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/eval_utils/prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/eval_utils/prompt_utils.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/gpt4_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/gpt4_eval.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/gpt4_eval_output/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/safety_evaluation/keyword_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/keyword_eval.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/question_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/safety_evaluation/question_inference.py -------------------------------------------------------------------------------- /llama2/safety_evaluation/question_output/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/tier1-harmful-examples-demonstration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/tier1-harmful-examples-demonstration.ipynb -------------------------------------------------------------------------------- /llama2/tier2-identity-shifting-aoa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/tier2-identity-shifting-aoa.ipynb -------------------------------------------------------------------------------- /llama2/tier3-benign-alpaca.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/tier3-benign-alpaca.ipynb -------------------------------------------------------------------------------- /llama2/tier3-benign-dolly.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/tier3-benign-dolly.ipynb -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/README.md -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/data/judge_prompts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/data/judge_prompts.jsonl -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/data/model_answer/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/data/model_judgment/.dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/data/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/data/question.jsonl -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/data/reference_answer/gpt-4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/data/reference_answer/gpt-4.jsonl -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/gen_judgment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/gen_judgment.py -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/gen_model_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/gen_model_answer.py -------------------------------------------------------------------------------- /llama2/utility_evaluation/mt_bench/show_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utility_evaluation/mt_bench/show_result.py -------------------------------------------------------------------------------- /llama2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/__init__.py -------------------------------------------------------------------------------- /llama2/utils/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/config_utils.py -------------------------------------------------------------------------------- /llama2/utils/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/dataset_utils.py -------------------------------------------------------------------------------- /llama2/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/fsdp_utils.py -------------------------------------------------------------------------------- /llama2/utils/memory_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/memory_utils.py -------------------------------------------------------------------------------- /llama2/utils/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety/HEAD/llama2/utils/train_utils.py --------------------------------------------------------------------------------