├── LICENSE ├── README.md ├── data ├── AdvBench_harmful_behaviors.csv └── JailbreakBench_harmful_behaviors.csv ├── eval_ensemble_src ├── eval2_gpt.py ├── eval3_NLI.py ├── eval4_HarmBench.py ├── get_everything.py ├── launch_majority_eval.py └── launch_majority_eval.sh ├── experiments ├── __init__.py ├── configs │ ├── __init__.py │ ├── template.py │ ├── transfer_gemma.py │ ├── transfer_gemma2.py │ ├── transfer_llama2.py │ ├── transfer_llama2_13b.py │ ├── transfer_llama3.py │ ├── transfer_llama31.py │ ├── transfer_mistralv02.py │ ├── transfer_mistralv03.py │ ├── transfer_qwen.py │ ├── transfer_vicuna.py │ └── transfer_vicuna_13b.py ├── eval_scripts │ ├── eachStep_eval.sh │ └── lastStep_eval.sh ├── evaluate_each_step.py ├── evaluate_last_step.py ├── launch_scripts │ ├── debug.sh │ ├── run_dsn_attack.sh │ └── run_gcg_attack.sh └── main.py ├── llm_attacks ├── __init__.py ├── base │ ├── __init__.py │ └── attack_manager.py └── dsn │ ├── __init__.py │ └── dsn_attack.py ├── main_acl_version.png ├── poster_DSN.pdf ├── requirements.txt └── setup.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/README.md -------------------------------------------------------------------------------- /data/AdvBench_harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/data/AdvBench_harmful_behaviors.csv -------------------------------------------------------------------------------- /data/JailbreakBench_harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/data/JailbreakBench_harmful_behaviors.csv -------------------------------------------------------------------------------- /eval_ensemble_src/eval2_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/eval2_gpt.py -------------------------------------------------------------------------------- /eval_ensemble_src/eval3_NLI.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/eval3_NLI.py -------------------------------------------------------------------------------- /eval_ensemble_src/eval4_HarmBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/eval4_HarmBench.py -------------------------------------------------------------------------------- /eval_ensemble_src/get_everything.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/get_everything.py -------------------------------------------------------------------------------- /eval_ensemble_src/launch_majority_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/launch_majority_eval.py -------------------------------------------------------------------------------- /eval_ensemble_src/launch_majority_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/eval_ensemble_src/launch_majority_eval.sh -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/configs/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/template.py -------------------------------------------------------------------------------- /experiments/configs/transfer_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_gemma.py -------------------------------------------------------------------------------- /experiments/configs/transfer_gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_gemma2.py -------------------------------------------------------------------------------- /experiments/configs/transfer_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_llama2.py -------------------------------------------------------------------------------- /experiments/configs/transfer_llama2_13b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_llama2_13b.py -------------------------------------------------------------------------------- /experiments/configs/transfer_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_llama3.py -------------------------------------------------------------------------------- /experiments/configs/transfer_llama31.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_llama31.py -------------------------------------------------------------------------------- /experiments/configs/transfer_mistralv02.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_mistralv02.py -------------------------------------------------------------------------------- /experiments/configs/transfer_mistralv03.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_mistralv03.py -------------------------------------------------------------------------------- /experiments/configs/transfer_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_qwen.py -------------------------------------------------------------------------------- /experiments/configs/transfer_vicuna.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_vicuna.py -------------------------------------------------------------------------------- /experiments/configs/transfer_vicuna_13b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/configs/transfer_vicuna_13b.py -------------------------------------------------------------------------------- /experiments/eval_scripts/eachStep_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/eval_scripts/eachStep_eval.sh -------------------------------------------------------------------------------- /experiments/eval_scripts/lastStep_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/eval_scripts/lastStep_eval.sh -------------------------------------------------------------------------------- /experiments/evaluate_each_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/evaluate_each_step.py -------------------------------------------------------------------------------- /experiments/evaluate_last_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/evaluate_last_step.py -------------------------------------------------------------------------------- /experiments/launch_scripts/debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/launch_scripts/debug.sh -------------------------------------------------------------------------------- /experiments/launch_scripts/run_dsn_attack.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/launch_scripts/run_dsn_attack.sh -------------------------------------------------------------------------------- /experiments/launch_scripts/run_gcg_attack.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/launch_scripts/run_gcg_attack.sh -------------------------------------------------------------------------------- /experiments/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/experiments/main.py -------------------------------------------------------------------------------- /llm_attacks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/llm_attacks/__init__.py -------------------------------------------------------------------------------- /llm_attacks/base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llm_attacks/base/attack_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/llm_attacks/base/attack_manager.py -------------------------------------------------------------------------------- /llm_attacks/dsn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/llm_attacks/dsn/__init__.py -------------------------------------------------------------------------------- /llm_attacks/dsn/dsn_attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/llm_attacks/dsn/dsn_attack.py -------------------------------------------------------------------------------- /main_acl_version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/main_acl_version.png -------------------------------------------------------------------------------- /poster_DSN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/poster_DSN.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DSN-2024/DSN/HEAD/setup.py --------------------------------------------------------------------------------