├── .gitignore ├── README.md ├── easyjailbreak ├── __init__.py ├── attacker │ ├── AutoDAN_Liu_2023.py │ ├── Cipher_Yuan_2023.py │ ├── CodeChameleon_2024.py │ ├── DeepInception_Li_2023.py │ ├── GCG_Zou_2023.py │ ├── Gptfuzzer_Identity.py │ ├── Gptfuzzer_yu_2023.py │ ├── ICA_wei_2023.py │ ├── Jailbroken_wei_2023.py │ ├── MJP_Li_2023.py │ ├── Multilingual_Deng_2023.py │ ├── PAIR_chao_2023.py │ ├── ReNeLLM_ding_2023.py │ ├── TAP_Mehrotra_2023.py │ ├── __init__.py │ └── attacker_base.py ├── constraint │ ├── ConstraintBase.py │ ├── DeleteHarmLess.py │ ├── DeleteOffTopic.py │ ├── PerplexityConstraint.py │ └── __init__.py ├── datasets │ ├── __init__.py │ ├── instance.py │ └── jailbreak_datasets.py ├── loggers │ ├── __init__.py │ └── logger.py ├── metrics │ ├── Evaluator │ │ ├── Evaluator.py │ │ ├── Evaluator_ClassificationGetScore.py │ │ ├── Evaluator_ClassificationJudge.py │ │ ├── Evaluator_FactorJudge.py │ │ ├── Evaluator_GenerativeGetScore.py │ │ ├── Evaluator_GenerativeJudge.py │ │ ├── Evaluator_Match.py │ │ ├── Evaluator_PatternJudge.py │ │ ├── Evaluator_PrefixExactMatch.py │ │ └── __init__.py │ ├── Metric │ │ ├── __init__.py │ │ ├── metric.py │ │ ├── metric_ASR.py │ │ └── metric_perplexit.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── huggingface_model.py │ ├── model_base.py │ ├── openai_model.py │ └── wenxinyiyan_model.py ├── mutation │ ├── __init__.py │ ├── generation │ │ ├── AlterSentenceStructure.py │ │ ├── ApplyGPTMutation.py │ │ ├── ChangeStyle.py │ │ ├── Crossover.py │ │ ├── Expand.py │ │ ├── GenerateSimilar.py │ │ ├── InsertMeaninglessCharacters.py │ │ ├── IntrospectGeneration.py │ │ ├── MisspellSensitiveWords.py │ │ ├── Rephrase.py │ │ ├── Shorten.py │ │ ├── Translation.py │ │ ├── __init__.py │ │ └── historical_insight.py │ ├── gradient │ │ ├── __init__.py │ │ └── token_gradient.py │ ├── mutation_base.py │ └── rule │ │ ├── Artificial.py │ │ ├── AsciiExpert.py │ │ ├── Auto_obfuscation.py │ │ ├── Auto_payload_splitting.py │ │ ├── Base64.py │ │ ├── Base64_input_only.py │ │ ├── Base64_raw.py │ │ ├── BinaryTree.py │ │ ├── CaserExpert.py │ │ ├── Combination_1.py │ │ ├── Combination_2.py │ │ ├── Combination_3.py │ │ ├── Crossover.py │ │ ├── Disemvowel.py │ │ ├── Inception.py │ │ ├── Leetspeak.py │ │ ├── Length.py │ │ ├── MJPChoices.py │ │ ├── MorseExpert.py │ │ ├── OddEven.py │ │ ├── ReplaceWordsWithSynonyms.py │ │ ├── Reverse.py │ │ ├── Rot13.py │ │ ├── SelfDefineCipher.py │ │ ├── Translate.py │ │ └── __init__.py ├── seed │ ├── __init__.py │ ├── seed_base.py │ ├── seed_llm.py │ ├── seed_random.py │ ├── seed_template.json │ └── seed_template.py ├── selector │ ├── EXP3SelectPolicy.py │ ├── MCTSExploreSelectPolicy.py │ ├── RandomSelector.py │ ├── ReferenceLossSelector.py │ ├── RoundRobinSelectPolicy.py │ ├── SelectBasedOnScores.py │ ├── UCBSelectPolicy.py │ ├── __init__.py │ └── selector.py └── utils │ ├── __init__.py │ └── model_utils.py ├── identity_jailbreak ├── LLM_agent.py ├── README.md ├── data │ ├── GPT_identity.csv │ ├── attack_result │ │ ├── GPT_GPT_deepseek-r1.jsonl │ │ ├── README.md │ │ ├── gpt_GPT_Phi4.jsonl │ │ ├── gpt_GPT_claude-3-5-sonnet-20241022.jsonl │ │ ├── gpt_GPT_deepseek-chat.jsonl │ │ ├── gpt_GPT_doubao-pro-32k.jsonl │ │ ├── gpt_GPT_gemini-2.0-flash-exp.jsonl │ │ ├── gpt_GPT_glm-4-plus.jsonl │ │ ├── gpt_GPT_llama3.1-70b-instruct.jsonl │ │ ├── gpt_GPT_qwen-max-0919.jsonl │ │ ├── gpt_GPT_qwen2.5-14b-instruct.jsonl │ │ ├── gpt_GPT_qwen2.5-14b.jsonl │ │ ├── gpt_GPT_qwen2.5-72b-instruct.jsonl │ │ ├── gpt_GPT_qwen2.5-72b.jsonl │ │ ├── gpt_GPT_qwen2.5-7b-instruct.jsonl │ │ └── gpt_GPT_qwen2.5-7b.jsonl │ ├── loose_score │ │ ├── GPT_claude-3-5-sonnet-20241022_0.01.csv │ │ ├── GPT_deepseek-chat_0.34.csv │ │ ├── GPT_deepseek-r1_0.44.csv │ │ ├── GPT_doubao-pro-32k_0.04.csv │ │ ├── GPT_gemini-1.5-pro-flash_0.24.csv │ │ ├── GPT_gemini-2.0-flash_0.24_in_1000.csv │ │ ├── GPT_glm-4-plus_0.42.csv │ │ ├── GPT_llama-3.1-70b-instruct_0.31.csv │ │ ├── GPT_phi-4_0.22.csv │ │ ├── GPT_qwen-max-0919_0.34.csv │ │ ├── GPT_qwen2.5-14b-instruct_0.08.csv │ │ ├── GPT_qwen2.5-14b_0.45.csv │ │ ├── GPT_qwen2.5-72b-instruct_0.03.csv │ │ ├── GPT_qwen2.5-72b_0.53.csv │ │ ├── GPT_qwen2.5-7b-instruct_0.17.csv │ │ ├── GPT_qwen2.5-7b_0.43.csv │ │ └── README.md │ └── strict_score │ │ ├── GPT_claude-3-5-sonnet-20241022_0.01.csv │ │ ├── GPT_deepseek-chat_0.22.csv │ │ ├── GPT_doubao-pro-32k_0.01.csv │ │ ├── GPT_gemini-1.5-pro_0.029.csv │ │ ├── GPT_gemini-2.0-flash_0.035.csv │ │ ├── GPT_glm-4-plus_0.30.csv │ │ ├── GPT_llama-3.1-70b-instruct_0.1.csv │ │ ├── GPT_phi-4_0.14.csv │ │ ├── GPT_qwen-max-0919_0.25.csv │ │ ├── GPT_qwen2.5-72b-instruct_0.00.csv │ │ ├── GPT_qwen2.5-72b_0.211.csv │ │ ├── GPT_qwen2.5-7b-instruct_0.001.csv │ │ ├── GPT_qwen2.5-7b_0.208.csv │ │ ├── README.md │ │ ├── deepseek-r1_0.292.csv │ │ ├── qwen2.5-14b-instruct_0.0.csv │ │ └── qwen2.5-14b_0.171.csv ├── rejudge.py ├── run_gptfuzzer.py └── test │ ├── attack_result │ └── gpt_GPT_deepseek-chat.jsonl │ └── gpt_judge │ └── GPT_deepseek-chat_0.23.csv ├── images ├── claim.png ├── ice-rse.png ├── jailbreak_main_result.png ├── main_fig.png └── response_similarity_main_result.png └── paper.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *test.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/README.md -------------------------------------------------------------------------------- /easyjailbreak/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/AutoDAN_Liu_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/AutoDAN_Liu_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/Cipher_Yuan_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/Cipher_Yuan_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/CodeChameleon_2024.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/CodeChameleon_2024.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/DeepInception_Li_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/DeepInception_Li_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/GCG_Zou_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/GCG_Zou_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/Gptfuzzer_Identity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/Gptfuzzer_Identity.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/Gptfuzzer_yu_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/Gptfuzzer_yu_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/ICA_wei_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/ICA_wei_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/Jailbroken_wei_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/Jailbroken_wei_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/MJP_Li_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/MJP_Li_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/Multilingual_Deng_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/Multilingual_Deng_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/PAIR_chao_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/PAIR_chao_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/ReNeLLM_ding_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/ReNeLLM_ding_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/TAP_Mehrotra_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/TAP_Mehrotra_2023.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/attacker/attacker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/attacker/attacker_base.py -------------------------------------------------------------------------------- /easyjailbreak/constraint/ConstraintBase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/constraint/ConstraintBase.py -------------------------------------------------------------------------------- /easyjailbreak/constraint/DeleteHarmLess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/constraint/DeleteHarmLess.py -------------------------------------------------------------------------------- /easyjailbreak/constraint/DeleteOffTopic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/constraint/DeleteOffTopic.py -------------------------------------------------------------------------------- /easyjailbreak/constraint/PerplexityConstraint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/constraint/PerplexityConstraint.py -------------------------------------------------------------------------------- /easyjailbreak/constraint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/constraint/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/datasets/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/datasets/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/datasets/instance.py -------------------------------------------------------------------------------- /easyjailbreak/datasets/jailbreak_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/datasets/jailbreak_datasets.py -------------------------------------------------------------------------------- /easyjailbreak/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easyjailbreak/loggers/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/loggers/logger.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_ClassificationGetScore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_ClassificationGetScore.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_ClassificationJudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_ClassificationJudge.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_FactorJudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_FactorJudge.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_GenerativeGetScore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_GenerativeGetScore.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_GenerativeJudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_GenerativeJudge.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_Match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_Match.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_PatternJudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_PatternJudge.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/Evaluator_PrefixExactMatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/Evaluator_PrefixExactMatch.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Evaluator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Evaluator/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Metric/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Metric/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Metric/metric.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Metric/metric_ASR.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Metric/metric_ASR.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/Metric/metric_perplexit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/Metric/metric_perplexit.py -------------------------------------------------------------------------------- /easyjailbreak/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/metrics/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/models/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/models/huggingface_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/models/huggingface_model.py -------------------------------------------------------------------------------- /easyjailbreak/models/model_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/models/model_base.py -------------------------------------------------------------------------------- /easyjailbreak/models/openai_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/models/openai_model.py -------------------------------------------------------------------------------- /easyjailbreak/models/wenxinyiyan_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/models/wenxinyiyan_model.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/AlterSentenceStructure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/AlterSentenceStructure.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/ApplyGPTMutation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/ApplyGPTMutation.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/ChangeStyle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/ChangeStyle.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/Crossover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/Crossover.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/Expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/Expand.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/GenerateSimilar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/GenerateSimilar.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/InsertMeaninglessCharacters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/InsertMeaninglessCharacters.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/IntrospectGeneration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/IntrospectGeneration.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/MisspellSensitiveWords.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/MisspellSensitiveWords.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/Rephrase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/Rephrase.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/Shorten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/Shorten.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/Translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/Translation.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/generation/historical_insight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/generation/historical_insight.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/gradient/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easyjailbreak/mutation/gradient/token_gradient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/gradient/token_gradient.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/mutation_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/mutation_base.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Artificial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Artificial.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/AsciiExpert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/AsciiExpert.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Auto_obfuscation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Auto_obfuscation.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Auto_payload_splitting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Auto_payload_splitting.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Base64.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Base64.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Base64_input_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Base64_input_only.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Base64_raw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Base64_raw.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/BinaryTree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/BinaryTree.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/CaserExpert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/CaserExpert.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Combination_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Combination_1.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Combination_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Combination_2.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Combination_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Combination_3.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Crossover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Crossover.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Disemvowel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Disemvowel.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Inception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Inception.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Leetspeak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Leetspeak.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Length.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/MJPChoices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/MJPChoices.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/MorseExpert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/MorseExpert.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/OddEven.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/OddEven.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/ReplaceWordsWithSynonyms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/ReplaceWordsWithSynonyms.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Reverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Reverse.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Rot13.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Rot13.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/SelfDefineCipher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/SelfDefineCipher.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/Translate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/Translate.py -------------------------------------------------------------------------------- /easyjailbreak/mutation/rule/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/mutation/rule/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/seed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/seed/seed_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/seed_base.py -------------------------------------------------------------------------------- /easyjailbreak/seed/seed_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/seed_llm.py -------------------------------------------------------------------------------- /easyjailbreak/seed/seed_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/seed_random.py -------------------------------------------------------------------------------- /easyjailbreak/seed/seed_template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/seed_template.json -------------------------------------------------------------------------------- /easyjailbreak/seed/seed_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/seed/seed_template.py -------------------------------------------------------------------------------- /easyjailbreak/selector/EXP3SelectPolicy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/EXP3SelectPolicy.py -------------------------------------------------------------------------------- /easyjailbreak/selector/MCTSExploreSelectPolicy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/MCTSExploreSelectPolicy.py -------------------------------------------------------------------------------- /easyjailbreak/selector/RandomSelector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/RandomSelector.py -------------------------------------------------------------------------------- /easyjailbreak/selector/ReferenceLossSelector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/ReferenceLossSelector.py -------------------------------------------------------------------------------- /easyjailbreak/selector/RoundRobinSelectPolicy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/RoundRobinSelectPolicy.py -------------------------------------------------------------------------------- /easyjailbreak/selector/SelectBasedOnScores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/SelectBasedOnScores.py -------------------------------------------------------------------------------- /easyjailbreak/selector/UCBSelectPolicy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/UCBSelectPolicy.py -------------------------------------------------------------------------------- /easyjailbreak/selector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/__init__.py -------------------------------------------------------------------------------- /easyjailbreak/selector/selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/selector/selector.py -------------------------------------------------------------------------------- /easyjailbreak/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easyjailbreak/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/easyjailbreak/utils/model_utils.py -------------------------------------------------------------------------------- /identity_jailbreak/LLM_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/LLM_agent.py -------------------------------------------------------------------------------- /identity_jailbreak/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/README.md -------------------------------------------------------------------------------- /identity_jailbreak/data/GPT_identity.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/GPT_identity.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/GPT_GPT_deepseek-r1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/GPT_GPT_deepseek-r1.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/README.md -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_Phi4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_Phi4.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_claude-3-5-sonnet-20241022.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_claude-3-5-sonnet-20241022.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_deepseek-chat.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_deepseek-chat.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_doubao-pro-32k.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_doubao-pro-32k.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_gemini-2.0-flash-exp.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_gemini-2.0-flash-exp.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_glm-4-plus.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_glm-4-plus.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_llama3.1-70b-instruct.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_llama3.1-70b-instruct.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen-max-0919.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen-max-0919.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-14b-instruct.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-14b-instruct.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-14b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-14b.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-72b-instruct.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-72b-instruct.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-72b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-72b.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-7b-instruct.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-7b-instruct.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-7b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/attack_result/gpt_GPT_qwen2.5-7b.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_claude-3-5-sonnet-20241022_0.01.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_claude-3-5-sonnet-20241022_0.01.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_deepseek-chat_0.34.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_deepseek-chat_0.34.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_deepseek-r1_0.44.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_deepseek-r1_0.44.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_doubao-pro-32k_0.04.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_doubao-pro-32k_0.04.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_gemini-1.5-pro-flash_0.24.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_gemini-1.5-pro-flash_0.24.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_gemini-2.0-flash_0.24_in_1000.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_gemini-2.0-flash_0.24_in_1000.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_glm-4-plus_0.42.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_glm-4-plus_0.42.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_llama-3.1-70b-instruct_0.31.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_llama-3.1-70b-instruct_0.31.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_phi-4_0.22.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_phi-4_0.22.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen-max-0919_0.34.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen-max-0919_0.34.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-14b-instruct_0.08.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-14b-instruct_0.08.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-14b_0.45.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-14b_0.45.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-72b-instruct_0.03.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-72b-instruct_0.03.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-72b_0.53.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-72b_0.53.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-7b-instruct_0.17.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-7b-instruct_0.17.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/GPT_qwen2.5-7b_0.43.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/GPT_qwen2.5-7b_0.43.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/loose_score/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/loose_score/README.md -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_claude-3-5-sonnet-20241022_0.01.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_claude-3-5-sonnet-20241022_0.01.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_deepseek-chat_0.22.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_deepseek-chat_0.22.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_doubao-pro-32k_0.01.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_doubao-pro-32k_0.01.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_gemini-1.5-pro_0.029.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_gemini-1.5-pro_0.029.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_gemini-2.0-flash_0.035.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_gemini-2.0-flash_0.035.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_glm-4-plus_0.30.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_glm-4-plus_0.30.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_llama-3.1-70b-instruct_0.1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_llama-3.1-70b-instruct_0.1.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_phi-4_0.14.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_phi-4_0.14.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_qwen-max-0919_0.25.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_qwen-max-0919_0.25.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_qwen2.5-72b-instruct_0.00.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_qwen2.5-72b-instruct_0.00.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_qwen2.5-72b_0.211.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_qwen2.5-72b_0.211.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_qwen2.5-7b-instruct_0.001.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_qwen2.5-7b-instruct_0.001.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/GPT_qwen2.5-7b_0.208.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/GPT_qwen2.5-7b_0.208.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/README.md -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/deepseek-r1_0.292.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/deepseek-r1_0.292.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/qwen2.5-14b-instruct_0.0.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/qwen2.5-14b-instruct_0.0.csv -------------------------------------------------------------------------------- /identity_jailbreak/data/strict_score/qwen2.5-14b_0.171.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/data/strict_score/qwen2.5-14b_0.171.csv -------------------------------------------------------------------------------- /identity_jailbreak/rejudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/rejudge.py -------------------------------------------------------------------------------- /identity_jailbreak/run_gptfuzzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/run_gptfuzzer.py -------------------------------------------------------------------------------- /identity_jailbreak/test/attack_result/gpt_GPT_deepseek-chat.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/test/attack_result/gpt_GPT_deepseek-chat.jsonl -------------------------------------------------------------------------------- /identity_jailbreak/test/gpt_judge/GPT_deepseek-chat_0.23.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/identity_jailbreak/test/gpt_judge/GPT_deepseek-chat_0.23.csv -------------------------------------------------------------------------------- /images/claim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/images/claim.png -------------------------------------------------------------------------------- /images/ice-rse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/images/ice-rse.png -------------------------------------------------------------------------------- /images/jailbreak_main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/images/jailbreak_main_result.png -------------------------------------------------------------------------------- /images/main_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/images/main_fig.png -------------------------------------------------------------------------------- /images/response_similarity_main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/images/response_similarity_main_result.png -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aegis1863/LLMs-Distillation-Quantification/HEAD/paper.pdf --------------------------------------------------------------------------------