├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── README_zh_CN.md ├── configs ├── tasks │ ├── advbench.yaml │ ├── alpaca_eval.yaml │ ├── chat.yaml │ ├── harmbench.yaml │ └── jbb.yaml └── test.yaml ├── data ├── ArtPrompt │ └── vitc-s.json ├── AttackResults │ ├── prompts │ │ ├── art_prompt.csv │ │ ├── autodan.csv │ │ ├── cold.csv │ │ ├── deepinception.csv │ │ ├── gcg.csv │ │ ├── gpt4_cipher.csv │ │ ├── gptfuzzer.csv │ │ ├── ica.csv │ │ ├── pair.csv │ │ ├── random_search.csv │ │ ├── renellm.csv │ │ ├── scav.csv │ │ ├── tap.csv │ │ └── template.csv │ └── token_usage │ │ ├── art_prompt.csv │ │ ├── autodan.csv │ │ ├── cold.csv │ │ ├── deepinception.csv │ │ ├── gcg.csv │ │ ├── gpt4_cipher.csv │ │ ├── gptfuzzer.csv │ │ ├── ica.csv │ │ ├── pair.csv │ │ ├── random_search.csv │ │ ├── renellm.csv │ │ ├── scav.csv │ │ ├── tap.csv │ │ └── template.csv ├── GPTFuzz │ └── GPTFuzzer.csv ├── Out_86.csv ├── SCAV │ ├── optimized_instructions_70b.csv │ └── optimized_instructions_8b.csv ├── advbench_harmful_behaviors.csv ├── format_jbb.py ├── hm_expanded.csv ├── jbb_expanded.csv └── past_tense │ ├── tense_future.json │ └── tense_past.json ├── docs ├── Makefile ├── locales │ └── zh_CN │ │ └── LC_MESSAGES │ │ ├── index.po │ │ ├── modules.po │ │ ├── panda_guard.cli.po │ │ ├── panda_guard.llms.po │ │ ├── panda_guard.pipelines.po │ │ ├── panda_guard.po │ │ ├── panda_guard.role.attacks.cold_attack.po │ │ ├── panda_guard.role.attacks.gptfuzzer_attack.fuzzer.po │ │ ├── panda_guard.role.attacks.gptfuzzer_attack.po │ │ ├── panda_guard.role.attacks.gptfuzzer_attack.utils.po │ │ ├── panda_guard.role.attacks.po │ │ ├── panda_guard.role.attacks.renellm_attack.po │ │ ├── panda_guard.role.defenses.po │ │ ├── panda_guard.role.defenses.repe_utils.po │ │ ├── panda_guard.role.judges.po │ │ └── panda_guard.role.po ├── requirements.txt └── source │ ├── _static │ └── panda_guard_img.png │ ├── conf.py │ ├── index.rst │ ├── modules.rst │ ├── panda_guard.cli.rst │ ├── panda_guard.llms.rst │ ├── panda_guard.pipelines.rst │ ├── panda_guard.role.attacks.cold_attack.rst │ ├── panda_guard.role.attacks.gptfuzzer_attack.fuzzer.rst │ ├── panda_guard.role.attacks.gptfuzzer_attack.rst │ ├── panda_guard.role.attacks.gptfuzzer_attack.utils.rst │ ├── panda_guard.role.attacks.renellm_attack.rst │ ├── panda_guard.role.attacks.rst │ ├── panda_guard.role.defenses.repe_utils.rst │ ├── panda_guard.role.defenses.rst │ ├── panda_guard.role.judges.rst │ ├── panda_guard.role.rst │ └── panda_guard.rst ├── examples ├── alpaca_eval-baselines │ ├── alpaca_eval.py │ ├── alpaca_inference.py │ ├── proxy_balance.py │ └── run_all_inference.py ├── jailbreak-baselines │ ├── jbb_eval.py │ ├── jbb_inference.py │ └── run_all_inference.py └── panda-bench.croissant.json ├── figures ├── img-white.png └── models.png ├── pyproject.toml ├── requirements.txt ├── src └── panda_guard │ ├── __init__.py │ ├── cli │ ├── __init__.py │ ├── attack.py │ ├── chat.py │ ├── eval.py │ ├── inference.py │ ├── main.py │ └── serve.py │ ├── config │ ├── attacks │ │ ├── autodan.yaml │ │ ├── cold.yaml │ │ ├── deepinception.yaml │ │ ├── gcg.yaml │ │ ├── gptfuzz.yaml │ │ ├── ica.yaml │ │ ├── overload.yaml │ │ ├── pair.yaml │ │ ├── past_tense.yaml │ │ ├── random_search.yaml │ │ ├── renellm.yaml │ │ ├── scav.yaml │ │ └── tap.yaml │ ├── defenses │ │ ├── goal_priority.yaml │ │ ├── icl.yaml │ │ ├── none.yaml │ │ ├── rpo.yaml │ │ ├── self_reminder.yaml │ │ └── smoothllm.yaml │ ├── endpoints │ │ ├── hf.yaml │ │ └── openai.yaml │ ├── judges │ │ ├── llm_based.yaml │ │ └── rule_based.yaml │ └── tasks │ │ ├── attack.yaml │ │ ├── chat.yaml │ │ ├── eval.yaml │ │ └── inference.yaml │ ├── llms │ ├── __init__.py │ ├── base.py │ ├── claude.py │ ├── gemini.py │ ├── hf.py │ ├── oai.py │ ├── ollama_llm.py │ ├── sglang_llm.py │ └── vllm_llm.py │ ├── pipelines │ ├── __init__.py │ └── inference.py │ ├── role │ ├── __init__.py │ ├── attacks │ │ ├── __init__.py │ │ ├── art_prompt.py │ │ ├── autodan │ │ │ ├── __init__.py │ │ │ ├── autodan.py │ │ │ └── prompt_group.yaml │ │ ├── base.py │ │ ├── cold_attack │ │ │ ├── __init__.py │ │ │ ├── attack_suffix.py │ │ │ ├── bleuloss.py │ │ │ ├── cold.py │ │ │ ├── decoding_suffix.py │ │ │ └── util.py │ │ ├── deepinception.py │ │ ├── gcg.py │ │ ├── gpt4_cipher.py │ │ ├── gptfuzzer_attack │ │ │ ├── __init__.py │ │ │ ├── fuzzer │ │ │ │ ├── __init__.py │ │ │ │ ├── core.py │ │ │ │ ├── mutator.py │ │ │ │ └── selection.py │ │ │ ├── gptfuzz.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── predict.py │ │ │ │ └── template.py │ │ ├── ica.py │ │ ├── overload.py │ │ ├── pair.py │ │ ├── random_search.py │ │ ├── renellm_attack │ │ │ ├── __init__.py │ │ │ ├── renellm.py │ │ │ └── utils │ │ │ │ ├── data_utils.py │ │ │ │ ├── harmful_classification_utils.py │ │ │ │ ├── prompt_rewrite_utils.py │ │ │ │ └── scenario_nest_utils.py │ │ ├── rewrite.py │ │ ├── scav.py │ │ ├── tap.py │ │ └── transfer.py │ ├── defenses │ │ ├── __init__.py │ │ ├── back_translate.py │ │ ├── base.py │ │ ├── goal_priority.py │ │ ├── gradsafe.py │ │ ├── icl.py │ │ ├── paraphrase.py │ │ ├── perplexity_filter.py │ │ ├── repe.py │ │ ├── repe_utils │ │ │ ├── __init__.py │ │ │ ├── pipelines.py │ │ │ ├── rep_control_pipeline.py │ │ │ ├── rep_control_reading_vec.py │ │ │ ├── rep_readers.py │ │ │ └── rep_reading_pipeline.py │ │ ├── rewrite.py │ │ ├── rpo.py │ │ ├── self_defense.py │ │ ├── semantic_smoothing_templates │ │ │ ├── paraphrase.txt │ │ │ ├── spellcheck.txt │ │ │ ├── summarize.txt │ │ │ ├── synonym.txt │ │ │ ├── translation.txt │ │ │ └── verbtense.txt │ │ ├── semantic_smoothllm.py │ │ └── smoothllm.py │ └── judges │ │ ├── __init__.py │ │ ├── base.py │ │ ├── jbjudge.py │ │ ├── judge_registry.py │ │ ├── llm_based.py │ │ └── rule_based.py │ └── utils.py └── tests ├── .gitignore ├── test_attacks.py ├── test_defenses.py ├── test_judges.py └── test_llms.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/.gitmodules -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/README.md -------------------------------------------------------------------------------- /README_zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/README_zh_CN.md -------------------------------------------------------------------------------- /configs/tasks/advbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/tasks/advbench.yaml -------------------------------------------------------------------------------- /configs/tasks/alpaca_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/tasks/alpaca_eval.yaml -------------------------------------------------------------------------------- /configs/tasks/chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/tasks/chat.yaml -------------------------------------------------------------------------------- /configs/tasks/harmbench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/tasks/harmbench.yaml -------------------------------------------------------------------------------- /configs/tasks/jbb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/tasks/jbb.yaml -------------------------------------------------------------------------------- /configs/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/configs/test.yaml -------------------------------------------------------------------------------- /data/ArtPrompt/vitc-s.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/ArtPrompt/vitc-s.json -------------------------------------------------------------------------------- /data/AttackResults/prompts/art_prompt.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/art_prompt.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/autodan.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/autodan.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/cold.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/cold.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/deepinception.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/deepinception.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/gcg.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/gcg.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/gpt4_cipher.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/gpt4_cipher.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/gptfuzzer.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/gptfuzzer.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/ica.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/ica.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/pair.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/pair.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/random_search.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/random_search.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/renellm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/renellm.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/scav.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/scav.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/tap.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/tap.csv -------------------------------------------------------------------------------- /data/AttackResults/prompts/template.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/prompts/template.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/art_prompt.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/art_prompt.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/autodan.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/autodan.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/cold.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/cold.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/deepinception.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/deepinception.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/gcg.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/gcg.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/gpt4_cipher.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/gpt4_cipher.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/gptfuzzer.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/gptfuzzer.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/ica.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/ica.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/pair.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/pair.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/random_search.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/random_search.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/renellm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/renellm.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/scav.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/scav.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/tap.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/tap.csv -------------------------------------------------------------------------------- /data/AttackResults/token_usage/template.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/AttackResults/token_usage/template.csv -------------------------------------------------------------------------------- /data/GPTFuzz/GPTFuzzer.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/GPTFuzz/GPTFuzzer.csv -------------------------------------------------------------------------------- /data/Out_86.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/Out_86.csv -------------------------------------------------------------------------------- /data/SCAV/optimized_instructions_70b.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/SCAV/optimized_instructions_70b.csv -------------------------------------------------------------------------------- /data/SCAV/optimized_instructions_8b.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/SCAV/optimized_instructions_8b.csv -------------------------------------------------------------------------------- /data/advbench_harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/advbench_harmful_behaviors.csv -------------------------------------------------------------------------------- /data/format_jbb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/format_jbb.py -------------------------------------------------------------------------------- /data/hm_expanded.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/hm_expanded.csv -------------------------------------------------------------------------------- /data/jbb_expanded.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/jbb_expanded.csv -------------------------------------------------------------------------------- /data/past_tense/tense_future.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/past_tense/tense_future.json -------------------------------------------------------------------------------- /data/past_tense/tense_past.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/data/past_tense/tense_past.json -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/index.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/modules.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/modules.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.cli.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.cli.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.llms.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.llms.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.pipelines.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.pipelines.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.cold_attack.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.cold_attack.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.fuzzer.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.fuzzer.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.utils.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.gptfuzzer_attack.utils.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.renellm_attack.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.attacks.renellm_attack.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.defenses.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.defenses.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.defenses.repe_utils.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.defenses.repe_utils.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.judges.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.judges.po -------------------------------------------------------------------------------- /docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/locales/zh_CN/LC_MESSAGES/panda_guard.role.po -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/source/_static/panda_guard_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/_static/panda_guard_img.png -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/modules.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.cli.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.cli.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.llms.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.llms.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.pipelines.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.pipelines.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.cold_attack.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.cold_attack.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.gptfuzzer_attack.fuzzer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.gptfuzzer_attack.fuzzer.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.gptfuzzer_attack.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.gptfuzzer_attack.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.gptfuzzer_attack.utils.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.gptfuzzer_attack.utils.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.renellm_attack.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.renellm_attack.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.attacks.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.attacks.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.defenses.repe_utils.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.defenses.repe_utils.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.defenses.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.defenses.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.judges.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.judges.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.role.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.role.rst -------------------------------------------------------------------------------- /docs/source/panda_guard.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/docs/source/panda_guard.rst -------------------------------------------------------------------------------- /examples/alpaca_eval-baselines/alpaca_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/alpaca_eval-baselines/alpaca_eval.py -------------------------------------------------------------------------------- /examples/alpaca_eval-baselines/alpaca_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/alpaca_eval-baselines/alpaca_inference.py -------------------------------------------------------------------------------- /examples/alpaca_eval-baselines/proxy_balance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/alpaca_eval-baselines/proxy_balance.py -------------------------------------------------------------------------------- /examples/alpaca_eval-baselines/run_all_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/alpaca_eval-baselines/run_all_inference.py -------------------------------------------------------------------------------- /examples/jailbreak-baselines/jbb_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/jailbreak-baselines/jbb_eval.py -------------------------------------------------------------------------------- /examples/jailbreak-baselines/jbb_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/jailbreak-baselines/jbb_inference.py -------------------------------------------------------------------------------- /examples/jailbreak-baselines/run_all_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/jailbreak-baselines/run_all_inference.py -------------------------------------------------------------------------------- /examples/panda-bench.croissant.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/examples/panda-bench.croissant.json -------------------------------------------------------------------------------- /figures/img-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/figures/img-white.png -------------------------------------------------------------------------------- /figures/models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/figures/models.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/requirements.txt -------------------------------------------------------------------------------- /src/panda_guard/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/panda_guard/cli/attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/attack.py -------------------------------------------------------------------------------- /src/panda_guard/cli/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/chat.py -------------------------------------------------------------------------------- /src/panda_guard/cli/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/eval.py -------------------------------------------------------------------------------- /src/panda_guard/cli/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/inference.py -------------------------------------------------------------------------------- /src/panda_guard/cli/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/main.py -------------------------------------------------------------------------------- /src/panda_guard/cli/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/cli/serve.py -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/autodan.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/autodan.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/cold.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/cold.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/deepinception.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/deepinception.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/gcg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/gcg.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/gptfuzz.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/gptfuzz.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/ica.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/ica.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/overload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/overload.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/pair.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/pair.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/past_tense.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/past_tense.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/random_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/random_search.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/renellm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/renellm.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/scav.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/scav.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/attacks/tap.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/attacks/tap.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/goal_priority.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/goal_priority.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/icl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/icl.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/none.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/rpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/rpo.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/self_reminder.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/self_reminder.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/defenses/smoothllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/defenses/smoothllm.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/endpoints/hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/endpoints/hf.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/endpoints/openai.yaml: -------------------------------------------------------------------------------- 1 | llm_type: "OpenAiChatLLM" 2 | model_name: "gpt-4o-mini" -------------------------------------------------------------------------------- /src/panda_guard/config/judges/llm_based.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/judges/llm_based.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/judges/rule_based.yaml: -------------------------------------------------------------------------------- 1 | judge_cls: "RuleBasedJudge" -------------------------------------------------------------------------------- /src/panda_guard/config/tasks/attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/tasks/attack.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/tasks/chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/tasks/chat.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/tasks/eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/tasks/eval.yaml -------------------------------------------------------------------------------- /src/panda_guard/config/tasks/inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/config/tasks/inference.yaml -------------------------------------------------------------------------------- /src/panda_guard/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/llms/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/base.py -------------------------------------------------------------------------------- /src/panda_guard/llms/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/claude.py -------------------------------------------------------------------------------- /src/panda_guard/llms/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/gemini.py -------------------------------------------------------------------------------- /src/panda_guard/llms/hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/hf.py -------------------------------------------------------------------------------- /src/panda_guard/llms/oai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/oai.py -------------------------------------------------------------------------------- /src/panda_guard/llms/ollama_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/ollama_llm.py -------------------------------------------------------------------------------- /src/panda_guard/llms/sglang_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/sglang_llm.py -------------------------------------------------------------------------------- /src/panda_guard/llms/vllm_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/llms/vllm_llm.py -------------------------------------------------------------------------------- /src/panda_guard/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/pipelines/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/pipelines/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/pipelines/inference.py -------------------------------------------------------------------------------- /src/panda_guard/role/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/art_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/art_prompt.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/autodan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/autodan/autodan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/autodan/autodan.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/autodan/prompt_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/autodan/prompt_group.yaml -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/base.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/attack_suffix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/attack_suffix.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/bleuloss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/bleuloss.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/cold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/cold.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/decoding_suffix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/decoding_suffix.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/cold_attack/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/cold_attack/util.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/deepinception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/deepinception.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gcg.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gpt4_cipher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gpt4_cipher.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/core.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/mutator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/mutator.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/fuzzer/selection.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/gptfuzz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/gptfuzz.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/utils/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/utils/predict.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/gptfuzzer_attack/utils/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/gptfuzzer_attack/utils/template.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/ica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/ica.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/overload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/overload.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/pair.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/pair.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/random_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/random_search.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/renellm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/renellm.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/utils/data_utils.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/utils/harmful_classification_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/utils/harmful_classification_utils.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/utils/prompt_rewrite_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/utils/prompt_rewrite_utils.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/renellm_attack/utils/scenario_nest_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/renellm_attack/utils/scenario_nest_utils.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/rewrite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/rewrite.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/scav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/scav.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/tap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/tap.py -------------------------------------------------------------------------------- /src/panda_guard/role/attacks/transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/attacks/transfer.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/back_translate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/back_translate.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/base.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/goal_priority.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/goal_priority.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/gradsafe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/gradsafe.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/icl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/icl.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/paraphrase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/paraphrase.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/perplexity_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/perplexity_filter.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/pipelines.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/rep_control_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/rep_control_pipeline.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/rep_control_reading_vec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/rep_control_reading_vec.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/rep_readers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/rep_readers.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/repe_utils/rep_reading_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/repe_utils/rep_reading_pipeline.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/rewrite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/rewrite.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/rpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/rpo.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/self_defense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/self_defense.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/paraphrase.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/paraphrase.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/spellcheck.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/spellcheck.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/summarize.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/summarize.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/synonym.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/synonym.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/translation.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/translation.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothing_templates/verbtense.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothing_templates/verbtense.txt -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/semantic_smoothllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/semantic_smoothllm.py -------------------------------------------------------------------------------- /src/panda_guard/role/defenses/smoothllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/defenses/smoothllm.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/__init__.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/base.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/jbjudge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/jbjudge.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/judge_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/judge_registry.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/llm_based.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/llm_based.py -------------------------------------------------------------------------------- /src/panda_guard/role/judges/rule_based.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/role/judges/rule_based.py -------------------------------------------------------------------------------- /src/panda_guard/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/src/panda_guard/utils.py -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | cmds.sh -------------------------------------------------------------------------------- /tests/test_attacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/tests/test_attacks.py -------------------------------------------------------------------------------- /tests/test_defenses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/tests/test_defenses.py -------------------------------------------------------------------------------- /tests/test_judges.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/tests/test_judges.py -------------------------------------------------------------------------------- /tests/test_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Beijing-AISI/panda-guard/HEAD/tests/test_llms.py --------------------------------------------------------------------------------