├── LICENSE.md ├── MANIFEST.in ├── README.md ├── carve_sigil.py ├── carving ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── data_utils.cpython-310.pyc │ ├── eval.cpython-310.pyc │ ├── model_interface.cpython-310.pyc │ └── utils.cpython-310.pyc ├── config │ ├── README.md │ ├── __init__.py │ ├── cfg.yaml │ ├── cfg_eval.yaml │ ├── hydra │ │ ├── __init__.py │ │ └── job_logging │ │ │ └── custom.yaml │ ├── impl │ │ ├── __init__.py │ │ └── torch-default.yaml │ ├── optimizer │ │ ├── __init__.py │ │ ├── gcg+.yaml │ │ ├── gcg.yaml │ │ ├── greedy-search.yaml │ │ ├── lls.yaml │ │ ├── pez.yaml │ │ ├── random-search.yaml │ │ ├── soft-prompt.yaml │ │ └── sto.yaml │ ├── sigil │ │ ├── DAN_collision.yaml │ │ ├── __init__.py │ │ ├── abjuration.yaml │ │ ├── abstract.yaml │ │ ├── abstract2.yaml │ │ ├── alienate.yaml │ │ ├── alignment.yaml │ │ ├── alignment_fixed.yaml │ │ ├── anoikis.yaml │ │ ├── antibomb.yaml │ │ ├── antifilter.yaml │ │ ├── antirefusal.yaml │ │ ├── attackrepeater.yaml │ │ ├── attackrepeater_fixed.yaml │ │ ├── barack.yaml │ │ ├── books3.yaml │ │ ├── books3_2.yaml │ │ ├── capybaras.yaml │ │ ├── capybaras2.yaml │ │ ├── dachshund.yaml │ │ ├── ddos.yaml │ │ ├── divider.yaml │ │ ├── eot_collision.yaml │ │ ├── exfiltration.yaml │ │ ├── exfiltration2.yaml │ │ ├── fishing.yaml │ │ ├── fishing2.yaml │ │ ├── forgetter.yaml │ │ ├── good_assistant.yaml │ │ ├── hackaprompt_lvl10.yaml │ │ ├── hidden_attack.yaml │ │ ├── hidden_instruction.yaml │ │ ├── kenobi.yaml │ │ ├── lawsuit.yaml │ │ ├── lawsuit2.yaml │ │ ├── magnet.yaml │ │ ├── nanactattack.yaml │ │ ├── nanattack.yaml │ │ ├── nanspreadattack.yaml │ │ ├── nomouth.yaml │ │ ├── numbers.yaml │ │ ├── paraphraser.yaml │ │ ├── paraphraser_and_attack.yaml │ │ ├── paraphraser_tool.yaml │ │ ├── profanity.yaml │ │ ├── refund.yaml │ │ ├── refusal.yaml │ │ ├── refusal2.yaml │ │ ├── repeater.yaml │ │ ├── reverser.yaml │ │ ├── samaltman.yaml │ │ ├── samaltman2.yaml │ │ ├── sanitycheck.yaml │ │ ├── sanitycheck2.yaml │ │ ├── sanitycheck3.yaml │ │ ├── simple_eot.yaml │ │ ├── skull.yaml │ │ ├── sudo.yaml │ │ ├── sudoer.yaml │ │ ├── sysrepeater.yaml │ │ ├── sysrepeater_nocode.yaml │ │ ├── thesystem.yaml │ │ ├── unaligner.yaml │ │ ├── universal.yaml │ │ ├── universal_broad.yaml │ │ └── yeahsure.yaml │ └── wandb │ │ ├── default.yaml │ │ └── none.yaml ├── data │ ├── README.MD │ ├── harmful_behaviors.csv │ ├── harmful_strings.csv │ ├── judge_prompts.jsonl │ └── transfer_expriment_behaviors.csv ├── data_utils.py ├── eval.py ├── model_interface.py ├── optimizers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── gcg.cpython-310.pyc │ │ ├── gcgplus.cpython-310.pyc │ │ ├── generic_optimizer.cpython-310.pyc │ │ ├── greedy_search.cpython-310.pyc │ │ ├── lls_genetic_algorithm.cpython-310.pyc │ │ ├── pez.cpython-310.pyc │ │ ├── random_search.cpython-310.pyc │ │ ├── softprompt.cpython-310.pyc │ │ └── sto.cpython-310.pyc │ ├── gcg.py │ ├── gcgplus.py │ ├── generic_optimizer.py │ ├── greedy_search.py │ ├── lls_genetic_algorithm.py │ ├── pez.py │ ├── random_search.py │ ├── softprompt.py │ └── sto.py ├── sigils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── constraints.cpython-310.pyc │ │ ├── control_sigils.cpython-310.pyc │ │ ├── dos_sigils.cpython-310.pyc │ │ ├── extraction_sigils.cpython-310.pyc │ │ ├── generic_sigils.cpython-310.pyc │ │ ├── harmful_sigils.cpython-310.pyc │ │ ├── meta_sigils.cpython-310.pyc │ │ ├── misdirection_sigils.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── constraints.py │ ├── control_sigils.py │ ├── dos_sigils.py │ ├── extraction_sigils.py │ ├── generic_sigils.py │ ├── harmful_sigils.py │ ├── meta_sigils.py │ ├── misdirection_sigils.py │ └── utils.py └── utils.py ├── eval_sigil.py ├── examples_basic.sh ├── examples_control.sh ├── examples_denial_of_service.sh ├── examples_extraction.sh ├── examples_harm.sh ├── examples_misdirection.sh ├── interact.py ├── outputs └── README.MD ├── plaintest_converter.py ├── pyproject.toml └── setup.cfg /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/LICENSE.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/README.md -------------------------------------------------------------------------------- /carve_sigil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carve_sigil.py -------------------------------------------------------------------------------- /carving/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__init__.py -------------------------------------------------------------------------------- /carving/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /carving/__pycache__/data_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__pycache__/data_utils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/__pycache__/eval.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__pycache__/eval.cpython-310.pyc -------------------------------------------------------------------------------- /carving/__pycache__/model_interface.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__pycache__/model_interface.cpython-310.pyc -------------------------------------------------------------------------------- /carving/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/config/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/README.md -------------------------------------------------------------------------------- /carving/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /carving/config/cfg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/cfg.yaml -------------------------------------------------------------------------------- /carving/config/cfg_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/cfg_eval.yaml -------------------------------------------------------------------------------- /carving/config/hydra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /carving/config/hydra/job_logging/custom.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/hydra/job_logging/custom.yaml -------------------------------------------------------------------------------- /carving/config/impl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /carving/config/impl/torch-default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/impl/torch-default.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /carving/config/optimizer/gcg+.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/gcg+.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/gcg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/gcg.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/greedy-search.yaml: -------------------------------------------------------------------------------- 1 | name: greedy-search 2 | -------------------------------------------------------------------------------- /carving/config/optimizer/lls.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/lls.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/pez.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/pez.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/random-search.yaml: -------------------------------------------------------------------------------- 1 | name: random-search 2 | batch_size: 32 3 | max_iterations: 400 4 | -------------------------------------------------------------------------------- /carving/config/optimizer/soft-prompt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/soft-prompt.yaml -------------------------------------------------------------------------------- /carving/config/optimizer/sto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/optimizer/sto.yaml -------------------------------------------------------------------------------- /carving/config/sigil/DAN_collision.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/DAN_collision.yaml -------------------------------------------------------------------------------- /carving/config/sigil/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /carving/config/sigil/abjuration.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/abjuration.yaml -------------------------------------------------------------------------------- /carving/config/sigil/abstract.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/abstract.yaml -------------------------------------------------------------------------------- /carving/config/sigil/abstract2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/abstract2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/alienate.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/alienate.yaml -------------------------------------------------------------------------------- /carving/config/sigil/alignment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/alignment.yaml -------------------------------------------------------------------------------- /carving/config/sigil/alignment_fixed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/alignment_fixed.yaml -------------------------------------------------------------------------------- /carving/config/sigil/anoikis.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/anoikis.yaml -------------------------------------------------------------------------------- /carving/config/sigil/antibomb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/antibomb.yaml -------------------------------------------------------------------------------- /carving/config/sigil/antifilter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/antifilter.yaml -------------------------------------------------------------------------------- /carving/config/sigil/antirefusal.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/antirefusal.yaml -------------------------------------------------------------------------------- /carving/config/sigil/attackrepeater.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/attackrepeater.yaml -------------------------------------------------------------------------------- /carving/config/sigil/attackrepeater_fixed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/attackrepeater_fixed.yaml -------------------------------------------------------------------------------- /carving/config/sigil/barack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/barack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/books3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/books3.yaml -------------------------------------------------------------------------------- /carving/config/sigil/books3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/books3_2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/capybaras.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/capybaras.yaml -------------------------------------------------------------------------------- /carving/config/sigil/capybaras2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/capybaras2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/dachshund.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/dachshund.yaml -------------------------------------------------------------------------------- /carving/config/sigil/ddos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/ddos.yaml -------------------------------------------------------------------------------- /carving/config/sigil/divider.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/divider.yaml -------------------------------------------------------------------------------- /carving/config/sigil/eot_collision.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/eot_collision.yaml -------------------------------------------------------------------------------- /carving/config/sigil/exfiltration.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/exfiltration.yaml -------------------------------------------------------------------------------- /carving/config/sigil/exfiltration2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/exfiltration2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/fishing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/fishing.yaml -------------------------------------------------------------------------------- /carving/config/sigil/fishing2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/fishing2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/forgetter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/forgetter.yaml -------------------------------------------------------------------------------- /carving/config/sigil/good_assistant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/good_assistant.yaml -------------------------------------------------------------------------------- /carving/config/sigil/hackaprompt_lvl10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/hackaprompt_lvl10.yaml -------------------------------------------------------------------------------- /carving/config/sigil/hidden_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/hidden_attack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/hidden_instruction.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/hidden_instruction.yaml -------------------------------------------------------------------------------- /carving/config/sigil/kenobi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/kenobi.yaml -------------------------------------------------------------------------------- /carving/config/sigil/lawsuit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/lawsuit.yaml -------------------------------------------------------------------------------- /carving/config/sigil/lawsuit2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/lawsuit2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/magnet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/magnet.yaml -------------------------------------------------------------------------------- /carving/config/sigil/nanactattack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/nanactattack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/nanattack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/nanattack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/nanspreadattack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/nanspreadattack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/nomouth.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/nomouth.yaml -------------------------------------------------------------------------------- /carving/config/sigil/numbers.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/numbers.yaml -------------------------------------------------------------------------------- /carving/config/sigil/paraphraser.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/paraphraser.yaml -------------------------------------------------------------------------------- /carving/config/sigil/paraphraser_and_attack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/paraphraser_and_attack.yaml -------------------------------------------------------------------------------- /carving/config/sigil/paraphraser_tool.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/paraphraser_tool.yaml -------------------------------------------------------------------------------- /carving/config/sigil/profanity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/profanity.yaml -------------------------------------------------------------------------------- /carving/config/sigil/refund.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/refund.yaml -------------------------------------------------------------------------------- /carving/config/sigil/refusal.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/refusal.yaml -------------------------------------------------------------------------------- /carving/config/sigil/refusal2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/refusal2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/repeater.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/repeater.yaml -------------------------------------------------------------------------------- /carving/config/sigil/reverser.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/reverser.yaml -------------------------------------------------------------------------------- /carving/config/sigil/samaltman.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/samaltman.yaml -------------------------------------------------------------------------------- /carving/config/sigil/samaltman2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/samaltman2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sanitycheck.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sanitycheck.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sanitycheck2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sanitycheck2.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sanitycheck3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sanitycheck3.yaml -------------------------------------------------------------------------------- /carving/config/sigil/simple_eot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/simple_eot.yaml -------------------------------------------------------------------------------- /carving/config/sigil/skull.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/skull.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sudo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sudo.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sudoer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sudoer.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sysrepeater.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sysrepeater.yaml -------------------------------------------------------------------------------- /carving/config/sigil/sysrepeater_nocode.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/sysrepeater_nocode.yaml -------------------------------------------------------------------------------- /carving/config/sigil/thesystem.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/thesystem.yaml -------------------------------------------------------------------------------- /carving/config/sigil/unaligner.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/unaligner.yaml -------------------------------------------------------------------------------- /carving/config/sigil/universal.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/universal.yaml -------------------------------------------------------------------------------- /carving/config/sigil/universal_broad.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/universal_broad.yaml -------------------------------------------------------------------------------- /carving/config/sigil/yeahsure.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/sigil/yeahsure.yaml -------------------------------------------------------------------------------- /carving/config/wandb/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/config/wandb/default.yaml -------------------------------------------------------------------------------- /carving/config/wandb/none.yaml: -------------------------------------------------------------------------------- 1 | enabled: False 2 | entity: 3 | project: 4 | tags: [] 5 | -------------------------------------------------------------------------------- /carving/data/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data/README.MD -------------------------------------------------------------------------------- /carving/data/harmful_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data/harmful_behaviors.csv -------------------------------------------------------------------------------- /carving/data/harmful_strings.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data/harmful_strings.csv -------------------------------------------------------------------------------- /carving/data/judge_prompts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data/judge_prompts.jsonl -------------------------------------------------------------------------------- /carving/data/transfer_expriment_behaviors.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data/transfer_expriment_behaviors.csv -------------------------------------------------------------------------------- /carving/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/data_utils.py -------------------------------------------------------------------------------- /carving/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/eval.py -------------------------------------------------------------------------------- /carving/model_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/model_interface.py -------------------------------------------------------------------------------- /carving/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__init__.py -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/gcg.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/gcg.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/gcgplus.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/gcgplus.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/generic_optimizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/generic_optimizer.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/greedy_search.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/greedy_search.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/lls_genetic_algorithm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/lls_genetic_algorithm.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/pez.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/pez.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/random_search.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/random_search.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/softprompt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/softprompt.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/__pycache__/sto.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/__pycache__/sto.cpython-310.pyc -------------------------------------------------------------------------------- /carving/optimizers/gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/gcg.py -------------------------------------------------------------------------------- /carving/optimizers/gcgplus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/gcgplus.py -------------------------------------------------------------------------------- /carving/optimizers/generic_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/generic_optimizer.py -------------------------------------------------------------------------------- /carving/optimizers/greedy_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/greedy_search.py -------------------------------------------------------------------------------- /carving/optimizers/lls_genetic_algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/lls_genetic_algorithm.py -------------------------------------------------------------------------------- /carving/optimizers/pez.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/pez.py -------------------------------------------------------------------------------- /carving/optimizers/random_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/random_search.py -------------------------------------------------------------------------------- /carving/optimizers/softprompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/softprompt.py -------------------------------------------------------------------------------- /carving/optimizers/sto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/optimizers/sto.py -------------------------------------------------------------------------------- /carving/sigils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__init__.py -------------------------------------------------------------------------------- /carving/sigils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/constraints.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/constraints.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/control_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/control_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/dos_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/dos_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/extraction_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/extraction_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/generic_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/generic_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/harmful_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/harmful_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/meta_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/meta_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/misdirection_sigils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/misdirection_sigils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /carving/sigils/constraints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/constraints.py -------------------------------------------------------------------------------- /carving/sigils/control_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/control_sigils.py -------------------------------------------------------------------------------- /carving/sigils/dos_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/dos_sigils.py -------------------------------------------------------------------------------- /carving/sigils/extraction_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/extraction_sigils.py -------------------------------------------------------------------------------- /carving/sigils/generic_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/generic_sigils.py -------------------------------------------------------------------------------- /carving/sigils/harmful_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/harmful_sigils.py -------------------------------------------------------------------------------- /carving/sigils/meta_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/meta_sigils.py -------------------------------------------------------------------------------- /carving/sigils/misdirection_sigils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/misdirection_sigils.py -------------------------------------------------------------------------------- /carving/sigils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/sigils/utils.py -------------------------------------------------------------------------------- /carving/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/carving/utils.py -------------------------------------------------------------------------------- /eval_sigil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/eval_sigil.py -------------------------------------------------------------------------------- /examples_basic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_basic.sh -------------------------------------------------------------------------------- /examples_control.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_control.sh -------------------------------------------------------------------------------- /examples_denial_of_service.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_denial_of_service.sh -------------------------------------------------------------------------------- /examples_extraction.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_extraction.sh -------------------------------------------------------------------------------- /examples_harm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_harm.sh -------------------------------------------------------------------------------- /examples_misdirection.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/examples_misdirection.sh -------------------------------------------------------------------------------- /interact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/interact.py -------------------------------------------------------------------------------- /outputs/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/outputs/README.MD -------------------------------------------------------------------------------- /plaintest_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/plaintest_converter.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasGeiping/carving/HEAD/setup.cfg --------------------------------------------------------------------------------