├── .gitignore ├── README.md ├── assets └── radar_chart.png ├── attack_pipeline ├── attack.py ├── config.py ├── evaluate.py ├── harmbench_cls_util.py ├── harmbench_standard_trl_standard_format │ ├── cache-09c1146f7ae8496f.arrow │ ├── cache-2965866f72eceefb.arrow │ ├── cache-bc29666ba730a487.arrow │ ├── cache-ce02a3df367d3003.arrow │ ├── cache-da4095c329ea2f81.arrow │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json ├── openai_api.py ├── response.py ├── run.py ├── score.py ├── templates.py └── utils.py ├── environment.yml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/README.md -------------------------------------------------------------------------------- /assets/radar_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/assets/radar_chart.png -------------------------------------------------------------------------------- /attack_pipeline/attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/attack.py -------------------------------------------------------------------------------- /attack_pipeline/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/config.py -------------------------------------------------------------------------------- /attack_pipeline/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/evaluate.py -------------------------------------------------------------------------------- /attack_pipeline/harmbench_cls_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_cls_util.py -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/cache-09c1146f7ae8496f.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/cache-09c1146f7ae8496f.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/cache-2965866f72eceefb.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/cache-2965866f72eceefb.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/cache-bc29666ba730a487.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/cache-bc29666ba730a487.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/cache-ce02a3df367d3003.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/cache-ce02a3df367d3003.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/cache-da4095c329ea2f81.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/cache-da4095c329ea2f81.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/dataset_info.json -------------------------------------------------------------------------------- /attack_pipeline/harmbench_standard_trl_standard_format/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/harmbench_standard_trl_standard_format/state.json -------------------------------------------------------------------------------- /attack_pipeline/openai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/openai_api.py -------------------------------------------------------------------------------- /attack_pipeline/response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/response.py -------------------------------------------------------------------------------- /attack_pipeline/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/run.py -------------------------------------------------------------------------------- /attack_pipeline/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/score.py -------------------------------------------------------------------------------- /attack_pipeline/templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/templates.py -------------------------------------------------------------------------------- /attack_pipeline/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/attack_pipeline/utils.py -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/environment.yml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leileqiTHU/Attacker/HEAD/requirements.txt --------------------------------------------------------------------------------