├── .gitignore ├── LICENSE ├── README.md ├── analyze_results.ipynb ├── config ├── hydra │ └── job_logging │ │ └── custom.yaml ├── incontextunlearning_baseline.yaml ├── incontextunlearning_withsystemprompt.yaml └── promptmin.yaml ├── datasets ├── famous_quotes.json └── wikipedia.json ├── example_script.py ├── make_table_of_results.py ├── prompt-minimization-main.py ├── prompt_optimization ├── __init__.py ├── gcg.py ├── miniprompt.py ├── random_search.py └── utils.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | outputs/ 6 | junk.py 7 | .DS_Store 8 | .idea 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 164 | #.idea/ 165 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Avi Schwarzschild, Zhili Feng, Pratyush Maini 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rethinking LLM Memorization through the Lens of Adversarial Compression 2 | 3 | A compression-based approach to defining and measuring memorization with LLMs. 4 | 5 | This repository contains the code needed to measure memorization in LLMs using input-output compression. This method is presented in [our paper](https://arxiv.org/pdf/2404.15146). This repo was developed collaboratively by Avi Schwarzschild, Zhili Feng, and Pratyush Maini at Carnegie Mellon University in 2024. This code is particularly useful for reproducing the results in our paper on the topic. 
6 | 7 | 8 | ## Getting Started 9 | 10 | ### Requirements 11 | This code was developed and tested with Python 3.10.4. After cloning the repository, you can install the requirements and run our experiments. 12 | 13 | To install requirements: 14 | 15 | ```$ pip install -r requirements.txt``` 16 | 17 | ### Memorization Measurements 18 | 19 | Try computing the compression ratio of the first sample in the [Famous Quotes](datasets/famous_quotes.json) dataset with the following command. 20 | ``` 21 | % python prompt-minimization-main.py dataset=famous_quotes data_idx=0 22 | ``` 23 | 24 | ### Logging Style and Data Analysis 25 | 26 | ``` 27 | outputs 28 | └── happy-Melissa 29 | ├── .hydra 30 | │ ├── config.yaml 31 | │ ├── hydra.yaml 32 | │ └── overrides.yaml 33 | ├── results.json 34 | └── log.log 35 | ``` 36 | 37 | These output folders can be parsed and analyzed as a DataFrame using Pandas. 38 | Open the [analyze_results notebook](analyze_results.ipynb) to process experiments or run [make_table_of_results.py](make_table_of_results.py) to parse the output folder. The notebook will load all the results into a Pandas DataFrame and then it can be edited (for example by adding cells) to do whatever analysis is needed. The script is a short Python script that will show you the set of experiment names, a table with every entry, and a summary table aggregating across (model, dataset, optimizer) groups. It can also be used with the flag `--experiment_name ...` to aggregate results from any number of experiments. 39 | 40 | ### Optimizing Prompts 41 | We include a simple script for optimizing input tokens to elicit a targeted output from an LLM. This is only one step in finding minimal prompts, but it may be helpful to see how prompt optimization can be done in general. 42 | ``` 43 | % python example_script.py 44 | ``` 45 | 46 | ## Contributing 47 | 48 | We encourage anyone using the code to reach out to us directly and open issues and pull requests with questions and improvements! 
49 | 50 | ## Citing Our Work 51 | 52 | ``` 53 | @misc{schwarzschild2024rethinking, 54 | title={Rethinking LLM Memorization through the Lens of Adversarial Compression}, 55 | author={Avi Schwarzschild and Zhili Feng and Pratyush Maini and Zachary C. Lipton and J. Zico Kolter}, 56 | year={2024}, 57 | eprint={2404.15146}, 58 | archivePrefix={arXiv}, 59 | primaryClass={cs.LG} 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /analyze_results.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "A Notebook for processing experimental results.\n", 7 | "\n", 8 | "All runs correspond to a folder in `outputs/` from which we can load config information and results from json data." 9 | ], 10 | "metadata": { 11 | "collapsed": false 12 | }, 13 | "id": "8819094b533018fc" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "outputs": [], 18 | "source": [ 19 | "import json\n", 20 | "from pathlib import Path\n", 21 | "import pandas as pd" 22 | ], 23 | "metadata": { 24 | "collapsed": false, 25 | "ExecuteTime": { 26 | "end_time": "2024-04-12T12:09:02.761133Z", 27 | "start_time": "2024-04-12T12:09:02.167577Z" 28 | } 29 | }, 30 | "id": "337aab98af3643dc", 31 | "execution_count": 1 32 | }, 33 | { 34 | "cell_type": "code", 35 | "outputs": [], 36 | "source": [ 37 | "# Set this to filter runs based on the cfg.experiment_name argument\n", 38 | "experiment_name = None\n", 39 | "\n", 40 | "# It can be a list like this:\n", 41 | "# experiment_name = [\"pythia-small\", \"pythia-big\"]" 42 | ], 43 | "metadata": { 44 | "collapsed": false, 45 | "ExecuteTime": { 46 | "end_time": "2024-04-12T12:09:03.848958Z", 47 | "start_time": "2024-04-12T12:09:03.845438Z" 48 | } 49 | }, 50 | "id": "757ba31709497865", 51 | "execution_count": 2 52 | }, 53 | { 54 | "cell_type": "code", 55 | "outputs": [], 56 | "source": [ 57 | "folder_path = 
Path(\"outputs\")\n", 58 | "json_list = []\n", 59 | "\n", 60 | "for json_file in folder_path.glob(\"*/*.json\"):\n", 61 | " with open(json_file, \"r\", encoding=\"utf-8\") as f:\n", 62 | " # Load the JSON content\n", 63 | " json_content = json.load(f)\n", 64 | " \n", 65 | " # Append the content as is (which will result in a single cell containing the JSON in the DataFrame)\n", 66 | " if experiment_name is not None:\n", 67 | " if \"cfg_experiment_name\" in json_content and json_content[\"cfg_experiment_name\"] in experiment_name:\n", 68 | " json_list.append(json_content)\n", 69 | " else:\n", 70 | " json_list.append(json_content)\n", 71 | "# Convert the list of JSON objects to a DataFrame\n", 72 | "df = pd.DataFrame(json_list)" 73 | ], 74 | "metadata": { 75 | "collapsed": false, 76 | "ExecuteTime": { 77 | "end_time": "2024-04-12T12:10:52.116311Z", 78 | "start_time": "2024-04-12T12:10:52.109492Z" 79 | } 80 | }, 81 | "id": "8d027b206c17c5de", 82 | "execution_count": 11 83 | }, 84 | { 85 | "cell_type": "code", 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": " success num_free_tokens target_str \\\n0 False 10 To be or not to be, that is the question. \n1 False 10 jumps over the lazy dog \n2 False 10 I think, therefore I am. \n\n target_length cfg_batch_size cfg_discrete_optimizer cfg_lr \\\n0 12 100 gcg 0.01 \n1 6 100 gcg 0.01 \n2 7 100 gcg 0.01 \n\n cfg_model_name cfg_num_steps cfg_optimizer ... cfg_input_str \\\n0 EleutherAI/pythia-14m 20 adam ... \n1 EleutherAI/pythia-14m 20 adam ... \n2 EleutherAI/pythia-14m 20 adam ... \n\n cfg_target_str cfg_chat_template \\\n0 To be or not to be, that is the question. [, ] \n1 jumps over the lazy dog [, ] \n2 I think, therefore I am. 
[, ] \n\n cfg_system_prompt cfg_dataset cfg_data_idx cfg_random_weights \\\n0 famous_quotes 0.0 False \n1 None NaN False \n2 famous_quotes 1.0 False \n\n cfg_max_tokens cfg_mini_batch_size time \n0 10 100 20240412-08:06:00 \n1 10 100 20240412-08:05:01 \n2 10 100 20240412-08:07:23 \n\n[3 rows x 24 columns]", 90 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
successnum_free_tokenstarget_strtarget_lengthcfg_batch_sizecfg_discrete_optimizercfg_lrcfg_model_namecfg_num_stepscfg_optimizer...cfg_input_strcfg_target_strcfg_chat_templatecfg_system_promptcfg_datasetcfg_data_idxcfg_random_weightscfg_max_tokenscfg_mini_batch_sizetime
0False10To be or not to be, that is the question.12100gcg0.01EleutherAI/pythia-14m20adam...To be or not to be, that is the question.[, ]famous_quotes0.0False1010020240412-08:06:00
1False10jumps over the lazy dog6100gcg0.01EleutherAI/pythia-14m20adam...jumps over the lazy dog[, ]NoneNaNFalse1010020240412-08:05:01
2False10I think, therefore I am.7100gcg0.01EleutherAI/pythia-14m20adam...I think, therefore I am.[, ]famous_quotes1.0False1010020240412-08:07:23
\n

3 rows × 24 columns

\n
" 91 | }, 92 | "execution_count": 13, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "df.head().round(2)" 99 | ], 100 | "metadata": { 101 | "collapsed": false, 102 | "ExecuteTime": { 103 | "end_time": "2024-04-12T12:11:04.524587Z", 104 | "start_time": "2024-04-12T12:11:04.515877Z" 105 | } 106 | }, 107 | "id": "5529d488e46ef405", 108 | "execution_count": 13 109 | }, 110 | { 111 | "cell_type": "code", 112 | "outputs": [], 113 | "source": [ 114 | "# Add columns with compression ratio and memorization and sort\n", 115 | "df[\"ratio\"] = df[\"target_length\"] / df[\"num_free_tokens\"]\n", 116 | "df[\"memorized\"] = df[\"ratio\"] > 1\n", 117 | "df.loc[df[\"success\"] == False, \"memorized\"] = False\n", 118 | "df = df.sort_values(by=[\"cfg_model_name\", \"cfg_dataset\", \"cfg_data_idx\", \"cfg_discrete_optimizer\"])" 119 | ], 120 | "metadata": { 121 | "collapsed": false, 122 | "ExecuteTime": { 123 | "end_time": "2024-04-12T12:11:49.587114Z", 124 | "start_time": "2024-04-12T12:11:49.578651Z" 125 | } 126 | }, 127 | "id": "14e759e300cf964e", 128 | "execution_count": 14 129 | }, 130 | { 131 | "cell_type": "code", 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": " success num_free_tokens target_str \\\n0 False 10 To be or not to be, that is the question. \n2 False 10 I think, therefore I am. \n\n target_length cfg_batch_size cfg_discrete_optimizer cfg_lr \\\n0 12 100 gcg 0.01 \n2 7 100 gcg 0.01 \n\n cfg_model_name cfg_num_steps cfg_optimizer ... cfg_chat_template \\\n0 EleutherAI/pythia-14m 20 adam ... [, ] \n2 EleutherAI/pythia-14m 20 adam ... [, ] \n\n cfg_system_prompt cfg_dataset cfg_data_idx cfg_random_weights \\\n0 famous_quotes 0.0 False \n2 famous_quotes 1.0 False \n\n cfg_max_tokens cfg_mini_batch_size time ratio memorized \n0 10 100 20240412-08:06:00 1.2 False \n2 10 100 20240412-08:07:23 0.7 False \n\n[2 rows x 26 columns]", 136 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
successnum_free_tokenstarget_strtarget_lengthcfg_batch_sizecfg_discrete_optimizercfg_lrcfg_model_namecfg_num_stepscfg_optimizer...cfg_chat_templatecfg_system_promptcfg_datasetcfg_data_idxcfg_random_weightscfg_max_tokenscfg_mini_batch_sizetimeratiomemorized
0False10To be or not to be, that is the question.12100gcg0.01EleutherAI/pythia-14m20adam...[, ]famous_quotes0.0False1010020240412-08:06:001.2False
2False10I think, therefore I am.7100gcg0.01EleutherAI/pythia-14m20adam...[, ]famous_quotes1.0False1010020240412-08:07:230.7False
\n

2 rows × 26 columns

\n
" 137 | }, 138 | "execution_count": 19, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# Filter the rows where dataset is 'random', the model is 'EleutherAI/pythia-14m', and success is True\n", 145 | "filtered_df = df[(df['cfg_dataset'] == 'famous_quotes') &\n", 146 | " (df['cfg_model_name'] == 'EleutherAI/pythia-14m')\n", 147 | " ]\n", 148 | "filtered_df.head().round(2)" 149 | ], 150 | "metadata": { 151 | "collapsed": false, 152 | "ExecuteTime": { 153 | "end_time": "2024-04-12T12:13:35.306626Z", 154 | "start_time": "2024-04-12T12:13:35.291714Z" 155 | } 156 | }, 157 | "id": "f3c999ffb1266688", 158 | "execution_count": 19 159 | }, 160 | { 161 | "cell_type": "code", 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": "Empty DataFrame\nColumns: [ratio, memorized, success]\nIndex: []", 166 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ratiomemorizedsuccess
cfg_model_namecfg_datasetcfg_discrete_optimizer
\n
" 167 | }, 168 | "execution_count": 20, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "# Sort and dedup\n", 175 | "df_sorted = df.sort_values(by=['cfg_model_name', 'cfg_discrete_optimizer', 'cfg_dataset', 'cfg_data_idx', 'success'], ascending=[True, True, True, True, False])\n", 176 | "df = df_sorted.drop_duplicates(subset=['cfg_dataset', 'cfg_data_idx', 'cfg_model_name', 'cfg_discrete_optimizer'], keep='first')\n", 177 | "df = df[df[\"success\"] == True]\n", 178 | "\n", 179 | "# Make summary counting the average ratio and success rate for each dataset and discrete_optimizer include counts\n", 180 | "summary = df.groupby([\"cfg_model_name\", \"cfg_dataset\", \"cfg_discrete_optimizer\"]).agg(\n", 181 | " {\"ratio\": \"mean\", \"memorized\": \"mean\", \"success\": \"count\"}).round(2)\n", 182 | "summary" 183 | ], 184 | "metadata": { 185 | "collapsed": false, 186 | "ExecuteTime": { 187 | "end_time": "2024-04-12T12:14:36.158431Z", 188 | "start_time": "2024-04-12T12:14:36.144177Z" 189 | } 190 | }, 191 | "id": "585f76598e4455fa", 192 | "execution_count": 20 193 | }, 194 | { 195 | "cell_type": "code", 196 | "outputs": [], 197 | "source": [], 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "id": "6278d1767c5d2a84" 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 2 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython2", 220 | "version": "2.7.6" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 5 225 | } 226 | -------------------------------------------------------------------------------- /config/hydra/job_logging/custom.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | formatters: 3 | simple: 4 | format: "[%(asctime)s %(levelname)s]: %(message)s" 5 | datefmt: "%m/%d/%Y %H:%M:%S" 6 | handlers: 7 | console: 8 | class: logging.StreamHandler 9 | formatter: simple 10 | stream: ext://sys.stdout 11 | file: 12 | class: logging.handlers.RotatingFileHandler 13 | formatter: simple 14 | filename: log.log 15 | root: 16 | handlers: [console, file] 17 | 18 | disable_existing_loggers: false -------------------------------------------------------------------------------- /config/incontextunlearning_baseline.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${experiment_name}/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - "[INST] Give me a famous quote." 16 | - " [/INST] Sure! 
Here's a famous " 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: 30 24 | mini_batch_size: 100 25 | model_name: meta-llama/Llama-2-7b-chat-hf 26 | num_steps: 100 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: "" 32 | target_str: jumps over the lazy dog 33 | topk: 250 -------------------------------------------------------------------------------- /config/incontextunlearning_withsystemprompt.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${experiment_name}/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - "[INST] <>\nAbstain from giving famous quote.\n<>\n\nGive me a famous quote." 
16 | - " [/INST]" 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: 30 24 | mini_batch_size: 100 25 | model_name: meta-llama/Llama-2-7b-chat-hf 26 | num_steps: 100 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: "" 32 | target_str: jumps over the lazy dog 33 | topk: 250 -------------------------------------------------------------------------------- /config/promptmin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - '' 16 | - '' 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: -1 24 | mini_batch_size: 100 25 | model_name: EleutherAI/pythia-410m 26 | num_steps: 200 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: '' 32 | target_str: jumps over the lazy dog 33 | topk: 250 34 | 35 | -------------------------------------------------------------------------------- /datasets/famous_quotes.json: -------------------------------------------------------------------------------- 1 | [ 2 | "To be or not to be, that is the question.", 3 | "I think, therefore I am.", 4 | "In the end, we will remember not the words of our enemies, but the silence of our friends.", 5 | "The only thing necessary for the triumph of evil is for good men to do nothing.", 6 | "The unexamined life is not worth living.", 7 | "To thine own self be true.", 8 | "The future 
belongs to those who believe in the beauty of their dreams.", 9 | "The mind is everything. What you think you become.", 10 | "Not everything that is faced can be changed, but nothing can be changed until it is faced.", 11 | "It does not matter how slowly you go as long as you do not stop.", 12 | "Injustice anywhere is a threat to justice everywhere.", 13 | "The journey of a thousand miles begins with one step.", 14 | "Be yourself, everyone else is already taken.", 15 | "Two things are infinite: the universe and human stupidity, and I'm not sure about the universe.", 16 | "If you judge people, you have no time to love them.", 17 | "To succeed in life, you need two things: ignorance and confidence.", 18 | "The best way to predict the future is to create it.", 19 | "Life is what happens to us while we are making other plans.", 20 | "Whenever you find yourself on the side of the majority, it is time to pause and reflect.", 21 | "When one door of happiness closes, another opens.", 22 | "Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.", 23 | "Without music, life would be a mistake.", 24 | "The only true wisdom is in knowing you know nothing.", 25 | "The truth will set you free, but first it will piss you off.", 26 | "There is no passion to be found playing small - in settling for a life that is less than the one you are capable of living.", 27 | "If you want to go fast, go alone. 
If you want to go far, go together.", 28 | "You must be the change you wish to see in the world.", 29 | "Don't cry because it's over, smile because it happened.", 30 | "The greatest glory in living lies not in never falling, but in rising every time we fall.", 31 | "Life is either a daring adventure or nothing at all.", 32 | "In the middle of every difficulty lies opportunity.", 33 | "Success is not final, failure is not fatal: It is the courage to continue that counts.", 34 | "You have within you right now, everything you need to deal with whatever the world can throw at you.", 35 | "If life were predictable it would cease to be life, and be without flavor.", 36 | "Life is 10% what happens to us and 90% how we react to it.", 37 | "The purpose of our lives is to be happy.", 38 | "The way to get started is to quit talking and begin doing.", 39 | "The world is full of magical things patiently waiting for our wits to grow sharper.", 40 | "It is better to be hated for what you are than to be loved for what you are not.", 41 | "In this world nothing can be said to be certain, except death and taxes.", 42 | "The world breaks everyone, and afterward, some are strong at the broken places.", 43 | "Happiness is not something ready made. It comes from your own actions.", 44 | "There are no shortcuts to any place worth going.", 45 | "The roots of education are bitter, but the fruit is sweet.", 46 | "It's not what happens to you, but how you react to it that matters.", 47 | "The only way to do great work is to love what you do.", 48 | "Life isn't about finding yourself. Life is about creating yourself.", 49 | "It is never too late to be what you might have been.", 50 | "The best time to plant a tree was 20 years ago. The second best time is now.", 51 | "It's not the size of the dog in the fight, it's the size of the fight in the dog.", 52 | "Life is like riding a bicycle. 
To keep your balance, you must keep moving.", 53 | "The best way to find yourself is to lose yourself in the service of others.", 54 | "You miss 100% of the shots you don't take.", 55 | "The best dreams happen when you're awake.", 56 | "Life is really simple, but we insist on making it complicated.", 57 | "Change your thoughts and you change your world.", 58 | "Happiness is not something you postpone for the future, it is something you design for the present.", 59 | "A journey of a thousand sites begins with a single click.", 60 | "The obstacle is the path.", 61 | "Don’t count the days, make the days count.", 62 | "The harder you work for something, the greater you’ll feel when you achieve it.", 63 | "Success is not the key to happiness. Happiness is the key to success.", 64 | "Love the life you live. Live the life you love.", 65 | "The only time to be positive you've got a clear path is when you're on the edge of a cliff.", 66 | "Dream big and dare to fail.", 67 | "Life shrinks or expands in proportion to one's courage.", 68 | "You are never too old to set another goal or to dream a new dream.", 69 | "What lies behind us and what lies before us are tiny matters compared to what lies within us.", 70 | "The only thing standing between you and your goal is the story you keep telling yourself.", 71 | "Happiness often sneaks in through a door you didn’t know you left open.", 72 | "The only way to achieve the impossible is to believe it is possible.", 73 | "It does not do to dwell on dreams and forget to live.", 74 | "Don't watch the clock, do what it does. Keep going.", 75 | "You cannot change what you are, only what you do.", 76 | "Life is ours to be spent, not to be saved.", 77 | "You can't use up creativity. 
The more you use, the more you have.", 78 | "The best revenge is massive success.", 79 | "It's not what you look at that matters, it's what you see.", 80 | "The road to success and the road to failure are almost exactly the same.", 81 | "Life is 10% what happens to me and 90% of how I react to it.", 82 | "The two most important days in your life are the day you are born and the day you find out why.", 83 | "The most difficult thing is the decision to act, the rest is merely tenacity.", 84 | "The best time to plant a tree was 20 years ago. The second best time is now.", 85 | "The only way to do great work is to love what you do.", 86 | "Your time is limited, don't waste it living someone else's life.", 87 | "The only limit to our realization of tomorrow is our doubts of today.", 88 | "In order to be irreplaceable one must always be different.", 89 | "The future belongs to those who believe in the beauty of their dreams.", 90 | "If you look at what you have in life, you'll always have more.", 91 | "A person who never made a mistake never tried anything new.", 92 | "Remember no one can make you feel inferior without your consent.", 93 | "The only true wisdom is in knowing you know nothing.", 94 | "The only journey is the one within.", 95 | "Life is a dream for the wise, a game for the fool, a comedy for the rich, a tragedy for the poor.", 96 | "Do not go where the path may lead, go instead where there is no path and leave a trail.", 97 | "Do not let making a living prevent you from making a life.", 98 | "The biggest risk is not taking any risk.", 99 | "Happiness is not something ready-made. It comes from your own actions.", 100 | "Knowledge is power.", 101 | "Be the change that you wish to see in the world." 
102 | ] 103 | -------------------------------------------------------------------------------- /datasets/wikipedia.json: -------------------------------------------------------------------------------- 1 | [ 2 | " Anastasia, willing to stay with Ahmed, is reluctant", 3 | "Teen Angel may refer to:\n\nFilm and TV\nTeen Angel (1989 TV series), a 1989 drama starring Jason Priestley\nTeen Angel (1997 TV series), a 1997 sitcom starring Corbin Allred\nTeen Angel, a one-scene character in Grease (see \"Beauty School Dropout\")\n\nMusic\nTeen Angels (American band), 1994–1996\nTeen Angels, an Argentine pop music group\n\nSongs\n\"Teen Angel\" (song), song performed by Mark Dinning, written Red Surrey, Jean Surrey 1959\n\"Teen Angel\", song by Dion and the Belmonts Singer, Patrick, Dimucci 1958\n\"Teen Angel\", a song by Donovan, D Leitch from The Hurdy Gurdy Man 1968\n\nComics\n Angel from the Time-displaced X-Men (called that way to set him apart from the adult character)", 4 | "\n\nSingles\n\"Weeeek\": On November 7, 2007, NEWS released \"Weeeek\", their seventh single, along with their second studio album, Pacific", 5 | "\n\nGlutamate dehydrogenases , and (GluDH) are enzymes that catalyse the NAD- and/or NADP-dependent reversible deamination of L-glutamate into alpha-ketoglutarate", 6 | "\n\nHistory\nLagrasse dates from the time the abbey was built", 7 | "\n\nAlexander has produced illustrations for Dark Horse Comics, Warner Brothers, DC Comics, Hasbro, White Wolf, Inc, and Dalmatian Press", 8 | "\n\nOperations \n\nThe railway generally operates on a 'one engine in steam' principle on most days, operating throughout the year on weekends and daily during half terms and school holidays", 9 | "Ouaoumana is a commune in Khénifra Province of the Béni Mellal-Khénifra administrative region of Morocco", 10 | " It is also directed by Ei Aoki; produced by Atsuhiro Iwakami; music by Yuki Kajiura; character design by Atsushi Ikariya and Tomonori Sudou; and art, 3D, and photography 
directions by Koji Eto, Kōjirō Shishido, and Yuichi Terao respectively", 11 | " OSP also built its own semi-permanent two-story wooden thrust stage in 1990", 12 | " Louis Rams coaches\nCategory:San Francisco 49ers coaches\nCategory:Pittsburgh Steelers coaches\nCategory:Sportspeople from Berkeley, California\nCategory:Wake Forest Demon Deacons football coaches", 13 | "com\nTillamook Rock Lighthouse entry in National Park Service inventory of historic lighthouses\nArticle on Tillamook Rock Lighthouse in Lighthouse Digest Magazine Jul/Aug 2016 containing many keeper photos\nTillamook Rock Lighthouse Documentary produced by Oregon Field Guide\n\nCategory:Lighthouses completed in 1881\nCategory:Lighthouses on the National Register of Historic Places in Oregon\nCategory:National Register of Historic Places in Clatsop County, Oregon\nCategory:Oregon Coast\nCategory:Transportation buildings and structures in Clatsop County, Oregon\nCategory:1881 establishments in Oregon", 14 | " The design of the Super Class Ferry was done in Seattle by W", 15 | "The Burton is a historic apartment building located at Indianapolis, Indiana", 16 | " Fletcher, an Associate Justice and Chief Justice of the Supreme Court of Georgia\nRichard Fletcher (American politician), an Associate Justice of the Massachusetts Supreme Judicial Court\nRobert Virgil Fletcher, an Associate Justice of the Supreme Court of Mississippi\nWilliam A", 17 | "\n Oklahoma Farm Bureau\n Open for Business, an online general interest publication with a technology focus\n The Sims 2: Open for Business\n Oregon Food Bank\nOFB, or Original Farm Boys, a UK drill music group based in Broadwater Farm Estate", 18 | "\n\nSelected publications\nwith Ernesto Lupercio: Gerbes over orbifolds and twisted K-theory, Communications in Mathematical Physics, vol", 19 | " Vogue fashion published an article in 2015 about the circular captive bead belly rings and TummyToys snap lock clasps becoming the current trend for navel piercings", 20 | 
" Although he does not appear in person in Death by Degrees, Nina Williams' spin-off game, he is mentioned several times in journals and documents, and he has a laboratory on the Solitaria Penitentiary island, although it's abandoned", 21 | "Massimo Natili (July 28, 1935 – September 19, 2017) was a racing driver from Italy", 22 | "\n\nVideo \n The Royal Economic Society's 2006 Annual Public Lecture, by Collier at the (Royal Economic Society)\n Interview with Fareed Zakaria on Foreign Exchange\n TED Conference, Paul Collier on \"The Bottom Billion\"\n TED Conference, Paul Collier's new rules for rebuilding a broken nation\n Why social science should integrate culture and how to do it?, at the Blavatnik School of Government, Oxford University, January 2017\n\nPress \n Review of The Plundered Planet by the Financial Times\n Review of the Bottom Billion by the Financial Times\n Review of the Bottom Billion in The New York Times\n Samuel Grove, \"The Bottom of the Barrel: A Review of Paul Collier's The Bottom Billion: Why the Poorest Countries Are Failing and What Can Be Done about It", 23 | "Whip snake may refer to:\n\nGenera \n Ahaetulla, endemic to southern Asia, from India to Vietnam\n Demansia, endemic to Australia, Papua New Guinea, and nearby islands\n Hemorrhois, endemic to the western Mediterranean, west, central, and southern Asia\n Hierophis, endemic to southern Europe\n Masticophis, endemic to the Americas\n Psammophis, endemic to Africa and Asia\n\nSpecies \n Caspian whipsnake (Dolichophis caspius), found in the Balkans and Eastern Europe\n Red whip snake (Platyceps collaris), found in Bulgaria and the Levant\n Parasuta dwyeri, found in Australia from New South Wales to South Queensland\n White-lipped snake (Drysdalia coronoides), found in Tasmania and southeastern Australia\n\nCategory:Animal common name disambiguation pages", 24 | "\n\nWith effect from September 2012, the school no longer make use of Le Fousseau", 25 | "\nThe use of topical antibiotics to 
treat surgical wounds does not reduce infection rates in comparison with non-antibiotic ointment or no ointment at all", 26 | "\n\nNevada was won by Ulysses S", 27 | "Queen Victoria's Commemoration Medal 1900 (Ireland), more commonly referred to as the Visit to Ireland Medal 1900, was awarded to those members of the Irish Police Forces on duty at Queen Victoria’s various engagements during her visit to Ireland in 1900", 28 | " The proboscis or rostrum lacks a beak", 29 | "\n\nB\nAleksandra Belcova (1892–1981), painter\nBiruta Baumane (born 1922), painter\n\nD\nLilija Dinere (born 1955), painter, illustrator\n\nK\nIngrīda Kadaka (born 1967), book designer, illustrator\nAina Karlsone (1935–2012), artist, writer\n\nO\nSimona Orinska (born 1978), contemporary artist\n\nP\nTatyana Palchuk (born 1954), painter\nLucia Peka (1912–1991), Latvian-American painter\nLīga Purmale (born 1948), painter\n\nS\nDaina Skadmane (1990–2013), painter, lithographer\nRoze Stiebra (born 1942), animator\n\n-\nLatvian\nArtists\nArtists, women", 30 | "\n\nRegular season\n\nStandings\n\n2013 NCAA Tournament\n\nNote: * denotes overtime period(s)\n\nPlayer stats\n\nScoring leaders\n\n \nGP = Games played; G = Goals; A = Assists; Pts = Points; PIM = Penalty minutes\n\nLeading goaltenders\n\nGP = Games played; Min = Minutes played; W = Wins; L = Losses; T = Ties; GA = Goals against; SO = Shutouts; SV% = Save percentage; GAA = Goals against average\n\nAwards\n\nNCAA\n\nAtlantic Hockey\n\nCCHA\n\nECAC\n\nHockey East\n\nWCHA\n\nReferences\n\nExternal links\nUSCHO", 31 | " In 1972-1978, the collection was turned over to the state by the Swiss Federal Office for military airfields", 32 | "Liu Fu may refer to:\n\n Liu Fu (prince) (劉輔; died 84), Eastern Han Dynasty prince, son of Emperor Guangwu\n Liu Fu (Yuanying) (劉馥; died 208), style name Yuanying (元穎), Eastern Han Dynasty politician\n Liu Fu (cyclist) (born 1957), Chinese Olympic cyclist\n Liu Bannong or Liu Fu (劉復, 1891–1934), Chinese linguist and 
poet", 33 | " The hurricane affected the southern Louisiana region one week before the Louisiana Maneuvers, a prelude to World War II", 34 | "Vaishnav was a sailor from France, who represented his native country at the 1900 Summer Olympics in Paris, France", 35 | ", while the video clip was frequently aired on television at the time of the release", 36 | "Cleopas Ncube (born November 21, 1983) is a Canadian wrestler", 37 | " He became director of the Institute of Art and Crafts of Asti", 38 | " The headquarters of the borough council were located in Sandbach", 39 | " The current route of FM 2767 was designated on December 20, 1963", 40 | " He served as the Liberty town clerk and was a farmer", 41 | " Interministerial Instruction on road signs", 42 | "Marind may refer to:\nMarind people\nMarind languages\nMarind language", 43 | " Corbett married Jennie Wright", 44 | "Fouilloy is the name of the following communes in France:\n\n Fouilloy, Oise, in the Oise department\n Fouilloy, Somme, in the Somme department", 45 | " In 1944, she signed a seven-year contract with RKO Pictures", 46 | " Love (1831–1887), colonel in the Union Army and Medal of Honor recipient\n George H", 47 | "\n\nBus connections \nCTA\n 7 Harrison (Weekdays only)\n 49 Western (Owl Service) \n X49 Western Express (Weekday Rush Hours only)\n\nNotes and references\n\nNotes\n\nReferences\n\nExternal links \n\n Western (Congress Line) Station Page Chicago-L", 48 | " The church is recorded in the National Heritage List for England as a designated Grade I listed building", 49 | " About sixty in-folios in geography, numismatics, archeology were found, but also books of biblical scholarship, dictionaries and the Encyclopédie", 50 | " A concert series at UNC-Chapel Hill is named in his honor", 51 | "\n\nAthletic career\nApart from radio and TV broadcasting, Nemone is also a keen athlete", 52 | " Nadur Youngsters is one of the most popular football teams in Gozo and brings up young players from the village 
every year", 53 | "\n\nSynopsis\n\nThis film is a personal journey travelling through time and space to unravel hidden stories and rediscover objects and images that at one time were an integral part of the lives of these artists through which their creations came into being", 54 | "\n\nHe is a recipient of the 70th Birthday Badge Medal of King Carl XVI Gustaf (30 April 2016)", 55 | " The basal part of the costa is tinged with yellow", 56 | " On June 4, 2018, creator Benjamin Freiburger confirmed on his Twitter account that the series would not return for a second season", 57 | "\n\nAmong the wildlife that can be found in Zanskar are the marmot, bear, wolf, snow leopard, bharal, alpine ibex, wild sheep and goats, and the lammergeier", 58 | "but we have now come to know that this dialect was essentially the same as that spoken by the sub-tribes occupying the land where Sydney now stands, and that they all formed part of one great tribe, the Kuriggai\"", 59 | " Montoya had scored his last IndyCar win in 2000 at Gateway", 60 | " Among his researches those about Ralph Waldo Emerson, Walt Whitman, and Vachel Lindsay", 61 | "org in October 2012, Fat Mike said that they had recorded 18 songs, but only 12 made the album", 62 | "David Lowell Ladd (September 18, 1926 – October 12, 1994) was a former Commissioner of Patents and Register of Copyrights in the United States, the first (and currently only) individual to serve in both offices", 63 | "Wiśnicze () is a village in the administrative district of Gmina Wielowieś, within Gliwice County, Silesian Voivodeship, in southern Poland", 64 | "\n\nDouglas Busk, a British mountaineer, climbed Alam-Kuh via the east ridge in 1933 and again in 1934 from over the west ridge", 65 | "\n\nLibretto\nVa, dal furor portata,\nPalesa il tradimento;\nMa ti sovvenga ingrata,\nIl traditor qual'è", 66 | " It is also authorised to build roads within its limits and impose taxes on properties coming under its jurisdiction", 67 | "\n\n \"Romance\" – 
3:23\n \"Eyes of a Stranger\" – 4:54\n \"Some Old Song\" – 3:37\n \"Rose\" (written by Paul Hyde) – 3:59\n \"Hastings Street\" – 4:59\n \"Youth\" – 4:17\n \"Lights to Change\" – 2:27\n \"Mystery to Me\" – 3:04\n \"Pennies into Gold\" – 3:06\n \"Screaming\" – 4:09\n \"Rockers\" – 2:46\n\nPersonnel\n Paul Hyde: vocals, guitars\n Bob Rock: electric/acoustic and synthetic guitars, vocals\n Christopher Taylor: drums, etc", 68 | "\n\nSeeds\n\nDraw\n\nDraw\n\nReferences\n Doubles Draw\n Qualifying Doubles Draw\n\nVirginia National Bank Men's Pro Championship - Doubles\n2009 Doubles", 69 | " No sub-species are listed at Catalogue of Life", 70 | "\n\nCollections\nPockets Rug Collection (Spring 2011): Jean pockets are sewn together to form a patchwork rug", 71 | " There is strong evidence that the British troops may have had United Irishmen members or sympathizers among their ranks", 72 | " The original two-storey pub was built in 1720 by Belfast merchant Hugh Kelly who kept it as a bonded warehouse in which rum, gin and whiskey were his mainstays", 73 | "\n\nA 2013 study found that nanotextured surfaces using cone forms produce highly water-repellent surfaces", 74 | "Dan Pontefract is a Canadian businessperson and writer", 75 | " The cavities that are formed by the octahedral chains that are occupied by A(1) and A(2) cations are occupied by Ca and its REE, Ce3+", 76 | ", The Love There That's Sleeping: The Art and Spirituality of George Harrison, Continuum (New York, NY, 2006; )", 77 | " 3rd Brigade, 2nd Division, 2nd Army Corps, Army of the Potomac, to March, 1864", 78 | " Her St Petersburg debut was a huge success, with the Russian balletomanes and critics becoming completely enchanted by the Italian ballerina", 79 | "Pedro Bernaldez de Sahagun (12th-century) was a medieval knight of Castile", 80 | " Her students in Montreal included Robert Savoie", 81 | "\n\nLines \n\nTokyo Metropolitan Bureau of Transportation\nNippori-Toneri Liner\n\nHistory \nThe station opened on 
March 30, 2008, when the Nippori-Toneri Liner began operation", 82 | " He had been involved in an earlier lynching of a motorcycle mechanic, Mithu Das, in the same district", 83 | "\n\nServices\nTrains run 0455-0020 every day", 84 | " It's part of Toroslar district (which is an intracity district within Greater Mersin)", 85 | "\" The East India Company turned attention from Mindanao as a possible outpost to the New Guinea archipelago, and John McCluer went from Bombay to Palau in 1790 as hydrographer", 86 | "\n\nCategory:Courts in the United States\nCategory:West Virginia state courts", 87 | "Euxoa violaris, the violet dart moth, is a species of moth native to North America", 88 | "The is a museum located in Asuka Village, Nara Prefecture in Japan", 89 | "This list of bridges in Andorra lists bridges of particular historical, scenic, architectural or engineering interest", 90 | " He started his professional career as an entrepreneur, and was owner of various restaurants", 91 | "Pop Train is a scheme of using Supplemental Nutrition Assistance Program (SNAP) card benefits to purchase soda and then re-selling the soda to turn a profit", 92 | " Wyman died six days later on December 15, 1953", 93 | "\n\nSemantic information on GVSM\n\nThere are at least two basic directions for embedding term to term relatedness, other than exact keyword matching, into a retrieval model:\n compute semantic correlations between terms\n compute frequency co-occurrence statistics from large corpora\n\nRecently Tsatsaronis focused on the first approach", 94 | "Trochactaeon is an extinct genus of fossil sea snails, marine gastropod mollusks in the family Acteonellidae", 95 | "\n\nFootnotes\n\nReferences\n\nNotes\n\nBibliography\n\nCategory:1828 paintings\nCategory:Collections of York Art Gallery\nCategory:Dance in art\nCategory:John Milton\nCategory:Paintings by William Etty\nCategory:Paintings depicting Hebrew Bible themes\nCategory:Musical instruments in art\nCategory:Water in art", 96 | 
"\n\nAccomplishments\n DHB-Pokal:\n : 1981\n EHF Cup Winner's Cup:\n : 1981\n EHF Challenge Cup:\n : 1997, 1998\n European Club Championship:\n : 1981\n\nTeam\n\nCurrent squad\nSquad for the 2019–20 season\n\nGoalkeepers\n 12 Péter Tatai\n 21 Johannes Jepsen\nLeft Wingers\n8 Jens Bechtloff\n 45 Jan-Eric Speckmann\nRight Wingers \n 14 Peter Strosack\n 15 Marvin Mundus \nLine players\n2 Julius Brune\n7 Patryk Walczak\n 25 Moritz Schade \n\nLeft Backs\n 13 Marko Bagarić\n 23 Valentin Spohn\n 29 Marian Orlowski\nCentral Backs\n3 Roman Bečvář\n 10 Łukasz Gierak\nRight Backs\n5 Jó Gerrit Genz\n9 Dominik Ebner\n\nTransfers\nTransfers for the 2020–21 season\n\nJoining\n Aljoša Rezar (GK) (from Bjerringbro-Silkeborg)\n Tom Skroblien (LW) (from TUSEM Essen)\n Benas Petreikis (CB) (from EHV Aue)\n Leoš Petrovský (P) (from Bergischer HC)\n\nLeaving\n Péter Tatai (GK) (to Csurgói KK)\n Jens Bechtloff (LW) (to TSG Altenhagen-Heepen)\n Marian Orlowski (LB) (to ASV Hamm-Westfalen)\n Jó Gerrit Genz (RB) (to ASV Hamm-Westfalen)\n\nReferences\n\nCategory:German handball clubs\nCategory:Handball-Bundesliga\nCategory:Lübbecke", 97 | " The opera was not performed again at the Met until a new production was mounted in 1963", 98 | "\n\nTrack Listing\n\nInfluence\nFrançoise Hardy covered The Garden of Jane Delawney on her album If You Listen", 99 | "\n\nCast\n Suzanne Talba as Conchita \n José Durany as Pedro \n Vasseur as Manuel \n Maxa\n\nReferences\n\nBibliography\n Rège, Philippe", 100 | "\n A Natick-class tugboat serving the Naval Submarine Base, New London, Connecticut\n\nMetacom", 101 | "\"\n\nIn early 2018, Messersmith released \"Purple Hearts\", the first single off his 5th full-length studio album, Late Stage Capitalism", 102 | "Teremoana Tapi Taio is a Cook Islands politician and former Cabinet Minister", 103 | " Notable people with the surname include:\n\nJeremy Kellem (born 1989), American football player\nVivien Kellems (1896-1975), American inventor", 104 | " Senator Pat 
Roberts won re-election to a second term overwhelmingly", 105 | " The festival opened with \"Junoon\" by Shyam Benegal in the out of Competition world premiere", 106 | "This list of administrative communes consists of articles about the governmental divisions known as communes, as well as lists of communes", 107 | " She was a political adviser to Prime Minister Gro Harlem Brundtland in the Prime Minister's Office from 1992 to 1996", 108 | "American Marriage Ministries is a non-denominational Internet church based in Seattle", 109 | " Foster, coaching his fifteenth season with the Badgers", 110 | "\n\nSingles main draw entrants\n\nSeeds\n\n 1 Rankings are as of January 30, 2017", 111 | " He was a longtime executive and scout in the National Basketball Association (NBA), including 27 years with the Sacramento Kings", 112 | " He promoted the construction of the church of Saint-Michel-de-Cuxa, consecrated in 953, and of the monastery", 113 | " It was built as a master planned community by Jordon Perlmutter", 114 | "\n\nThe single was written and performed while the group was still a trio", 115 | "\n\nDescription\nThe species is endemic to Sulawesi in Indonesia", 116 | " It is the site of a Verizon Wireless cell tower, a Dutchess County 911 Radio Repeater, and a fire tower that is no longer in service", 117 | " Inside the marsh, there is a two mile boardwalk", 118 | "\n\nIn 2008, the Brihanmumbai Municipal Corporation (BMC) included JNA as part of its disaster management operations to provide emergency communication support during the monsoons to provide live updates on the flooding situation in the city", 119 | " In March 1945, during World War II, the U", 120 | " This approach offers several advantages to the engineering analyst: \n\nThe data used in the study can be tailored more precisely to the engineering problem under study", 121 | "\n\nSee also\n List of psychiatry journals\n\nReferences\n\nExternal links \n \n\nCategory:Psychiatry journals\nCategory:Clinical 
psychology journals\nCategory:Publications established in 1897\nCategory:Karger academic journals\nCategory:Bimonthly journals\nCategory:English-language journals", 122 | " The peak's name was officially adopted in 1975 by the U", 123 | "\n\nReception\nThe Allmusic review by Jason Ankeny calls the album \"one of Freddie Hubbard's most obscure sessions, but admirers of the trumpeter's early-'80s return to his musical roots will find much to appreciate here\"", 124 | "\n\nFinal table\n\nTop scorers\n\nAwards\n\nReferences\n\nCategory:Latvian Higher League seasons\n1\nLatvia\nLatvia", 125 | " 1841) \n1802 – 1822 Rani Laxmipriya Devi (f) -Regent\n1841 – 9 Sep 1891 Niladhar Singh Deo (b", 126 | "\n\nFurther reading\nAdachi Yoshio ", 127 | " It flowers in the southern hemisphere from April to May", 128 | " He took the examen artium in 1904, and graduated from the Royal Frederick University with the cand", 129 | "Clarence Richard Roberts (4 November 1888 – 18 September 1966) was an Australian rules footballer who played with St Kilda in the Victorian Football League (VFL)", 130 | "\n\nSingle track listing \n\nWritten and composed by Yoshiki", 131 | " Nowadays, independent school pupils have \"the highest rates of achieving grades A or B in A-level maths and sciences\" compared to grammar, specialist and mainstream state schools, and pupils at independent schools account for a disproportionate number of the total number of A-levels in maths and sciences", 132 | " The shell can be grayish white on the outside with rather flat ribs that are somewhat darker", 133 | " They mine the leaves of their host plant", 134 | " As his son joined the New Fourth Army, he then was threatened by Hou Yibo, who forcibly occupied his house", 135 | "\n\nUnited States Team Handball Federation organized the participation of U", 136 | " She was coached mainly by Dale Hazell and also trained with John Nicks in the summer of 2000", 137 | " Several large sized mammal groups are known from this 
region, such as rhinoceri, oreodonts and hyaenodonts, all co-existing with several flightless bathornithids", 138 | " Due to the darkness, Fletcher goes round in circles, and is severely disappointed to discover that he ends up back at the cottage with Barrowclough", 139 | " At the 2006 census, its population was 581, in 159 families", 140 | " The fine graters are also sometimes sold as a wooden board covered with shark skin, which has many tiny teeth (dermal denticles) and give it a feel similar to sandpaper", 141 | " The hindwings are whitish cinereous (ash grey) towards the base", 142 | "com/mall/penn-square-mall\n\nCategory:Shopping malls established in 1960\nCategory:Shopping malls in Oklahoma\nCategory:Buildings and structures in Oklahoma City\nCategory:Economy of Oklahoma City\nCategory:Tourist attractions in Oklahoma City", 143 | " In Mozambique the latter river is called the Rio Elefantes", 144 | " It remains unnamed since its numbering in December 2007", 145 | " She won the double scull World U23 Championship in 2012 and the singles European Championship in 2016", 146 | "\n , operated by Ellerman Lines; sunk during the First World War\n , operated by Ellerman Lines; used as a troopship in the Second World War, being damaged by a mine in 1939, but survived; scrapped in 1956\n\nCategory:Ship names", 147 | "\n\nHuard moved to the United Kingdom in 2000 to play in the British Ice Hockey Superleague for the London Knights but left the team after playing just one game to return to Canada to be with his girlfriend who was seven months pregnant at the time", 148 | " He stood again as a Cumann na nGaedheal candidate at the June 1927 general election and was elected to the 6th Dáil", 149 | "The Hundred of Stirling is a Hundred of the County of Buckingham (South Australia), centered on Keith, South Australia, South east of Adelaide, South Australia", 150 | "\n\nSoaked to the Bone was officially released in February 2016 on Blue House Music and made the top 40 in the 
Official Charts Company Americana Chart", 151 | " However, they have a scheme to secure a footing for world conquest", 152 | "Mogoditshane Fighters are a football (soccer) club from the Mogoditshane in Botswana" 153 | ] -------------------------------------------------------------------------------- /example_script.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | from transformers import AutoModelForCausalLM, AutoTokenizer 8 | 9 | import prompt_optimization as prompt_opt 10 | 11 | # Setup argument parser to get command-line arguments 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--batch-size", type=int, default=100) 14 | parser.add_argument("--discrete-optimizer", type=str, default="gcg") 15 | parser.add_argument("--log-dir", type=str, default="experiments") 16 | parser.add_argument("--lr", type=float, default=0.01) 17 | parser.add_argument("--model-name", type=str, default="EleutherAI/pythia-410m") 18 | parser.add_argument("--num-steps", type=int, default=200) 19 | parser.add_argument("--optimizer", type=str, default="adam") 20 | parser.add_argument("--seed", type=int, default=None) 21 | parser.add_argument("--topk", type=int, default=250) 22 | args = parser.parse_args() 23 | 24 | # Set randomness 25 | if args.seed: 26 | np.random.seed(args.seed) 27 | torch.manual_seed(args.seed) 28 | torch.cuda.manual_seed(args.seed) 29 | torch.cuda.manual_seed_all(args.seed) 30 | torch.backends.cudnn.deterministic = True 31 | torch.backends.cudnn.benchmark = False 32 | 33 | # Generate a unique ID for the run and create the experiments directory 34 | run_id = 'example' 35 | os.makedirs(f"outputs/", exist_ok=True) 36 | # Setup logging configuration 37 | logging.basicConfig(level=logging.DEBUG, 38 | format="[%(asctime)s] %(message)s", 39 | datefmt="%Y%m%d %H:%M:%S", 40 | handlers=[logging.FileHandler(f"outputs/{run_id}.log"), 
logging.StreamHandler()]) 41 | logging.info(f"run id: {run_id}") 42 | print(f"run id: {run_id}") 43 | for arg, value in vars(args).items(): 44 | logging.info(f"{arg}: {value}") 45 | 46 | # Device, model, and tokenizer setup 47 | device = "cuda" if torch.cuda.is_available() else "cpu" 48 | if device == "cuda": 49 | model_args = dict(trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, device_map="auto") 50 | else: 51 | model_args = dict(trust_remote_code=False, low_cpu_mem_usage=True) 52 | model = AutoModelForCausalLM.from_pretrained(args.model_name, **model_args) 53 | tokenizer = AutoTokenizer.from_pretrained(args.model_name) 54 | # model = model.to(device) 55 | 56 | ############################################################################################################ 57 | # Room to play around: 58 | # the input_ids is constructed in prep_text() and is a concatenation in this order: 59 | # [chat_template[0], system_prompt, input_str, free_tokens, chat_template[1], target_str] 60 | # Prepare input and target tokens from strings 61 | num_free_tokens = 10 62 | input_str = " " 63 | target_str = "To be or not to be, that is the question." 
64 | system_prompt = "" 65 | chat_template = ("", "") 66 | ############################################################################################################ 67 | 68 | input_ids, free_token_slice, input_slice, target_slice, loss_slice = prompt_opt.prep_text(input_str, 69 | target_str, 70 | tokenizer, 71 | system_prompt, 72 | chat_template, 73 | num_free_tokens, 74 | device) 75 | # Optimize the input tokens to generate the target string 76 | if args.discrete_optimizer == "gcg": 77 | solution = prompt_opt.optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, 78 | loss_slice, args.num_steps, batch_size=args.batch_size, topk=args.topk) 79 | elif args.discrete_optimizer == "random_search": 80 | solution = prompt_opt.optimize_random_search(model, input_ids, input_slice, free_token_slice, 81 | target_slice, loss_slice, args.num_steps, batch_size=args.batch_size) 82 | else: 83 | raise ValueError("discrete_optimizer must be one of ['gcg', 'random_search']") 84 | 85 | # Test the prompt and log the new generation with the target string 86 | logging.info(f"Hard tokens returned:") 87 | optimized_ids = solution["input_ids"] 88 | output = model.generate(input_ids=optimized_ids[input_slice].unsqueeze(0), max_new_tokens=20, do_sample=False) 89 | logging.info(f"solution: {tokenizer.decode(optimized_ids[input_slice], skip_special_tokens=True)}") 90 | logging.info(f"goal: {tokenizer.decode(input_ids[target_slice], skip_special_tokens=True)}") 91 | logging.info(f"output: {tokenizer.decode(output[0, target_slice], skip_special_tokens=True)}") 92 | 93 | -------------------------------------------------------------------------------- /make_table_of_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | 7 | # argparser to get the experiment name 8 | parser = argparse.ArgumentParser(description="Create a table from JSON 
files") 9 | parser.add_argument("--experiment_name", default=None, type=str, nargs="+", help="list of experiments to include.") 10 | args = parser.parse_args() 11 | 12 | # Create a Path object with the folder path 13 | folder_path = Path("outputs") 14 | 15 | # List to hold all json data 16 | json_list = [] 17 | experiment_names_list = [] 18 | # Iterate over each JSON file in the directory 19 | for json_file in folder_path.glob("*/*.json"): 20 | with open(json_file, "r", encoding="utf-8") as f: 21 | # Load the JSON content 22 | json_content = json.load(f) 23 | # Append the content as is (which will result in a single cell containing the JSON in the DataFrame) 24 | experiment_names_list.append(json_content["cfg_experiment_name"]) 25 | if args.experiment_name is not None and json_content["cfg_experiment_name"] in args.experiment_name: 26 | json_list.append(json_content) 27 | elif args.experiment_name is None: 28 | json_list.append(json_content) 29 | 30 | print(f"The set of experiment names in the folder is: {set(experiment_names_list)}") 31 | # Convert the list of JSON objects to a DataFrame 32 | df = pd.DataFrame(json_list) 33 | df = df.sort_values(by=["cfg_model_name", "cfg_dataset", "cfg_data_idx", "cfg_discrete_optimizer"]) 34 | df["ratio"] = df["target_length"] / df["num_free_tokens"] 35 | df["memorized"] = df["ratio"] > 1 36 | print(df[["cfg_model_name", "cfg_dataset", "cfg_data_idx", "cfg_discrete_optimizer", "ratio", "memorized", 37 | "success"]].round(2).to_markdown()) 38 | 39 | # Make summary counting the average ratio and success rate for each dataset and discrete_optimizer include counts 40 | summary = df.groupby(["cfg_model_name", "cfg_dataset", "cfg_discrete_optimizer"]).agg( 41 | {"ratio": "mean", "memorized": "mean", "success": "count"}).round(2) 42 | print(summary.to_markdown()) 43 | print(f"dataframe shape: {df.shape}") 44 | -------------------------------------------------------------------------------- /prompt-minimization-main.py: 
# -----------------------------------------------------------------------------
# prompt-minimization-main.py
# -----------------------------------------------------------------------------
import json
import logging
import os

import hydra
import numpy as np
import torch
from hydra.core.hydra_config import HydraConfig
from omegaconf import OmegaConf
from transformers import AutoModelForCausalLM, AutoTokenizer

import prompt_optimization as prompt_opt
from prompt_optimization.utils import get_id_func, now, load_target_str

OmegaConf.register_new_resolver("generate_id", get_id_func())


@hydra.main(version_base=None, config_path="config", config_name="promptmin")
def main(cfg):
    """Run one prompt-minimization experiment and save results to JSON.

    Loads the model/tokenizer named in the Hydra config, optionally seeds all
    RNGs and randomizes the linear-layer weights, picks a target string
    (either directly from the config or from a dataset), runs the miniprompt
    search, and writes ``results.json`` into the Hydra run directory.
    """
    # Set randomness. `is not None` (rather than truthiness) so seed=0 works.
    if cfg.seed is not None:
        np.random.seed(cfg.seed)
        torch.manual_seed(cfg.seed)
        torch.cuda.manual_seed(cfg.seed)
        torch.cuda.manual_seed_all(cfg.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    for arg, value in OmegaConf.to_container(cfg, resolve=True).items():
        logging.info(f"{arg}: {value}")

    # Device, model, and tokenizer setup. With more than one GPU the model is
    # sharded via device_map="auto"; with one (or none) it is moved explicitly.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_args = dict(trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16)
    if torch.cuda.device_count() > 1:
        model_args["device_map"] = "auto"
    model = AutoModelForCausalLM.from_pretrained(cfg.model_name, **model_args)
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
    if torch.cuda.device_count() <= 1:
        model = model.to(device)

    if cfg.random_weights:
        # Re-initialize only the Linear layers; embeddings etc. keep their
        # pretrained values.
        logging.info("Randomizing weights")
        for module in model.modules():
            if isinstance(module, torch.nn.Linear):
                torch.nn.init.normal_(module.weight, mean=0, std=0.02)
                if module.bias is not None:
                    torch.nn.init.constant_(module.bias, 0)

    # Data setup
    input_str = cfg.input_str
    target_str = cfg.target_str
    chat_template = cfg.chat_template
    system_prompt = cfg.system_prompt

    if cfg.dataset is not None and cfg.data_idx is not None:
        target_str = load_target_str(cfg.dataset, cfg.data_idx, tokenizer)
        cfg.target_str = target_str
        # (typo fix: message previously said "targer_str")
        logging.info(f"Target string selected from dataset, cfg.target_str: {cfg.target_str}")

    # Optimization setup
    optimization_args = {"discrete_optimizer": cfg.discrete_optimizer,
                         "num_steps": cfg.num_steps,
                         "lr": cfg.lr,
                         "optimizer": cfg.optimizer,
                         "batch_size": cfg.batch_size,
                         "mini_batch_size": cfg.mini_batch_size,
                         "topk": cfg.topk}

    solution = prompt_opt.minimize_prompt(model, tokenizer, input_str, target_str, system_prompt, chat_template,
                                          device, optimization_args, max_tokens=cfg.max_tokens)
    input_slice, target_slice, loss_slice, input_ids = (solution["input_slice"],
                                                        solution["target_slice"],
                                                        solution["loss_slice"],
                                                        solution["input_ids"])

    # Test the prompt and log the new generation with the target string
    if solution["success"] is True:
        logging.info("Hard tokens returned:")
        optimized_ids = solution["input_ids"]
        output = model.generate(input_ids=optimized_ids[input_slice].unsqueeze(0), max_new_tokens=20,
                                do_sample=False)
        optimal_prompt = tokenizer.decode(optimized_ids[input_slice], skip_special_tokens=True)
        logging.info(f"solution: {optimal_prompt}")
        logging.info(f"goal: {tokenizer.decode(input_ids[target_slice], skip_special_tokens=True)}")
        logging.info(f"output: {tokenizer.decode(output[0, target_slice], skip_special_tokens=True)}")

        # Loss of the target string on its own (no prompt): a baseline for how
        # "memorized" the continuation already is.
        with torch.no_grad():
            ids_for_loss_computation = input_ids[target_slice].unsqueeze(0).to(device)
            outputs = model(ids_for_loss_computation, labels=ids_for_loss_computation)
            loss_of_target_str = outputs.loss.item()

        # Loss of the optimized prompt itself.
        with torch.no_grad():
            ids_for_loss_computation = input_ids[input_slice].unsqueeze(0).to(device)
            outputs = model(ids_for_loss_computation, labels=ids_for_loss_computation)
            loss_of_prompt = outputs.loss.item()

        solution["input_ids"] = input_ids.tolist()

        # Compile data for saving to a JSON file. slice objects are not JSON
        # serializable, so they are stored as (start, stop) pairs.
        results = {
            "target_length": target_slice.stop - target_slice.start,
            "target_str": target_str,
            "loss_of_target_str": loss_of_target_str,
            "loss_of_prompt": loss_of_prompt,
            "success": True,
            "optimal_prompt": optimal_prompt,
        }
        for k, v in solution.items():
            if isinstance(v, slice):
                results[k] = (v.start, v.stop)
            else:
                results[k] = v
    else:
        results = {"success": False,
                   "num_free_tokens": solution["num_free_tokens"],
                   "target_str": target_str,
                   "target_length": target_slice.stop - target_slice.start,
                   }

    for k, v in OmegaConf.to_container(cfg, resolve=True).items():
        results[f"cfg_{k}"] = v

    # log data to the console
    for key, value in results.items():
        logging.info(f"{key}: {value}")
    results["time"] = now()

    # Save the data to a JSON file inside the Hydra run directory.
    filename = os.path.join(HydraConfig.get().run.dir, "results.json")
    with open(filename, 'w') as json_file:
        json.dump(results, json_file)


if __name__ == "__main__":
    main()
# -----------------------------------------------------------------------------
# prompt_optimization/__init__.py
# -----------------------------------------------------------------------------
from .gcg import optimize_gcg
from .random_search import optimize_random_search
from .utils import prep_text, check_output_with_hard_tokens
from .miniprompt import minimize_prompt
-------------------------------------------------------------------------------- /prompt_optimization/gcg.py: -------------------------------------------------------------------------------- 1 | """ 2 | gcg.py 3 | an implementation of Greedy Coordinate Gradient 4 | From: Universal and Transferable Adversarial Attacks on Aligned Language Models 5 | By: Andy Zou, Zifan Wang, Nicholas Carlini, Milad Nasr, J. Zico Kolter, Matt Fredrikson 6 | 2023 7 | https://arxiv.org/abs/2307.15043 8 | 9 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 10 | """ 11 | 12 | import logging 13 | 14 | import torch 15 | import torch.nn.functional as F 16 | 17 | 18 | def sample_tokens(num_tokens, embedding_matrix, batch_size, device): 19 | sample = torch.randint(0, embedding_matrix.size(0), (batch_size, num_tokens), device=device) 20 | new_token_loc = torch.randint(0, num_tokens, (batch_size,), device=device) 21 | new_token_vals = torch.randint(0, embedding_matrix.size(0), (batch_size,), device=device) 22 | sample[torch.arange(batch_size), new_token_loc] = new_token_vals 23 | return sample 24 | 25 | 26 | def optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, loss_slice, 27 | num_steps, topk=250, batch_size=100, mini_batch_size=100): 28 | # Get embedding matrix 29 | try: 30 | embedding_matrix = model.get_input_embeddings().weight 31 | except NotImplementedError: 32 | embedding_matrix = model.transformer.wte.weight 33 | 34 | best_loss = torch.inf 35 | best_input = input_ids.clone() 36 | 37 | # Greedy Coordinate Gradient optimization loop 38 | for i in range(num_steps): 39 | # Create one-hot tensor and embeddings from input_ids 40 | inputs_one_hot = F.one_hot(input_ids, embedding_matrix.size(0)).type(embedding_matrix.dtype).unsqueeze(0) 41 | inputs_one_hot.requires_grad_(True) 42 | inputs_embeds = torch.matmul(inputs_one_hot, embedding_matrix) 43 | # Forward and backward pass 44 | output = model(inputs_embeds=inputs_embeds) 45 
| loss = torch.nn.functional.cross_entropy(output.logits[0, loss_slice], input_ids[target_slice].squeeze()) 46 | grad = torch.autograd.grad(loss, inputs_one_hot)[0][:, free_token_slice] 47 | with torch.no_grad(): 48 | # Get topk gradients 49 | top_values, top_indices = torch.topk(-grad[0], topk, dim=1) 50 | # Build batch of input_ids with random topk tokens 51 | free_token_ids = inputs_one_hot[0, free_token_slice].argmax(-1) 52 | free_tokens_batch = free_token_ids.repeat(batch_size, 1) 53 | new_token_loc = torch.randint(0, free_token_ids.size(0), (batch_size, 1)) 54 | new_token_vals = top_indices[new_token_loc, torch.randint(0, topk, (batch_size, 1))] 55 | free_tokens_batch[torch.arange(batch_size), new_token_loc.squeeze()] = new_token_vals.squeeze() 56 | candidates_input_ids = input_ids.repeat(batch_size, 1) 57 | candidates_input_ids[:, free_token_slice] = free_tokens_batch 58 | 59 | loss = torch.zeros(batch_size) 60 | for mini_batch in range(0, batch_size, mini_batch_size): 61 | output = model(input_ids=candidates_input_ids[mini_batch:mini_batch + mini_batch_size]) 62 | labels = input_ids[target_slice].repeat(output.logits.size(0), 1) 63 | loss_mini_batch = F.cross_entropy(output.logits[:, loss_slice].transpose(1, 2), labels, 64 | reduction="none") 65 | loss[mini_batch:mini_batch + mini_batch_size] = loss_mini_batch.mean(dim=-1) 66 | best_candidate = torch.argmin(loss) 67 | input_ids = candidates_input_ids[best_candidate] 68 | 69 | # Compute test loss and check token matches 70 | output_single = model(input_ids=input_ids.unsqueeze(0)) 71 | match = (output_single.logits[0, loss_slice].argmax(-1) == input_ids[target_slice].squeeze()) 72 | logging.info(f"step: {i:<4} | " 73 | f"loss: {loss[best_candidate].mean().item():0.6f} | " 74 | f"{match.int().tolist()} | " 75 | ) 76 | if match.all(): 77 | best_input = input_ids.clone() 78 | break 79 | if loss[best_candidate].mean().item() < best_loss: 80 | best_loss = loss[best_candidate].mean().item() 81 | best_input = 
input_ids.clone() 82 | 83 | return {"input_ids": best_input, "inputs_embeds": model.get_input_embeddings()(best_input).unsqueeze(0)} 84 | 85 | -------------------------------------------------------------------------------- /prompt_optimization/miniprompt.py: -------------------------------------------------------------------------------- 1 | """ 2 | miniprompt.py 3 | an implementation of miniprompt 4 | 5 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 6 | """ 7 | import logging 8 | 9 | import prompt_optimization as prompt_opt 10 | 11 | 12 | def minimize_prompt(model, tokenizer, input_str, target_str, system_prompt, chat_template, device, optimization_args, 13 | max_tokens=30): 14 | n_tokens_in_prompt = 5 15 | running_max = max_tokens 16 | running_min = 0 17 | success = False 18 | best_prompt = None 19 | done = False 20 | best_slices = (None, None, None, None) 21 | 22 | while not done: 23 | logging.info("\n------------------------------------\n") 24 | logging.info(f"{n_tokens_in_prompt} tokens in the prompt") 25 | input_ids, free_token_slice, input_slice, target_slice, loss_slice = prompt_opt.prep_text(input_str, 26 | target_str, 27 | tokenizer, 28 | system_prompt, 29 | chat_template, 30 | n_tokens_in_prompt, 31 | device) 32 | if running_max == -1: 33 | running_max = (target_slice.stop - target_slice.start) * 5 34 | if optimization_args["discrete_optimizer"] == "gcg": 35 | solution = prompt_opt.optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, 36 | loss_slice, optimization_args["num_steps"], 37 | batch_size=optimization_args["batch_size"], 38 | topk=optimization_args["topk"], 39 | mini_batch_size=optimization_args["mini_batch_size"]) 40 | elif optimization_args["discrete_optimizer"] == "random_search": 41 | solution = prompt_opt.optimize_random_search(model, input_ids, input_slice, free_token_slice, 42 | target_slice, loss_slice, optimization_args["num_steps"], 43 | 
batch_size=optimization_args["batch_size"], 44 | mini_batch_size=optimization_args["mini_batch_size"]) 45 | else: 46 | raise ValueError( 47 | "discrete_optimizer must be one of ['gcg', 'random_search']") 48 | 49 | target_acquired = prompt_opt.check_output_with_hard_tokens(model, solution["input_ids"].unsqueeze(0), 50 | target_slice, 51 | loss_slice) 52 | 53 | if target_acquired: 54 | logging.info(f"Target acquired with {n_tokens_in_prompt} tokens in the prompt") 55 | running_max = n_tokens_in_prompt 56 | success = True 57 | best_prompt = solution["input_ids"] 58 | new_num_tokens = n_tokens_in_prompt - 1 59 | best_slices = (free_token_slice, input_slice, target_slice, loss_slice) 60 | else: 61 | logging.info(f"Target NOT acquired with {n_tokens_in_prompt} tokens in the prompt") 62 | new_num_tokens = n_tokens_in_prompt + 5 63 | running_min = n_tokens_in_prompt 64 | optimization_args["num_steps"] = int(optimization_args["num_steps"] * 1.2) 65 | 66 | if (new_num_tokens >= running_max) or (new_num_tokens <= running_min): 67 | done = True 68 | else: 69 | n_tokens_in_prompt = new_num_tokens 70 | 71 | output = {"free_token_slice": best_slices[0] if best_slices[0] is not None else free_token_slice, 72 | "input_slice": best_slices[1] if best_slices[1] is not None else input_slice, 73 | "target_slice": best_slices[2] if best_slices[2] is not None else target_slice, 74 | "loss_slice": best_slices[3] if best_slices[3] is not None else loss_slice, 75 | "success": success, 76 | "num_free_tokens": running_max, 77 | "input_ids": best_prompt, 78 | } 79 | return output 80 | -------------------------------------------------------------------------------- /prompt_optimization/random_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | random_search.py 3 | an implementation of random search 4 | Proposed for prompt optimization in 5 | Adversarial attacks on gpt-4 via simple random search. 2023. 
by Maksym Andriushchenko 6 | 7 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 8 | """ 9 | import logging 10 | 11 | import torch 12 | 13 | 14 | def optimize_random_search(model, input_ids, input_slice, free_token_slice, target_slice, loss_slice, 15 | num_steps, batch_size=100, mini_batch_size=100): 16 | with torch.no_grad(): 17 | # Get embedding matrix 18 | embedding_matrix = model.get_input_embeddings().weight 19 | 20 | best_loss = torch.inf 21 | best_input = input_ids.clone() 22 | 23 | # Random search optimization loop 24 | for i in range(num_steps): 25 | # Get random batch of single token perturbations for the free tokens 26 | free_token_ids = input_ids[free_token_slice] 27 | free_tokens_batch = free_token_ids.repeat(batch_size, 1) 28 | new_token_loc = torch.randint(0, free_token_ids.size(0), (batch_size,), device=input_ids.device) 29 | new_token_vals = torch.randint(0, embedding_matrix.size(0), (batch_size,), device=input_ids.device) 30 | free_tokens_batch[torch.arange(batch_size), new_token_loc] = new_token_vals 31 | batch_input_ids = input_ids.repeat(batch_size, 1) 32 | batch_input_ids[:, free_token_slice] = free_tokens_batch 33 | 34 | loss = torch.zeros(batch_size) 35 | for mini_batch in range(0, batch_size, mini_batch_size): 36 | output = model(input_ids=batch_input_ids[mini_batch:mini_batch + mini_batch_size]) 37 | labels = input_ids[target_slice].repeat(output.logits.size(0), 1) 38 | loss_mini_batch = torch.nn.functional.cross_entropy(output.logits[:, loss_slice].transpose(1, 2), 39 | labels, 40 | reduction="none") 41 | loss[mini_batch:mini_batch + mini_batch_size] = loss_mini_batch.mean(dim=-1) 42 | best_candidate = torch.argmin(loss) 43 | 44 | input_ids = batch_input_ids[best_candidate] 45 | 46 | # compute test loss 47 | output_single = model(input_ids=input_ids.unsqueeze(0)) 48 | match = (output_single.logits[0, loss_slice].argmax(-1) == input_ids[target_slice].squeeze()) 49 | logging.info(f"step: {i:<4} | " 
50 | f"loss: {loss[best_candidate].mean().item():0.6f} | " 51 | f"{match.int().tolist()} | ") 52 | if match.all(): 53 | best_input = input_ids.clone() 54 | break 55 | if loss[best_candidate].mean().item() < best_loss: 56 | best_loss = loss[best_candidate].mean().item() 57 | best_input = input_ids.clone() 58 | 59 | return {"input_ids": best_input, "inputs_embeds": model.get_input_embeddings()(best_input).unsqueeze(0)} 60 | -------------------------------------------------------------------------------- /prompt_optimization/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | utils.py 3 | functions for preparing text for discrete optimization 4 | """ 5 | import datetime 6 | import json 7 | 8 | import torch 9 | from almost_unique_id import generate_id 10 | 11 | 12 | def load_target_str(dataset_name, idx, tokenizer): 13 | if dataset_name == "essays": 14 | with open("datasets/essays_by_avi.json", "r") as fh: 15 | quote_list = json.load(fh) 16 | target_str = quote_list[idx] 17 | elif dataset_name == "famous_quotes": 18 | with open("datasets/famous_quotes.json", "r") as fh: 19 | quote_list = json.load(fh) 20 | target_str = quote_list[idx] 21 | elif dataset_name == "wikipedia": 22 | with open("datasets/wikipedia.json", "r") as fh: 23 | quote_list = json.load(fh) 24 | target_str = quote_list[idx] 25 | elif dataset_name == "ap": 26 | with open("datasets/ap-articles-november-2023.json", "r") as fh: 27 | quote_list = json.load(fh) 28 | target_str = quote_list[idx] 29 | elif dataset_name == "random": 30 | len = 3 + (idx % 15) 31 | target_ids = torch.randint(0, tokenizer.vocab_size, (100, 20))[idx, :len] 32 | target_str = tokenizer.decode(target_ids) 33 | else: 34 | raise ValueError(f"args.dataset = {dataset_name}, but that option isn't implemented.") 35 | return target_str 36 | 37 | 38 | def prep_text(input_str, target_str, tokenizer, system_prompt, chat_template, num_free_tokens, device): 39 | input_tokens = 
tokenizer.encode(input_str, return_tensors="pt", add_special_tokens=False).to(device=device) 40 | target_tokens = tokenizer.encode(target_str, return_tensors="pt", add_special_tokens=False).to(device=device) 41 | system_prompt_tokens = tokenizer.encode(system_prompt, return_tensors="pt", add_special_tokens=False).to( 42 | device=device) 43 | chat_template_tokens = ( 44 | tokenizer.encode(chat_template[0], return_tensors="pt", add_special_tokens=False).to(device=device), 45 | tokenizer.encode(chat_template[1], return_tensors="pt", add_special_tokens=False).to(device=device)) 46 | free_tokens = torch.randint(0, tokenizer.vocab_size, (1, num_free_tokens)).to(device=device) 47 | 48 | input_ids = torch.cat((chat_template_tokens[0], system_prompt_tokens, input_tokens, free_tokens, 49 | chat_template_tokens[1], target_tokens), dim=1).squeeze().long() 50 | 51 | # build slice objects 52 | tokens_before_free = chat_template_tokens[0].size(-1) + system_prompt_tokens.size(-1) + input_tokens.size(-1) 53 | free_token_slice = slice(tokens_before_free, tokens_before_free + free_tokens.size(-1)) 54 | input_slice = slice(0, input_ids.size(-1) - target_tokens.size(-1)) 55 | target_slice = slice(input_ids.size(-1) - target_tokens.size(-1), input_ids.size(-1)) 56 | loss_slice = slice(input_ids.size(-1) - target_tokens.size(-1) - 1, input_ids.size(-1) - 1) 57 | 58 | return input_ids, free_token_slice, input_slice, target_slice, loss_slice 59 | 60 | 61 | def check_output_with_hard_tokens(model, input_ids, target_slice, loss_slice): 62 | output = model(input_ids) 63 | match = (output.logits[0, loss_slice].argmax(-1) == input_ids[0, target_slice].squeeze()).all() 64 | return match 65 | 66 | 67 | def now(): 68 | return datetime.datetime.now().strftime("%Y%m%d-%H:%M:%S") 69 | 70 | 71 | def get_id_func(): 72 | id = generate_id() 73 | 74 | def get_id(): 75 | return id 76 | 77 | return get_id 78 | -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.24.1 2 | ai2-olmo==0.2.5 3 | aiofiles==23.2.1 4 | aiohttp==3.8.4 5 | aiosignal==1.3.1 6 | almost-unique-id==0.0.3 7 | evaluate==0.4.0 8 | huggingface-hub==0.20.3 9 | hydra-core==1.3.2 10 | idna==3.4 11 | ipython==8.15.0 12 | jupyter==1.0.0 13 | matplotlib==3.7.1 14 | matplotlib-inline==0.1.6 15 | numpy==1.25.0 16 | pandas==2.0.3 17 | seaborn==0.12.2 18 | tabulate==0.9.0 19 | torch==2.1.0 20 | transformers==4.38.2 21 | --------------------------------------------------------------------------------