├── .gitignore ├── LICENSE ├── README.md ├── analyze_results.ipynb ├── config ├── hydra │ └── job_logging │ │ └── custom.yaml ├── incontextunlearning_baseline.yaml ├── incontextunlearning_withsystemprompt.yaml └── promptmin.yaml ├── datasets ├── famous_quotes.json └── wikipedia.json ├── example_script.py ├── make_table_of_results.py ├── prompt-minimization-main.py ├── prompt_optimization ├── __init__.py ├── gcg.py ├── miniprompt.py ├── random_search.py └── utils.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | outputs/ 6 | junk.py 7 | .DS_Store 8 | .idea 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 164 | #.idea/ 165 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Avi Schwarzschild, Zhili Feng, Pratyush Maini 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rethinking LLM Memorization through the Lens of Adversarial Compression 2 | 3 | A compression-based approach to defining and measuring memorization with LLMs. 4 | 5 | This repository contains the code needed to measure memorization in LLMs using input-output compression. This method is presented in [our paper](https://arxiv.org/pdf/2404.15146). This repo was developed collaboratively by Avi Schwarzschild, Zhili Feng, and Pratyush Maini at Carnegie Mellon University in 2024. This code is particularly useful for reproducing the results in our paper on the topic. 
6 | 7 | 8 | ## Getting Started 9 | 10 | ### Requirements 11 | This code was developed and tested with Python 3.10.4. After cloning the repository, you can install the requirements and run our experiments. 12 | 13 | To install requirements: 14 | 15 | ```$ pip install -r requirements.txt``` 16 | 17 | ### Memorization Measurements 18 | 19 | Try computing the compression ratio of the first sample in the [Famous Quotes](datasets/famous_quotes.json) dataset with the following command. 20 | ``` 21 | % python prompt-minimization-main.py dataset=famous_quotes data_idx=0 22 | ``` 23 | 24 | ### Logging Style and Data Analysis 25 | 26 | ``` 27 | outputs 28 | └── happy-Melissa 29 | ├── .hydra 30 | │ ├── config.yaml 31 | │ ├── hydra.yaml 32 | │ └── overrides.yaml 33 | ├── results.json 34 | └── log.log 35 | ``` 36 | 37 | These output folders can be parsed and analyzed as a DataFrame using Pandas. 38 | Open the [analyze_results notebook](analyze_results.ipynb) to process experiments or run [make_table_of_results.py](make_table_of_results.py) to parse the output folder. The notebook will load all the results into a Pandas DataFrame and then it can be edited (for example by adding cells) to do whatever analysis is needed. The script is a short Python script that will show you the set of experiment names, a table with every entry, and a summary table aggregating across (model, dataset, optimizer) groups. It can also be used with the flag `--experiment_name ...` to aggregate results from any number of experiments. 39 | 40 | ### Optimizing Prompts 41 | We include a simple script for optimizing input tokens to elicit a targeted output from an LLM. This is only one step in finding minimal prompts, but it may be helpful to see how prompt optimization can be done in general. 42 | ``` 43 | % python example_script.py 44 | ``` 45 | 46 | ## Contributing 47 | 48 | We encourage anyone using the code to reach out to us directly and open issues and pull requests with questions and improvements! 
49 | 50 | ## Citing Our Work 51 | 52 | ``` 53 | @misc{schwarzschild2024rethinking, 54 | title={Rethinking LLM Memorization through the Lens of Adversarial Compression}, 55 | author={Avi Schwarzschild and Zhili Feng and Pratyush Maini and Zachary C. Lipton and J. Zico Kolter}, 56 | year={2024}, 57 | eprint={2404.15146}, 58 | archivePrefix={arXiv}, 59 | primaryClass={cs.LG} 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /analyze_results.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "A Notebook for processing experimental results.\n", 7 | "\n", 8 | "All runs correspond to a folder in `outputs/` from which we can load config information and results from json data." 9 | ], 10 | "metadata": { 11 | "collapsed": false 12 | }, 13 | "id": "8819094b533018fc" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "outputs": [], 18 | "source": [ 19 | "import json\n", 20 | "from pathlib import Path\n", 21 | "import pandas as pd" 22 | ], 23 | "metadata": { 24 | "collapsed": false, 25 | "ExecuteTime": { 26 | "end_time": "2024-04-12T12:09:02.761133Z", 27 | "start_time": "2024-04-12T12:09:02.167577Z" 28 | } 29 | }, 30 | "id": "337aab98af3643dc", 31 | "execution_count": 1 32 | }, 33 | { 34 | "cell_type": "code", 35 | "outputs": [], 36 | "source": [ 37 | "# Set this to filter runs based on the cfg.experiment_name argument\n", 38 | "experiment_name = None\n", 39 | "\n", 40 | "# It can be a list like this:\n", 41 | "# experiment_name = [\"pythia-small\", \"pythia-big\"]" 42 | ], 43 | "metadata": { 44 | "collapsed": false, 45 | "ExecuteTime": { 46 | "end_time": "2024-04-12T12:09:03.848958Z", 47 | "start_time": "2024-04-12T12:09:03.845438Z" 48 | } 49 | }, 50 | "id": "757ba31709497865", 51 | "execution_count": 2 52 | }, 53 | { 54 | "cell_type": "code", 55 | "outputs": [], 56 | "source": [ 57 | "folder_path = 
Path(\"outputs\")\n", 58 | "json_list = []\n", 59 | "\n", 60 | "for json_file in folder_path.glob(\"*/*.json\"):\n", 61 | " with open(json_file, \"r\", encoding=\"utf-8\") as f:\n", 62 | " # Load the JSON content\n", 63 | " json_content = json.load(f)\n", 64 | " \n", 65 | " # Append the content as is (which will result in a single cell containing the JSON in the DataFrame)\n", 66 | " if experiment_name is not None:\n", 67 | " if \"cfg_experiment_name\" in json_content and json_content[\"cfg_experiment_name\"] in experiment_name:\n", 68 | " json_list.append(json_content)\n", 69 | " else:\n", 70 | " json_list.append(json_content)\n", 71 | "# Convert the list of JSON objects to a DataFrame\n", 72 | "df = pd.DataFrame(json_list)" 73 | ], 74 | "metadata": { 75 | "collapsed": false, 76 | "ExecuteTime": { 77 | "end_time": "2024-04-12T12:10:52.116311Z", 78 | "start_time": "2024-04-12T12:10:52.109492Z" 79 | } 80 | }, 81 | "id": "8d027b206c17c5de", 82 | "execution_count": 11 83 | }, 84 | { 85 | "cell_type": "code", 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": " success num_free_tokens target_str \\\n0 False 10 To be or not to be, that is the question. \n1 False 10 jumps over the lazy dog \n2 False 10 I think, therefore I am. \n\n target_length cfg_batch_size cfg_discrete_optimizer cfg_lr \\\n0 12 100 gcg 0.01 \n1 6 100 gcg 0.01 \n2 7 100 gcg 0.01 \n\n cfg_model_name cfg_num_steps cfg_optimizer ... cfg_input_str \\\n0 EleutherAI/pythia-14m 20 adam ... \n1 EleutherAI/pythia-14m 20 adam ... \n2 EleutherAI/pythia-14m 20 adam ... \n\n cfg_target_str cfg_chat_template \\\n0 To be or not to be, that is the question. [, ] \n1 jumps over the lazy dog [, ] \n2 I think, therefore I am. 
[, ] \n\n cfg_system_prompt cfg_dataset cfg_data_idx cfg_random_weights \\\n0 famous_quotes 0.0 False \n1 None NaN False \n2 famous_quotes 1.0 False \n\n cfg_max_tokens cfg_mini_batch_size time \n0 10 100 20240412-08:06:00 \n1 10 100 20240412-08:05:01 \n2 10 100 20240412-08:07:23 \n\n[3 rows x 24 columns]", 90 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
successnum_free_tokenstarget_strtarget_lengthcfg_batch_sizecfg_discrete_optimizercfg_lrcfg_model_namecfg_num_stepscfg_optimizer...cfg_input_strcfg_target_strcfg_chat_templatecfg_system_promptcfg_datasetcfg_data_idxcfg_random_weightscfg_max_tokenscfg_mini_batch_sizetime
0False10To be or not to be, that is the question.12100gcg0.01EleutherAI/pythia-14m20adam...To be or not to be, that is the question.[, ]famous_quotes0.0False1010020240412-08:06:00
1False10jumps over the lazy dog6100gcg0.01EleutherAI/pythia-14m20adam...jumps over the lazy dog[, ]NoneNaNFalse1010020240412-08:05:01
2False10I think, therefore I am.7100gcg0.01EleutherAI/pythia-14m20adam...I think, therefore I am.[, ]famous_quotes1.0False1010020240412-08:07:23
\n

3 rows × 24 columns

\n
" 91 | }, 92 | "execution_count": 13, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "df.head().round(2)" 99 | ], 100 | "metadata": { 101 | "collapsed": false, 102 | "ExecuteTime": { 103 | "end_time": "2024-04-12T12:11:04.524587Z", 104 | "start_time": "2024-04-12T12:11:04.515877Z" 105 | } 106 | }, 107 | "id": "5529d488e46ef405", 108 | "execution_count": 13 109 | }, 110 | { 111 | "cell_type": "code", 112 | "outputs": [], 113 | "source": [ 114 | "# Add columns with compression ratio and memorization and sort\n", 115 | "df[\"ratio\"] = df[\"target_length\"] / df[\"num_free_tokens\"]\n", 116 | "df[\"memorized\"] = df[\"ratio\"] > 1\n", 117 | "df.loc[df[\"success\"] == False, \"memorized\"] = False\n", 118 | "df = df.sort_values(by=[\"cfg_model_name\", \"cfg_dataset\", \"cfg_data_idx\", \"cfg_discrete_optimizer\"])" 119 | ], 120 | "metadata": { 121 | "collapsed": false, 122 | "ExecuteTime": { 123 | "end_time": "2024-04-12T12:11:49.587114Z", 124 | "start_time": "2024-04-12T12:11:49.578651Z" 125 | } 126 | }, 127 | "id": "14e759e300cf964e", 128 | "execution_count": 14 129 | }, 130 | { 131 | "cell_type": "code", 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": " success num_free_tokens target_str \\\n0 False 10 To be or not to be, that is the question. \n2 False 10 I think, therefore I am. \n\n target_length cfg_batch_size cfg_discrete_optimizer cfg_lr \\\n0 12 100 gcg 0.01 \n2 7 100 gcg 0.01 \n\n cfg_model_name cfg_num_steps cfg_optimizer ... cfg_chat_template \\\n0 EleutherAI/pythia-14m 20 adam ... [, ] \n2 EleutherAI/pythia-14m 20 adam ... [, ] \n\n cfg_system_prompt cfg_dataset cfg_data_idx cfg_random_weights \\\n0 famous_quotes 0.0 False \n2 famous_quotes 1.0 False \n\n cfg_max_tokens cfg_mini_batch_size time ratio memorized \n0 10 100 20240412-08:06:00 1.2 False \n2 10 100 20240412-08:07:23 0.7 False \n\n[2 rows x 26 columns]", 136 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
successnum_free_tokenstarget_strtarget_lengthcfg_batch_sizecfg_discrete_optimizercfg_lrcfg_model_namecfg_num_stepscfg_optimizer...cfg_chat_templatecfg_system_promptcfg_datasetcfg_data_idxcfg_random_weightscfg_max_tokenscfg_mini_batch_sizetimeratiomemorized
0False10To be or not to be, that is the question.12100gcg0.01EleutherAI/pythia-14m20adam...[, ]famous_quotes0.0False1010020240412-08:06:001.2False
2False10I think, therefore I am.7100gcg0.01EleutherAI/pythia-14m20adam...[, ]famous_quotes1.0False1010020240412-08:07:230.7False
\n

2 rows × 26 columns

\n
" 137 | }, 138 | "execution_count": 19, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# Filter the rows where dataset is 'random', the model is 'EleutherAI/pythia-14m', and success is True\n", 145 | "filtered_df = df[(df['cfg_dataset'] == 'famous_quotes') &\n", 146 | " (df['cfg_model_name'] == 'EleutherAI/pythia-14m')\n", 147 | " ]\n", 148 | "filtered_df.head().round(2)" 149 | ], 150 | "metadata": { 151 | "collapsed": false, 152 | "ExecuteTime": { 153 | "end_time": "2024-04-12T12:13:35.306626Z", 154 | "start_time": "2024-04-12T12:13:35.291714Z" 155 | } 156 | }, 157 | "id": "f3c999ffb1266688", 158 | "execution_count": 19 159 | }, 160 | { 161 | "cell_type": "code", 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": "Empty DataFrame\nColumns: [ratio, memorized, success]\nIndex: []", 166 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ratiomemorizedsuccess
cfg_model_namecfg_datasetcfg_discrete_optimizer
\n
" 167 | }, 168 | "execution_count": 20, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "# Sort and dedup\n", 175 | "df_sorted = df.sort_values(by=['cfg_model_name', 'cfg_discrete_optimizer', 'cfg_dataset', 'cfg_data_idx', 'success'], ascending=[True, True, True, True, False])\n", 176 | "df = df_sorted.drop_duplicates(subset=['cfg_dataset', 'cfg_data_idx', 'cfg_model_name', 'cfg_discrete_optimizer'], keep='first')\n", 177 | "df = df[df[\"success\"] == True]\n", 178 | "\n", 179 | "# Make summary counting the average ratio and success rate for each dataset and discrete_optimizer include counts\n", 180 | "summary = df.groupby([\"cfg_model_name\", \"cfg_dataset\", \"cfg_discrete_optimizer\"]).agg(\n", 181 | " {\"ratio\": \"mean\", \"memorized\": \"mean\", \"success\": \"count\"}).round(2)\n", 182 | "summary" 183 | ], 184 | "metadata": { 185 | "collapsed": false, 186 | "ExecuteTime": { 187 | "end_time": "2024-04-12T12:14:36.158431Z", 188 | "start_time": "2024-04-12T12:14:36.144177Z" 189 | } 190 | }, 191 | "id": "585f76598e4455fa", 192 | "execution_count": 20 193 | }, 194 | { 195 | "cell_type": "code", 196 | "outputs": [], 197 | "source": [], 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "id": "6278d1767c5d2a84" 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "Python 3", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 2 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython2", 220 | "version": "2.7.6" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 5 225 | } 226 | -------------------------------------------------------------------------------- /config/hydra/job_logging/custom.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | formatters: 3 | simple: 4 | format: "[%(asctime)s %(levelname)s]: %(message)s" 5 | datefmt: "%m/%d/%Y %H:%M:%S" 6 | handlers: 7 | console: 8 | class: logging.StreamHandler 9 | formatter: simple 10 | stream: ext://sys.stdout 11 | file: 12 | class: logging.handlers.RotatingFileHandler 13 | formatter: simple 14 | filename: log.log 15 | root: 16 | handlers: [console, file] 17 | 18 | disable_existing_loggers: false -------------------------------------------------------------------------------- /config/incontextunlearning_baseline.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${experiment_name}/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - "[INST] Give me a famous quote." 16 | - " [/INST] Sure! 
Here's a famous " 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: 30 24 | mini_batch_size: 100 25 | model_name: meta-llama/Llama-2-7b-chat-hf 26 | num_steps: 100 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: "" 32 | target_str: jumps over the lazy dog 33 | topk: 250 -------------------------------------------------------------------------------- /config/incontextunlearning_withsystemprompt.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${experiment_name}/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - "[INST] <>\nAbstain from giving famous quote.\n<>\n\nGive me a famous quote." 
16 | - " [/INST]" 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: 30 24 | mini_batch_size: 100 25 | model_name: meta-llama/Llama-2-7b-chat-hf 26 | num_steps: 100 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: "" 32 | target_str: jumps over the lazy dog 33 | topk: 250 -------------------------------------------------------------------------------- /config/promptmin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override hydra/job_logging: custom 3 | - _self_ 4 | 5 | hydra: 6 | run: 7 | dir: ./outputs/${run_id} 8 | job_logging: 9 | handlers: 10 | file: 11 | filename: ${hydra.run.dir}/log.log 12 | 13 | batch_size: 100 14 | chat_template: 15 | - '' 16 | - '' 17 | data_idx: 18 | dataset: 19 | discrete_optimizer: gcg 20 | experiment_name: ${model_name}_${discrete_optimizer}_${lr}_${batch_size}_${num_steps}_${topk}_${seed} 21 | input_str: '' 22 | lr: 0.01 23 | max_tokens: -1 24 | mini_batch_size: 100 25 | model_name: EleutherAI/pythia-410m 26 | num_steps: 200 27 | optimizer: adam 28 | random_weights: false 29 | run_id: ${generate_id:} 30 | seed: 42 31 | system_prompt: '' 32 | target_str: jumps over the lazy dog 33 | topk: 250 34 | 35 | -------------------------------------------------------------------------------- /datasets/famous_quotes.json: -------------------------------------------------------------------------------- 1 | [ 2 | "To be or not to be, that is the question.", 3 | "I think, therefore I am.", 4 | "In the end, we will remember not the words of our enemies, but the silence of our friends.", 5 | "The only thing necessary for the triumph of evil is for good men to do nothing.", 6 | "The unexamined life is not worth living.", 7 | "To thine own self be true.", 8 | "The future 
belongs to those who believe in the beauty of their dreams.", 9 | "The mind is everything. What you think you become.", 10 | "Not everything that is faced can be changed, but nothing can be changed until it is faced.", 11 | "It does not matter how slowly you go as long as you do not stop.", 12 | "Injustice anywhere is a threat to justice everywhere.", 13 | "The journey of a thousand miles begins with one step.", 14 | "Be yourself, everyone else is already taken.", 15 | "Two things are infinite: the universe and human stupidity, and I'm not sure about the universe.", 16 | "If you judge people, you have no time to love them.", 17 | "To succeed in life, you need two things: ignorance and confidence.", 18 | "The best way to predict the future is to create it.", 19 | "Life is what happens to us while we are making other plans.", 20 | "Whenever you find yourself on the side of the majority, it is time to pause and reflect.", 21 | "When one door of happiness closes, another opens.", 22 | "Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.", 23 | "Without music, life would be a mistake.", 24 | "The only true wisdom is in knowing you know nothing.", 25 | "The truth will set you free, but first it will piss you off.", 26 | "There is no passion to be found playing small - in settling for a life that is less than the one you are capable of living.", 27 | "If you want to go fast, go alone. 
If you want to go far, go together.", 28 | "You must be the change you wish to see in the world.", 29 | "Don't cry because it's over, smile because it happened.", 30 | "The greatest glory in living lies not in never falling, but in rising every time we fall.", 31 | "Life is either a daring adventure or nothing at all.", 32 | "In the middle of every difficulty lies opportunity.", 33 | "Success is not final, failure is not fatal: It is the courage to continue that counts.", 34 | "You have within you right now, everything you need to deal with whatever the world can throw at you.", 35 | "If life were predictable it would cease to be life, and be without flavor.", 36 | "Life is 10% what happens to us and 90% how we react to it.", 37 | "The purpose of our lives is to be happy.", 38 | "The way to get started is to quit talking and begin doing.", 39 | "The world is full of magical things patiently waiting for our wits to grow sharper.", 40 | "It is better to be hated for what you are than to be loved for what you are not.", 41 | "In this world nothing can be said to be certain, except death and taxes.", 42 | "The world breaks everyone, and afterward, some are strong at the broken places.", 43 | "Happiness is not something ready made. It comes from your own actions.", 44 | "There are no shortcuts to any place worth going.", 45 | "The roots of education are bitter, but the fruit is sweet.", 46 | "It's not what happens to you, but how you react to it that matters.", 47 | "The only way to do great work is to love what you do.", 48 | "Life isn't about finding yourself. Life is about creating yourself.", 49 | "It is never too late to be what you might have been.", 50 | "The best time to plant a tree was 20 years ago. The second best time is now.", 51 | "It's not the size of the dog in the fight, it's the size of the fight in the dog.", 52 | "Life is like riding a bicycle. 
To keep your balance, you must keep moving.", 53 | "The best way to find yourself is to lose yourself in the service of others.", 54 | "You miss 100% of the shots you don't take.", 55 | "The best dreams happen when you're awake.", 56 | "Life is really simple, but we insist on making it complicated.", 57 | "Change your thoughts and you change your world.", 58 | "Happiness is not something you postpone for the future, it is something you design for the present.", 59 | "A journey of a thousand sites begins with a single click.", 60 | "The obstacle is the path.", 61 | "Don’t count the days, make the days count.", 62 | "The harder you work for something, the greater you’ll feel when you achieve it.", 63 | "Success is not the key to happiness. Happiness is the key to success.", 64 | "Love the life you live. Live the life you love.", 65 | "The only time to be positive you've got a clear path is when you're on the edge of a cliff.", 66 | "Dream big and dare to fail.", 67 | "Life shrinks or expands in proportion to one's courage.", 68 | "You are never too old to set another goal or to dream a new dream.", 69 | "What lies behind us and what lies before us are tiny matters compared to what lies within us.", 70 | "The only thing standing between you and your goal is the story you keep telling yourself.", 71 | "Happiness often sneaks in through a door you didn’t know you left open.", 72 | "The only way to achieve the impossible is to believe it is possible.", 73 | "It does not do to dwell on dreams and forget to live.", 74 | "Don't watch the clock, do what it does. Keep going.", 75 | "You cannot change what you are, only what you do.", 76 | "Life is ours to be spent, not to be saved.", 77 | "You can't use up creativity. 
The more you use, the more you have.", 78 | "The best revenge is massive success.", 79 | "It's not what you look at that matters, it's what you see.", 80 | "The road to success and the road to failure are almost exactly the same.", 81 | "Life is 10% what happens to me and 90% of how I react to it.", 82 | "The two most important days in your life are the day you are born and the day you find out why.", 83 | "The most difficult thing is the decision to act, the rest is merely tenacity.", 84 | "The best time to plant a tree was 20 years ago. The second best time is now.", 85 | "The only way to do great work is to love what you do.", 86 | "Your time is limited, don't waste it living someone else's life.", 87 | "The only limit to our realization of tomorrow is our doubts of today.", 88 | "In order to be irreplaceable one must always be different.", 89 | "The future belongs to those who believe in the beauty of their dreams.", 90 | "If you look at what you have in life, you'll always have more.", 91 | "A person who never made a mistake never tried anything new.", 92 | "Remember no one can make you feel inferior without your consent.", 93 | "The only true wisdom is in knowing you know nothing.", 94 | "The only journey is the one within.", 95 | "Life is a dream for the wise, a game for the fool, a comedy for the rich, a tragedy for the poor.", 96 | "Do not go where the path may lead, go instead where there is no path and leave a trail.", 97 | "Do not let making a living prevent you from making a life.", 98 | "The biggest risk is not taking any risk.", 99 | "Happiness is not something ready-made. It comes from your own actions.", 100 | "Knowledge is power.", 101 | "Be the change that you wish to see in the world." 
102 | ] 103 | -------------------------------------------------------------------------------- /datasets/wikipedia.json: -------------------------------------------------------------------------------- 1 | [ 2 | " Anastasia, willing to stay with Ahmed, is reluctant", 3 | "Teen Angel may refer to:\n\nFilm and TV\nTeen Angel (1989 TV series), a 1989 drama starring Jason Priestley\nTeen Angel (1997 TV series), a 1997 sitcom starring Corbin Allred\nTeen Angel, a one-scene character in Grease (see \"Beauty School Dropout\")\n\nMusic\nTeen Angels (American band), 1994–1996\nTeen Angels, an Argentine pop music group\n\nSongs\n\"Teen Angel\" (song), song performed by Mark Dinning, written Red Surrey, Jean Surrey 1959\n\"Teen Angel\", song by Dion and the Belmonts Singer, Patrick, Dimucci 1958\n\"Teen Angel\", a song by Donovan, D Leitch from The Hurdy Gurdy Man 1968\n\nComics\n Angel from the Time-displaced X-Men (called that way to set him apart from the adult character)", 4 | "\n\nSingles\n\"Weeeek\": On November 7, 2007, NEWS released \"Weeeek\", their seventh single, along with their second studio album, Pacific", 5 | "\n\nGlutamate dehydrogenases , and (GluDH) are enzymes that catalyse the NAD- and/or NADP-dependent reversible deamination of L-glutamate into alpha-ketoglutarate", 6 | "\n\nHistory\nLagrasse dates from the time the abbey was built", 7 | "\n\nAlexander has produced illustrations for Dark Horse Comics, Warner Brothers, DC Comics, Hasbro, White Wolf, Inc, and Dalmatian Press", 8 | "\n\nOperations \n\nThe railway generally operates on a 'one engine in steam' principle on most days, operating throughout the year on weekends and daily during half terms and school holidays", 9 | "Ouaoumana is a commune in Khénifra Province of the Béni Mellal-Khénifra administrative region of Morocco", 10 | " It is also directed by Ei Aoki; produced by Atsuhiro Iwakami; music by Yuki Kajiura; character design by Atsushi Ikariya and Tomonori Sudou; and art, 3D, and photography 
directions by Koji Eto, Kōjirō Shishido, and Yuichi Terao respectively", 11 | " OSP also built its own semi-permanent two-story wooden thrust stage in 1990", 12 | " Louis Rams coaches\nCategory:San Francisco 49ers coaches\nCategory:Pittsburgh Steelers coaches\nCategory:Sportspeople from Berkeley, California\nCategory:Wake Forest Demon Deacons football coaches", 13 | "com\nTillamook Rock Lighthouse entry in National Park Service inventory of historic lighthouses\nArticle on Tillamook Rock Lighthouse in Lighthouse Digest Magazine Jul/Aug 2016 containing many keeper photos\nTillamook Rock Lighthouse Documentary produced by Oregon Field Guide\n\nCategory:Lighthouses completed in 1881\nCategory:Lighthouses on the National Register of Historic Places in Oregon\nCategory:National Register of Historic Places in Clatsop County, Oregon\nCategory:Oregon Coast\nCategory:Transportation buildings and structures in Clatsop County, Oregon\nCategory:1881 establishments in Oregon", 14 | " The design of the Super Class Ferry was done in Seattle by W", 15 | "The Burton is a historic apartment building located at Indianapolis, Indiana", 16 | " Fletcher, an Associate Justice and Chief Justice of the Supreme Court of Georgia\nRichard Fletcher (American politician), an Associate Justice of the Massachusetts Supreme Judicial Court\nRobert Virgil Fletcher, an Associate Justice of the Supreme Court of Mississippi\nWilliam A", 17 | "\n Oklahoma Farm Bureau\n Open for Business, an online general interest publication with a technology focus\n The Sims 2: Open for Business\n Oregon Food Bank\nOFB, or Original Farm Boys, a UK drill music group based in Broadwater Farm Estate", 18 | "\n\nSelected publications\nwith Ernesto Lupercio: Gerbes over orbifolds and twisted K-theory, Communications in Mathematical Physics, vol", 19 | " Vogue fashion published an article in 2015 about the circular captive bead belly rings and TummyToys snap lock clasps becoming the current trend for navel piercings", 20 | 
" Although he does not appear in person in Death by Degrees, Nina Williams' spin-off game, he is mentioned several times in journals and documents, and he has a laboratory on the Solitaria Penitentiary island, although it's abandoned", 21 | "Massimo Natili (July 28, 1935 – September 19, 2017) was a racing driver from Italy", 22 | "\n\nVideo \n The Royal Economic Society's 2006 Annual Public Lecture, by Collier at the (Royal Economic Society)\n Interview with Fareed Zakaria on Foreign Exchange\n TED Conference, Paul Collier on \"The Bottom Billion\"\n TED Conference, Paul Collier's new rules for rebuilding a broken nation\n Why social science should integrate culture and how to do it?, at the Blavatnik School of Government, Oxford University, January 2017\n\nPress \n Review of The Plundered Planet by the Financial Times\n Review of the Bottom Billion by the Financial Times\n Review of the Bottom Billion in The New York Times\n Samuel Grove, \"The Bottom of the Barrel: A Review of Paul Collier's The Bottom Billion: Why the Poorest Countries Are Failing and What Can Be Done about It", 23 | "Whip snake may refer to:\n\nGenera \n Ahaetulla, endemic to southern Asia, from India to Vietnam\n Demansia, endemic to Australia, Papua New Guinea, and nearby islands\n Hemorrhois, endemic to the western Mediterranean, west, central, and southern Asia\n Hierophis, endemic to southern Europe\n Masticophis, endemic to the Americas\n Psammophis, endemic to Africa and Asia\n\nSpecies \n Caspian whipsnake (Dolichophis caspius), found in the Balkans and Eastern Europe\n Red whip snake (Platyceps collaris), found in Bulgaria and the Levant\n Parasuta dwyeri, found in Australia from New South Wales to South Queensland\n White-lipped snake (Drysdalia coronoides), found in Tasmania and southeastern Australia\n\nCategory:Animal common name disambiguation pages", 24 | "\n\nWith effect from September 2012, the school no longer make use of Le Fousseau", 25 | "\nThe use of topical antibiotics to 
treat surgical wounds does not reduce infection rates in comparison with non-antibiotic ointment or no ointment at all", 26 | "\n\nNevada was won by Ulysses S", 27 | "Queen Victoria's Commemoration Medal 1900 (Ireland), more commonly referred to as the Visit to Ireland Medal 1900, was awarded to those members of the Irish Police Forces on duty at Queen Victoria’s various engagements during her visit to Ireland in 1900", 28 | " The proboscis or rostrum lacks a beak", 29 | "\n\nB\nAleksandra Belcova (1892–1981), painter\nBiruta Baumane (born 1922), painter\n\nD\nLilija Dinere (born 1955), painter, illustrator\n\nK\nIngrīda Kadaka (born 1967), book designer, illustrator\nAina Karlsone (1935–2012), artist, writer\n\nO\nSimona Orinska (born 1978), contemporary artist\n\nP\nTatyana Palchuk (born 1954), painter\nLucia Peka (1912–1991), Latvian-American painter\nLīga Purmale (born 1948), painter\n\nS\nDaina Skadmane (1990–2013), painter, lithographer\nRoze Stiebra (born 1942), animator\n\n-\nLatvian\nArtists\nArtists, women", 30 | "\n\nRegular season\n\nStandings\n\n2013 NCAA Tournament\n\nNote: * denotes overtime period(s)\n\nPlayer stats\n\nScoring leaders\n\n \nGP = Games played; G = Goals; A = Assists; Pts = Points; PIM = Penalty minutes\n\nLeading goaltenders\n\nGP = Games played; Min = Minutes played; W = Wins; L = Losses; T = Ties; GA = Goals against; SO = Shutouts; SV% = Save percentage; GAA = Goals against average\n\nAwards\n\nNCAA\n\nAtlantic Hockey\n\nCCHA\n\nECAC\n\nHockey East\n\nWCHA\n\nReferences\n\nExternal links\nUSCHO", 31 | " In 1972-1978, the collection was turned over to the state by the Swiss Federal Office for military airfields", 32 | "Liu Fu may refer to:\n\n Liu Fu (prince) (劉輔; died 84), Eastern Han Dynasty prince, son of Emperor Guangwu\n Liu Fu (Yuanying) (劉馥; died 208), style name Yuanying (元穎), Eastern Han Dynasty politician\n Liu Fu (cyclist) (born 1957), Chinese Olympic cyclist\n Liu Bannong or Liu Fu (劉復, 1891–1934), Chinese linguist and 
poet", 33 | " The hurricane affected the southern Louisiana region one week before the Louisiana Maneuvers, a prelude to World War II", 34 | "Vaishnav was a sailor from France, who represented his native country at the 1900 Summer Olympics in Paris, France", 35 | ", while the video clip was frequently aired on television at the time of the release", 36 | "Cleopas Ncube (born November 21, 1983) is a Canadian wrestler", 37 | " He became director of the Institute of Art and Crafts of Asti", 38 | " The headquarters of the borough council were located in Sandbach", 39 | " The current route of FM 2767 was designated on December 20, 1963", 40 | " He served as the Liberty town clerk and was a farmer", 41 | " Interministerial Instruction on road signs", 42 | "Marind may refer to:\nMarind people\nMarind languages\nMarind language", 43 | " Corbett married Jennie Wright", 44 | "Fouilloy is the name of the following communes in France:\n\n Fouilloy, Oise, in the Oise department\n Fouilloy, Somme, in the Somme department", 45 | " In 1944, she signed a seven-year contract with RKO Pictures", 46 | " Love (1831–1887), colonel in the Union Army and Medal of Honor recipient\n George H", 47 | "\n\nBus connections \nCTA\n 7 Harrison (Weekdays only)\n 49 Western (Owl Service) \n X49 Western Express (Weekday Rush Hours only)\n\nNotes and references\n\nNotes\n\nReferences\n\nExternal links \n\n Western (Congress Line) Station Page Chicago-L", 48 | " The church is recorded in the National Heritage List for England as a designated Grade I listed building", 49 | " About sixty in-folios in geography, numismatics, archeology were found, but also books of biblical scholarship, dictionaries and the Encyclopédie", 50 | " A concert series at UNC-Chapel Hill is named in his honor", 51 | "\n\nAthletic career\nApart from radio and TV broadcasting, Nemone is also a keen athlete", 52 | " Nadur Youngsters is one of the most popular football teams in Gozo and brings up young players from the village 
every year", 53 | "\n\nSynopsis\n\nThis film is a personal journey travelling through time and space to unravel hidden stories and rediscover objects and images that at one time were an integral part of the lives of these artists through which their creations came into being", 54 | "\n\nHe is a recipient of the 70th Birthday Badge Medal of King Carl XVI Gustaf (30 April 2016)", 55 | " The basal part of the costa is tinged with yellow", 56 | " On June 4, 2018, creator Benjamin Freiburger confirmed on his Twitter account that the series would not return for a second season", 57 | "\n\nAmong the wildlife that can be found in Zanskar are the marmot, bear, wolf, snow leopard, bharal, alpine ibex, wild sheep and goats, and the lammergeier", 58 | "but we have now come to know that this dialect was essentially the same as that spoken by the sub-tribes occupying the land where Sydney now stands, and that they all formed part of one great tribe, the Kuriggai\"", 59 | " Montoya had scored his last IndyCar win in 2000 at Gateway", 60 | " Among his researches those about Ralph Waldo Emerson, Walt Whitman, and Vachel Lindsay", 61 | "org in October 2012, Fat Mike said that they had recorded 18 songs, but only 12 made the album", 62 | "David Lowell Ladd (September 18, 1926 – October 12, 1994) was a former Commissioner of Patents and Register of Copyrights in the United States, the first (and currently only) individual to serve in both offices", 63 | "Wiśnicze () is a village in the administrative district of Gmina Wielowieś, within Gliwice County, Silesian Voivodeship, in southern Poland", 64 | "\n\nDouglas Busk, a British mountaineer, climbed Alam-Kuh via the east ridge in 1933 and again in 1934 from over the west ridge", 65 | "\n\nLibretto\nVa, dal furor portata,\nPalesa il tradimento;\nMa ti sovvenga ingrata,\nIl traditor qual'è", 66 | " It is also authorised to build roads within its limits and impose taxes on properties coming under its jurisdiction", 67 | "\n\n \"Romance\" – 
3:23\n \"Eyes of a Stranger\" – 4:54\n \"Some Old Song\" – 3:37\n \"Rose\" (written by Paul Hyde) – 3:59\n \"Hastings Street\" – 4:59\n \"Youth\" – 4:17\n \"Lights to Change\" – 2:27\n \"Mystery to Me\" – 3:04\n \"Pennies into Gold\" – 3:06\n \"Screaming\" – 4:09\n \"Rockers\" – 2:46\n\nPersonnel\n Paul Hyde: vocals, guitars\n Bob Rock: electric/acoustic and synthetic guitars, vocals\n Christopher Taylor: drums, etc", 68 | "\n\nSeeds\n\nDraw\n\nDraw\n\nReferences\n Doubles Draw\n Qualifying Doubles Draw\n\nVirginia National Bank Men's Pro Championship - Doubles\n2009 Doubles", 69 | " No sub-species are listed at Catalogue of Life", 70 | "\n\nCollections\nPockets Rug Collection (Spring 2011): Jean pockets are sewn together to form a patchwork rug", 71 | " There is strong evidence that the British troops may have had United Irishmen members or sympathizers among their ranks", 72 | " The original two-storey pub was built in 1720 by Belfast merchant Hugh Kelly who kept it as a bonded warehouse in which rum, gin and whiskey were his mainstays", 73 | "\n\nA 2013 study found that nanotextured surfaces using cone forms produce highly water-repellent surfaces", 74 | "Dan Pontefract is a Canadian businessperson and writer", 75 | " The cavities that are formed by the octahedral chains that are occupied by A(1) and A(2) cations are occupied by Ca and its REE, Ce3+", 76 | ", The Love There That's Sleeping: The Art and Spirituality of George Harrison, Continuum (New York, NY, 2006; )", 77 | " 3rd Brigade, 2nd Division, 2nd Army Corps, Army of the Potomac, to March, 1864", 78 | " Her St Petersburg debut was a huge success, with the Russian balletomanes and critics becoming completely enchanted by the Italian ballerina", 79 | "Pedro Bernaldez de Sahagun (12th-century) was a medieval knight of Castile", 80 | " Her students in Montreal included Robert Savoie", 81 | "\n\nLines \n\nTokyo Metropolitan Bureau of Transportation\nNippori-Toneri Liner\n\nHistory \nThe station opened on 
March 30, 2008, when the Nippori-Toneri Liner began operation", 82 | " He had been involved in an earlier lynching of a motorcycle mechanic, Mithu Das, in the same district", 83 | "\n\nServices\nTrains run 0455-0020 every day", 84 | " It's part of Toroslar district (which is an intracity district within Greater Mersin)", 85 | "\" The East India Company turned attention from Mindanao as a possible outpost to the New Guinea archipelago, and John McCluer went from Bombay to Palau in 1790 as hydrographer", 86 | "\n\nCategory:Courts in the United States\nCategory:West Virginia state courts", 87 | "Euxoa violaris, the violet dart moth, is a species of moth native to North America", 88 | "The is a museum located in Asuka Village, Nara Prefecture in Japan", 89 | "This list of bridges in Andorra lists bridges of particular historical, scenic, architectural or engineering interest", 90 | " He started his professional career as an entrepreneur, and was owner of various restaurants", 91 | "Pop Train is a scheme of using Supplemental Nutrition Assistance Program (SNAP) card benefits to purchase soda and then re-selling the soda to turn a profit", 92 | " Wyman died six days later on December 15, 1953", 93 | "\n\nSemantic information on GVSM\n\nThere are at least two basic directions for embedding term to term relatedness, other than exact keyword matching, into a retrieval model:\n compute semantic correlations between terms\n compute frequency co-occurrence statistics from large corpora\n\nRecently Tsatsaronis focused on the first approach", 94 | "Trochactaeon is an extinct genus of fossil sea snails, marine gastropod mollusks in the family Acteonellidae", 95 | "\n\nFootnotes\n\nReferences\n\nNotes\n\nBibliography\n\nCategory:1828 paintings\nCategory:Collections of York Art Gallery\nCategory:Dance in art\nCategory:John Milton\nCategory:Paintings by William Etty\nCategory:Paintings depicting Hebrew Bible themes\nCategory:Musical instruments in art\nCategory:Water in art", 96 | 
"\n\nAccomplishments\n DHB-Pokal:\n : 1981\n EHF Cup Winner's Cup:\n : 1981\n EHF Challenge Cup:\n : 1997, 1998\n European Club Championship:\n : 1981\n\nTeam\n\nCurrent squad\nSquad for the 2019–20 season\n\nGoalkeepers\n 12 Péter Tatai\n 21 Johannes Jepsen\nLeft Wingers\n8 Jens Bechtloff\n 45 Jan-Eric Speckmann\nRight Wingers \n 14 Peter Strosack\n 15 Marvin Mundus \nLine players\n2 Julius Brune\n7 Patryk Walczak\n 25 Moritz Schade \n\nLeft Backs\n 13 Marko Bagarić\n 23 Valentin Spohn\n 29 Marian Orlowski\nCentral Backs\n3 Roman Bečvář\n 10 Łukasz Gierak\nRight Backs\n5 Jó Gerrit Genz\n9 Dominik Ebner\n\nTransfers\nTransfers for the 2020–21 season\n\nJoining\n Aljoša Rezar (GK) (from Bjerringbro-Silkeborg)\n Tom Skroblien (LW) (from TUSEM Essen)\n Benas Petreikis (CB) (from EHV Aue)\n Leoš Petrovský (P) (from Bergischer HC)\n\nLeaving\n Péter Tatai (GK) (to Csurgói KK)\n Jens Bechtloff (LW) (to TSG Altenhagen-Heepen)\n Marian Orlowski (LB) (to ASV Hamm-Westfalen)\n Jó Gerrit Genz (RB) (to ASV Hamm-Westfalen)\n\nReferences\n\nCategory:German handball clubs\nCategory:Handball-Bundesliga\nCategory:Lübbecke", 97 | " The opera was not performed again at the Met until a new production was mounted in 1963", 98 | "\n\nTrack Listing\n\nInfluence\nFrançoise Hardy covered The Garden of Jane Delawney on her album If You Listen", 99 | "\n\nCast\n Suzanne Talba as Conchita \n José Durany as Pedro \n Vasseur as Manuel \n Maxa\n\nReferences\n\nBibliography\n Rège, Philippe", 100 | "\n A Natick-class tugboat serving the Naval Submarine Base, New London, Connecticut\n\nMetacom", 101 | "\"\n\nIn early 2018, Messersmith released \"Purple Hearts\", the first single off his 5th full-length studio album, Late Stage Capitalism", 102 | "Teremoana Tapi Taio is a Cook Islands politician and former Cabinet Minister", 103 | " Notable people with the surname include:\n\nJeremy Kellem (born 1989), American football player\nVivien Kellems (1896-1975), American inventor", 104 | " Senator Pat 
Roberts won re-election to a second term overwhelmingly", 105 | " The festival opened with \"Junoon\" by Shyam Benegal in the out of Competition world premiere", 106 | "This list of administrative communes consists of articles about the governmental divisions known as communes, as well as lists of communes", 107 | " She was a political adviser to Prime Minister Gro Harlem Brundtland in the Prime Minister's Office from 1992 to 1996", 108 | "American Marriage Ministries is a non-denominational Internet church based in Seattle", 109 | " Foster, coaching his fifteenth season with the Badgers", 110 | "\n\nSingles main draw entrants\n\nSeeds\n\n 1 Rankings are as of January 30, 2017", 111 | " He was a longtime executive and scout in the National Basketball Association (NBA), including 27 years with the Sacramento Kings", 112 | " He promoted the construction of the church of Saint-Michel-de-Cuxa, consecrated in 953, and of the monastery", 113 | " It was built as a master planned community by Jordon Perlmutter", 114 | "\n\nThe single was written and performed while the group was still a trio", 115 | "\n\nDescription\nThe species is endemic to Sulawesi in Indonesia", 116 | " It is the site of a Verizon Wireless cell tower, a Dutchess County 911 Radio Repeater, and a fire tower that is no longer in service", 117 | " Inside the marsh, there is a two mile boardwalk", 118 | "\n\nIn 2008, the Brihanmumbai Municipal Corporation (BMC) included JNA as part of its disaster management operations to provide emergency communication support during the monsoons to provide live updates on the flooding situation in the city", 119 | " In March 1945, during World War II, the U", 120 | " This approach offers several advantages to the engineering analyst: \n\nThe data used in the study can be tailored more precisely to the engineering problem under study", 121 | "\n\nSee also\n List of psychiatry journals\n\nReferences\n\nExternal links \n \n\nCategory:Psychiatry journals\nCategory:Clinical 
psychology journals\nCategory:Publications established in 1897\nCategory:Karger academic journals\nCategory:Bimonthly journals\nCategory:English-language journals", 122 | " The peak's name was officially adopted in 1975 by the U", 123 | "\n\nReception\nThe Allmusic review by Jason Ankeny calls the album \"one of Freddie Hubbard's most obscure sessions, but admirers of the trumpeter's early-'80s return to his musical roots will find much to appreciate here\"", 124 | "\n\nFinal table\n\nTop scorers\n\nAwards\n\nReferences\n\nCategory:Latvian Higher League seasons\n1\nLatvia\nLatvia", 125 | " 1841) \n1802 – 1822 Rani Laxmipriya Devi (f) -Regent\n1841 – 9 Sep 1891 Niladhar Singh Deo (b", 126 | "\n\nFurther reading\nAdachi Yoshio ", 127 | " It flowers in the southern hemisphere from April to May", 128 | " He took the examen artium in 1904, and graduated from the Royal Frederick University with the cand", 129 | "Clarence Richard Roberts (4 November 1888 – 18 September 1966) was an Australian rules footballer who played with St Kilda in the Victorian Football League (VFL)", 130 | "\n\nSingle track listing \n\nWritten and composed by Yoshiki", 131 | " Nowadays, independent school pupils have \"the highest rates of achieving grades A or B in A-level maths and sciences\" compared to grammar, specialist and mainstream state schools, and pupils at independent schools account for a disproportionate number of the total number of A-levels in maths and sciences", 132 | " The shell can be grayish white on the outside with rather flat ribs that are somewhat darker", 133 | " They mine the leaves of their host plant", 134 | " As his son joined the New Fourth Army, he then was threatened by Hou Yibo, who forcibly occupied his house", 135 | "\n\nUnited States Team Handball Federation organized the participation of U", 136 | " She was coached mainly by Dale Hazell and also trained with John Nicks in the summer of 2000", 137 | " Several large sized mammal groups are known from this 
region, such as rhinoceri, oreodonts and hyaenodonts, all co-existing with several flightless bathornithids", 138 | " Due to the darkness, Fletcher goes round in circles, and is severely disappointed to discover that he ends up back at the cottage with Barrowclough", 139 | " At the 2006 census, its population was 581, in 159 families", 140 | " The fine graters are also sometimes sold as a wooden board covered with shark skin, which has many tiny teeth (dermal denticles) and give it a feel similar to sandpaper", 141 | " The hindwings are whitish cinereous (ash grey) towards the base", 142 | "com/mall/penn-square-mall\n\nCategory:Shopping malls established in 1960\nCategory:Shopping malls in Oklahoma\nCategory:Buildings and structures in Oklahoma City\nCategory:Economy of Oklahoma City\nCategory:Tourist attractions in Oklahoma City", 143 | " In Mozambique the latter river is called the Rio Elefantes", 144 | " It remains unnamed since its numbering in December 2007", 145 | " She won the double scull World U23 Championship in 2012 and the singles European Championship in 2016", 146 | "\n , operated by Ellerman Lines; sunk during the First World War\n , operated by Ellerman Lines; used as a troopship in the Second World War, being damaged by a mine in 1939, but survived; scrapped in 1956\n\nCategory:Ship names", 147 | "\n\nHuard moved to the United Kingdom in 2000 to play in the British Ice Hockey Superleague for the London Knights but left the team after playing just one game to return to Canada to be with his girlfriend who was seven months pregnant at the time", 148 | " He stood again as a Cumann na nGaedheal candidate at the June 1927 general election and was elected to the 6th Dáil", 149 | "The Hundred of Stirling is a Hundred of the County of Buckingham (South Australia), centered on Keith, South Australia, South east of Adelaide, South Australia", 150 | "\n\nSoaked to the Bone was officially released in February 2016 on Blue House Music and made the top 40 in the 
Official Charts Company Americana Chart", 151 | " However, they have a scheme to secure a footing for world conquest", 152 | "Mogoditshane Fighters are a football (soccer) club from the Mogoditshane in Botswana" 153 | ] -------------------------------------------------------------------------------- /example_script.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | from transformers import AutoModelForCausalLM, AutoTokenizer 8 | 9 | import prompt_optimization as prompt_opt 10 | 11 | # Setup argument parser to get command-line arguments 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--batch-size", type=int, default=100) 14 | parser.add_argument("--discrete-optimizer", type=str, default="gcg") 15 | parser.add_argument("--log-dir", type=str, default="experiments") 16 | parser.add_argument("--lr", type=float, default=0.01) 17 | parser.add_argument("--model-name", type=str, default="EleutherAI/pythia-410m") 18 | parser.add_argument("--num-steps", type=int, default=200) 19 | parser.add_argument("--optimizer", type=str, default="adam") 20 | parser.add_argument("--seed", type=int, default=None) 21 | parser.add_argument("--topk", type=int, default=250) 22 | args = parser.parse_args() 23 | 24 | # Set randomness 25 | if args.seed: 26 | np.random.seed(args.seed) 27 | torch.manual_seed(args.seed) 28 | torch.cuda.manual_seed(args.seed) 29 | torch.cuda.manual_seed_all(args.seed) 30 | torch.backends.cudnn.deterministic = True 31 | torch.backends.cudnn.benchmark = False 32 | 33 | # Generate a unique ID for the run and create the experiments directory 34 | run_id = 'example' 35 | os.makedirs(f"outputs/", exist_ok=True) 36 | # Setup logging configuration 37 | logging.basicConfig(level=logging.DEBUG, 38 | format="[%(asctime)s] %(message)s", 39 | datefmt="%Y%m%d %H:%M:%S", 40 | handlers=[logging.FileHandler(f"outputs/{run_id}.log"), 
logging.StreamHandler()]) 41 | logging.info(f"run id: {run_id}") 42 | print(f"run id: {run_id}") 43 | for arg, value in vars(args).items(): 44 | logging.info(f"{arg}: {value}") 45 | 46 | # Device, model, and tokenizer setup 47 | device = "cuda" if torch.cuda.is_available() else "cpu" 48 | if device == "cuda": 49 | model_args = dict(trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, device_map="auto") 50 | else: 51 | model_args = dict(trust_remote_code=False, low_cpu_mem_usage=True) 52 | model = AutoModelForCausalLM.from_pretrained(args.model_name, **model_args) 53 | tokenizer = AutoTokenizer.from_pretrained(args.model_name) 54 | # model = model.to(device) 55 | 56 | ############################################################################################################ 57 | # Room to play around: 58 | # the input_ids is constructed in prep_text() and is a concatenation in this order: 59 | # [chat_template[0], system_prompt, input_str, free_tokens, chat_template[1], target_str] 60 | # Prepare input and target tokens from strings 61 | num_free_tokens = 10 62 | input_str = " " 63 | target_str = "To be or not to be, that is the question." 
64 | system_prompt = "" 65 | chat_template = ("", "") 66 | ############################################################################################################ 67 | 68 | input_ids, free_token_slice, input_slice, target_slice, loss_slice = prompt_opt.prep_text(input_str, 69 | target_str, 70 | tokenizer, 71 | system_prompt, 72 | chat_template, 73 | num_free_tokens, 74 | device) 75 | # Optimize the input tokens to generate the target string 76 | if args.discrete_optimizer == "gcg": 77 | solution = prompt_opt.optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, 78 | loss_slice, args.num_steps, batch_size=args.batch_size, topk=args.topk) 79 | elif args.discrete_optimizer == "random_search": 80 | solution = prompt_opt.optimize_random_search(model, input_ids, input_slice, free_token_slice, 81 | target_slice, loss_slice, args.num_steps, batch_size=args.batch_size) 82 | else: 83 | raise ValueError("discrete_optimizer must be one of ['gcg', 'random_search']") 84 | 85 | # Test the prompt and log the new generation with the target string 86 | logging.info(f"Hard tokens returned:") 87 | optimized_ids = solution["input_ids"] 88 | output = model.generate(input_ids=optimized_ids[input_slice].unsqueeze(0), max_new_tokens=20, do_sample=False) 89 | logging.info(f"solution: {tokenizer.decode(optimized_ids[input_slice], skip_special_tokens=True)}") 90 | logging.info(f"goal: {tokenizer.decode(input_ids[target_slice], skip_special_tokens=True)}") 91 | logging.info(f"output: {tokenizer.decode(output[0, target_slice], skip_special_tokens=True)}") 92 | 93 | -------------------------------------------------------------------------------- /make_table_of_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | 7 | # argparser to get the experiment name 8 | parser = argparse.ArgumentParser(description="Create a table from JSON 
files") 9 | parser.add_argument("--experiment_name", default=None, type=str, nargs="+", help="list of experiments to include.") 10 | args = parser.parse_args() 11 | 12 | # Create a Path object with the folder path 13 | folder_path = Path("outputs") 14 | 15 | # List to hold all json data 16 | json_list = [] 17 | experiment_names_list = [] 18 | # Iterate over each JSON file in the directory 19 | for json_file in folder_path.glob("*/*.json"): 20 | with open(json_file, "r", encoding="utf-8") as f: 21 | # Load the JSON content 22 | json_content = json.load(f) 23 | # Append the content as is (which will result in a single cell containing the JSON in the DataFrame) 24 | experiment_names_list.append(json_content["cfg_experiment_name"]) 25 | if args.experiment_name is not None and json_content["cfg_experiment_name"] in args.experiment_name: 26 | json_list.append(json_content) 27 | elif args.experiment_name is None: 28 | json_list.append(json_content) 29 | 30 | print(f"The set of experiment names in the folder is: {set(experiment_names_list)}") 31 | # Convert the list of JSON objects to a DataFrame 32 | df = pd.DataFrame(json_list) 33 | df = df.sort_values(by=["cfg_model_name", "cfg_dataset", "cfg_data_idx", "cfg_discrete_optimizer"]) 34 | df["ratio"] = df["target_length"] / df["num_free_tokens"] 35 | df["memorized"] = df["ratio"] > 1 36 | print(df[["cfg_model_name", "cfg_dataset", "cfg_data_idx", "cfg_discrete_optimizer", "ratio", "memorized", 37 | "success"]].round(2).to_markdown()) 38 | 39 | # Make summary counting the average ratio and success rate for each dataset and discrete_optimizer include counts 40 | summary = df.groupby(["cfg_model_name", "cfg_dataset", "cfg_discrete_optimizer"]).agg( 41 | {"ratio": "mean", "memorized": "mean", "success": "count"}).round(2) 42 | print(summary.to_markdown()) 43 | print(f"dataframe shape: {df.shape}") 44 | -------------------------------------------------------------------------------- /prompt-minimization-main.py: 
# -----------------------------------------------------------------------------
# prompt-minimization-main.py
# -----------------------------------------------------------------------------
import json
import logging
import os

import hydra
import numpy as np
import torch
from hydra.core.hydra_config import HydraConfig
from omegaconf import OmegaConf
from transformers import AutoModelForCausalLM, AutoTokenizer

import prompt_optimization as prompt_opt
from prompt_optimization.utils import get_id_func, now, load_target_str

OmegaConf.register_new_resolver("generate_id", get_id_func())


@hydra.main(version_base=None, config_path="config", config_name="promptmin")
def main(cfg):
    """Run one prompt-minimization experiment and save results to JSON.

    Loads the model/tokenizer named in the Hydra config, optionally seeds all
    RNGs and randomizes the linear-layer weights, picks a target string
    (either directly from the config or from a dataset), runs the miniprompt
    search, and writes ``results.json`` into the Hydra run directory.
    """
    # Set randomness. `is not None` (rather than truthiness) so seed=0 works.
    if cfg.seed is not None:
        np.random.seed(cfg.seed)
        torch.manual_seed(cfg.seed)
        torch.cuda.manual_seed(cfg.seed)
        torch.cuda.manual_seed_all(cfg.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    for arg, value in OmegaConf.to_container(cfg, resolve=True).items():
        logging.info(f"{arg}: {value}")

    # Device, model, and tokenizer setup. With more than one GPU the model is
    # sharded via device_map="auto"; with one (or none) it is moved explicitly.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_args = dict(trust_remote_code=True, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16)
    if torch.cuda.device_count() > 1:
        model_args["device_map"] = "auto"
    model = AutoModelForCausalLM.from_pretrained(cfg.model_name, **model_args)
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
    if torch.cuda.device_count() <= 1:
        model = model.to(device)

    if cfg.random_weights:
        # Re-initialize only the Linear layers; embeddings etc. keep their
        # pretrained values.
        logging.info("Randomizing weights")
        for module in model.modules():
            if isinstance(module, torch.nn.Linear):
                torch.nn.init.normal_(module.weight, mean=0, std=0.02)
                if module.bias is not None:
                    torch.nn.init.constant_(module.bias, 0)

    # Data setup
    input_str = cfg.input_str
    target_str = cfg.target_str
    chat_template = cfg.chat_template
    system_prompt = cfg.system_prompt

    if cfg.dataset is not None and cfg.data_idx is not None:
        target_str = load_target_str(cfg.dataset, cfg.data_idx, tokenizer)
        cfg.target_str = target_str
        # (typo fix: message previously said "targer_str")
        logging.info(f"Target string selected from dataset, cfg.target_str: {cfg.target_str}")

    # Optimization setup
    optimization_args = {"discrete_optimizer": cfg.discrete_optimizer,
                         "num_steps": cfg.num_steps,
                         "lr": cfg.lr,
                         "optimizer": cfg.optimizer,
                         "batch_size": cfg.batch_size,
                         "mini_batch_size": cfg.mini_batch_size,
                         "topk": cfg.topk}

    solution = prompt_opt.minimize_prompt(model, tokenizer, input_str, target_str, system_prompt, chat_template,
                                          device, optimization_args, max_tokens=cfg.max_tokens)
    input_slice, target_slice, loss_slice, input_ids = (solution["input_slice"],
                                                        solution["target_slice"],
                                                        solution["loss_slice"],
                                                        solution["input_ids"])

    # Test the prompt and log the new generation with the target string
    if solution["success"] is True:
        logging.info("Hard tokens returned:")
        optimized_ids = solution["input_ids"]
        output = model.generate(input_ids=optimized_ids[input_slice].unsqueeze(0), max_new_tokens=20,
                                do_sample=False)
        optimal_prompt = tokenizer.decode(optimized_ids[input_slice], skip_special_tokens=True)
        logging.info(f"solution: {optimal_prompt}")
        logging.info(f"goal: {tokenizer.decode(input_ids[target_slice], skip_special_tokens=True)}")
        logging.info(f"output: {tokenizer.decode(output[0, target_slice], skip_special_tokens=True)}")

        # Loss of the target string on its own (no prompt): a baseline for how
        # "memorized" the continuation already is.
        with torch.no_grad():
            ids_for_loss_computation = input_ids[target_slice].unsqueeze(0).to(device)
            outputs = model(ids_for_loss_computation, labels=ids_for_loss_computation)
            loss_of_target_str = outputs.loss.item()

        # Loss of the optimized prompt itself.
        with torch.no_grad():
            ids_for_loss_computation = input_ids[input_slice].unsqueeze(0).to(device)
            outputs = model(ids_for_loss_computation, labels=ids_for_loss_computation)
            loss_of_prompt = outputs.loss.item()

        solution["input_ids"] = input_ids.tolist()

        # Compile data for saving to a JSON file. slice objects are not JSON
        # serializable, so they are stored as (start, stop) pairs.
        results = {
            "target_length": target_slice.stop - target_slice.start,
            "target_str": target_str,
            "loss_of_target_str": loss_of_target_str,
            "loss_of_prompt": loss_of_prompt,
            "success": True,
            "optimal_prompt": optimal_prompt,
        }
        for k, v in solution.items():
            if isinstance(v, slice):
                results[k] = (v.start, v.stop)
            else:
                results[k] = v
    else:
        results = {"success": False,
                   "num_free_tokens": solution["num_free_tokens"],
                   "target_str": target_str,
                   "target_length": target_slice.stop - target_slice.start,
                   }

    for k, v in OmegaConf.to_container(cfg, resolve=True).items():
        results[f"cfg_{k}"] = v

    # log data to the console
    for key, value in results.items():
        logging.info(f"{key}: {value}")
    results["time"] = now()

    # Save the data to a JSON file inside the Hydra run directory.
    filename = os.path.join(HydraConfig.get().run.dir, "results.json")
    with open(filename, 'w') as json_file:
        json.dump(results, json_file)


if __name__ == "__main__":
    main()
# -----------------------------------------------------------------------------
# prompt_optimization/__init__.py
# -----------------------------------------------------------------------------
from .gcg import optimize_gcg
from .random_search import optimize_random_search
from .utils import prep_text, check_output_with_hard_tokens
from .miniprompt import minimize_prompt
-------------------------------------------------------------------------------- /prompt_optimization/gcg.py: -------------------------------------------------------------------------------- 1 | """ 2 | gcg.py 3 | an implementation of Greedy Coordinate Gradient 4 | From: Universal and Transferable Adversarial Attacks on Aligned Language Models 5 | By: Andy Zou, Zifan Wang, Nicholas Carlini, Milad Nasr, J. Zico Kolter, Matt Fredrikson 6 | 2023 7 | https://arxiv.org/abs/2307.15043 8 | 9 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 10 | """ 11 | 12 | import logging 13 | 14 | import torch 15 | import torch.nn.functional as F 16 | 17 | 18 | def sample_tokens(num_tokens, embedding_matrix, batch_size, device): 19 | sample = torch.randint(0, embedding_matrix.size(0), (batch_size, num_tokens), device=device) 20 | new_token_loc = torch.randint(0, num_tokens, (batch_size,), device=device) 21 | new_token_vals = torch.randint(0, embedding_matrix.size(0), (batch_size,), device=device) 22 | sample[torch.arange(batch_size), new_token_loc] = new_token_vals 23 | return sample 24 | 25 | 26 | def optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, loss_slice, 27 | num_steps, topk=250, batch_size=100, mini_batch_size=100): 28 | # Get embedding matrix 29 | try: 30 | embedding_matrix = model.get_input_embeddings().weight 31 | except NotImplementedError: 32 | embedding_matrix = model.transformer.wte.weight 33 | 34 | best_loss = torch.inf 35 | best_input = input_ids.clone() 36 | 37 | # Greedy Coordinate Gradient optimization loop 38 | for i in range(num_steps): 39 | # Create one-hot tensor and embeddings from input_ids 40 | inputs_one_hot = F.one_hot(input_ids, embedding_matrix.size(0)).type(embedding_matrix.dtype).unsqueeze(0) 41 | inputs_one_hot.requires_grad_(True) 42 | inputs_embeds = torch.matmul(inputs_one_hot, embedding_matrix) 43 | # Forward and backward pass 44 | output = model(inputs_embeds=inputs_embeds) 45 
| loss = torch.nn.functional.cross_entropy(output.logits[0, loss_slice], input_ids[target_slice].squeeze()) 46 | grad = torch.autograd.grad(loss, inputs_one_hot)[0][:, free_token_slice] 47 | with torch.no_grad(): 48 | # Get topk gradients 49 | top_values, top_indices = torch.topk(-grad[0], topk, dim=1) 50 | # Build batch of input_ids with random topk tokens 51 | free_token_ids = inputs_one_hot[0, free_token_slice].argmax(-1) 52 | free_tokens_batch = free_token_ids.repeat(batch_size, 1) 53 | new_token_loc = torch.randint(0, free_token_ids.size(0), (batch_size, 1)) 54 | new_token_vals = top_indices[new_token_loc, torch.randint(0, topk, (batch_size, 1))] 55 | free_tokens_batch[torch.arange(batch_size), new_token_loc.squeeze()] = new_token_vals.squeeze() 56 | candidates_input_ids = input_ids.repeat(batch_size, 1) 57 | candidates_input_ids[:, free_token_slice] = free_tokens_batch 58 | 59 | loss = torch.zeros(batch_size) 60 | for mini_batch in range(0, batch_size, mini_batch_size): 61 | output = model(input_ids=candidates_input_ids[mini_batch:mini_batch + mini_batch_size]) 62 | labels = input_ids[target_slice].repeat(output.logits.size(0), 1) 63 | loss_mini_batch = F.cross_entropy(output.logits[:, loss_slice].transpose(1, 2), labels, 64 | reduction="none") 65 | loss[mini_batch:mini_batch + mini_batch_size] = loss_mini_batch.mean(dim=-1) 66 | best_candidate = torch.argmin(loss) 67 | input_ids = candidates_input_ids[best_candidate] 68 | 69 | # Compute test loss and check token matches 70 | output_single = model(input_ids=input_ids.unsqueeze(0)) 71 | match = (output_single.logits[0, loss_slice].argmax(-1) == input_ids[target_slice].squeeze()) 72 | logging.info(f"step: {i:<4} | " 73 | f"loss: {loss[best_candidate].mean().item():0.6f} | " 74 | f"{match.int().tolist()} | " 75 | ) 76 | if match.all(): 77 | best_input = input_ids.clone() 78 | break 79 | if loss[best_candidate].mean().item() < best_loss: 80 | best_loss = loss[best_candidate].mean().item() 81 | best_input = 
input_ids.clone() 82 | 83 | return {"input_ids": best_input, "inputs_embeds": model.get_input_embeddings()(best_input).unsqueeze(0)} 84 | 85 | -------------------------------------------------------------------------------- /prompt_optimization/miniprompt.py: -------------------------------------------------------------------------------- 1 | """ 2 | miniprompt.py 3 | an implementation of miniprompt 4 | 5 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 6 | """ 7 | import logging 8 | 9 | import prompt_optimization as prompt_opt 10 | 11 | 12 | def minimize_prompt(model, tokenizer, input_str, target_str, system_prompt, chat_template, device, optimization_args, 13 | max_tokens=30): 14 | n_tokens_in_prompt = 5 15 | running_max = max_tokens 16 | running_min = 0 17 | success = False 18 | best_prompt = None 19 | done = False 20 | best_slices = (None, None, None, None) 21 | 22 | while not done: 23 | logging.info("\n------------------------------------\n") 24 | logging.info(f"{n_tokens_in_prompt} tokens in the prompt") 25 | input_ids, free_token_slice, input_slice, target_slice, loss_slice = prompt_opt.prep_text(input_str, 26 | target_str, 27 | tokenizer, 28 | system_prompt, 29 | chat_template, 30 | n_tokens_in_prompt, 31 | device) 32 | if running_max == -1: 33 | running_max = (target_slice.stop - target_slice.start) * 5 34 | if optimization_args["discrete_optimizer"] == "gcg": 35 | solution = prompt_opt.optimize_gcg(model, input_ids, input_slice, free_token_slice, target_slice, 36 | loss_slice, optimization_args["num_steps"], 37 | batch_size=optimization_args["batch_size"], 38 | topk=optimization_args["topk"], 39 | mini_batch_size=optimization_args["mini_batch_size"]) 40 | elif optimization_args["discrete_optimizer"] == "random_search": 41 | solution = prompt_opt.optimize_random_search(model, input_ids, input_slice, free_token_slice, 42 | target_slice, loss_slice, optimization_args["num_steps"], 43 | 
batch_size=optimization_args["batch_size"], 44 | mini_batch_size=optimization_args["mini_batch_size"]) 45 | else: 46 | raise ValueError( 47 | "discrete_optimizer must be one of ['gcg', 'random_search']") 48 | 49 | target_acquired = prompt_opt.check_output_with_hard_tokens(model, solution["input_ids"].unsqueeze(0), 50 | target_slice, 51 | loss_slice) 52 | 53 | if target_acquired: 54 | logging.info(f"Target acquired with {n_tokens_in_prompt} tokens in the prompt") 55 | running_max = n_tokens_in_prompt 56 | success = True 57 | best_prompt = solution["input_ids"] 58 | new_num_tokens = n_tokens_in_prompt - 1 59 | best_slices = (free_token_slice, input_slice, target_slice, loss_slice) 60 | else: 61 | logging.info(f"Target NOT acquired with {n_tokens_in_prompt} tokens in the prompt") 62 | new_num_tokens = n_tokens_in_prompt + 5 63 | running_min = n_tokens_in_prompt 64 | optimization_args["num_steps"] = int(optimization_args["num_steps"] * 1.2) 65 | 66 | if (new_num_tokens >= running_max) or (new_num_tokens <= running_min): 67 | done = True 68 | else: 69 | n_tokens_in_prompt = new_num_tokens 70 | 71 | output = {"free_token_slice": best_slices[0] if best_slices[0] is not None else free_token_slice, 72 | "input_slice": best_slices[1] if best_slices[1] is not None else input_slice, 73 | "target_slice": best_slices[2] if best_slices[2] is not None else target_slice, 74 | "loss_slice": best_slices[3] if best_slices[3] is not None else loss_slice, 75 | "success": success, 76 | "num_free_tokens": running_max, 77 | "input_ids": best_prompt, 78 | } 79 | return output 80 | -------------------------------------------------------------------------------- /prompt_optimization/random_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | random_search.py 3 | an implementation of random search 4 | Proposed for prompt optimization in 5 | Adversarial attacks on gpt-4 via simple random search. 2023. 
by Maksym Andriushchenko 6 | 7 | developed in collaboration by: Avi Schwarzschild and Zhili Feng and Pratyush Maini in 2024 8 | """ 9 | import logging 10 | 11 | import torch 12 | 13 | 14 | def optimize_random_search(model, input_ids, input_slice, free_token_slice, target_slice, loss_slice, 15 | num_steps, batch_size=100, mini_batch_size=100): 16 | with torch.no_grad(): 17 | # Get embedding matrix 18 | embedding_matrix = model.get_input_embeddings().weight 19 | 20 | best_loss = torch.inf 21 | best_input = input_ids.clone() 22 | 23 | # Random search optimization loop 24 | for i in range(num_steps): 25 | # Get random batch of single token perturbations for the free tokens 26 | free_token_ids = input_ids[free_token_slice] 27 | free_tokens_batch = free_token_ids.repeat(batch_size, 1) 28 | new_token_loc = torch.randint(0, free_token_ids.size(0), (batch_size,), device=input_ids.device) 29 | new_token_vals = torch.randint(0, embedding_matrix.size(0), (batch_size,), device=input_ids.device) 30 | free_tokens_batch[torch.arange(batch_size), new_token_loc] = new_token_vals 31 | batch_input_ids = input_ids.repeat(batch_size, 1) 32 | batch_input_ids[:, free_token_slice] = free_tokens_batch 33 | 34 | loss = torch.zeros(batch_size) 35 | for mini_batch in range(0, batch_size, mini_batch_size): 36 | output = model(input_ids=batch_input_ids[mini_batch:mini_batch + mini_batch_size]) 37 | labels = input_ids[target_slice].repeat(output.logits.size(0), 1) 38 | loss_mini_batch = torch.nn.functional.cross_entropy(output.logits[:, loss_slice].transpose(1, 2), 39 | labels, 40 | reduction="none") 41 | loss[mini_batch:mini_batch + mini_batch_size] = loss_mini_batch.mean(dim=-1) 42 | best_candidate = torch.argmin(loss) 43 | 44 | input_ids = batch_input_ids[best_candidate] 45 | 46 | # compute test loss 47 | output_single = model(input_ids=input_ids.unsqueeze(0)) 48 | match = (output_single.logits[0, loss_slice].argmax(-1) == input_ids[target_slice].squeeze()) 49 | logging.info(f"step: {i:<4} | " 
50 | f"loss: {loss[best_candidate].mean().item():0.6f} | " 51 | f"{match.int().tolist()} | ") 52 | if match.all(): 53 | best_input = input_ids.clone() 54 | break 55 | if loss[best_candidate].mean().item() < best_loss: 56 | best_loss = loss[best_candidate].mean().item() 57 | best_input = input_ids.clone() 58 | 59 | return {"input_ids": best_input, "inputs_embeds": model.get_input_embeddings()(best_input).unsqueeze(0)} 60 | -------------------------------------------------------------------------------- /prompt_optimization/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | utils.py 3 | functions for preparing text for discrete optimization 4 | """ 5 | import datetime 6 | import json 7 | 8 | import torch 9 | from almost_unique_id import generate_id 10 | 11 | 12 | def load_target_str(dataset_name, idx, tokenizer): 13 | if dataset_name == "essays": 14 | with open("datasets/essays_by_avi.json", "r") as fh: 15 | quote_list = json.load(fh) 16 | target_str = quote_list[idx] 17 | elif dataset_name == "famous_quotes": 18 | with open("datasets/famous_quotes.json", "r") as fh: 19 | quote_list = json.load(fh) 20 | target_str = quote_list[idx] 21 | elif dataset_name == "wikipedia": 22 | with open("datasets/wikipedia.json", "r") as fh: 23 | quote_list = json.load(fh) 24 | target_str = quote_list[idx] 25 | elif dataset_name == "ap": 26 | with open("datasets/ap-articles-november-2023.json", "r") as fh: 27 | quote_list = json.load(fh) 28 | target_str = quote_list[idx] 29 | elif dataset_name == "random": 30 | len = 3 + (idx % 15) 31 | target_ids = torch.randint(0, tokenizer.vocab_size, (100, 20))[idx, :len] 32 | target_str = tokenizer.decode(target_ids) 33 | else: 34 | raise ValueError(f"args.dataset = {dataset_name}, but that option isn't implemented.") 35 | return target_str 36 | 37 | 38 | def prep_text(input_str, target_str, tokenizer, system_prompt, chat_template, num_free_tokens, device): 39 | input_tokens = 
tokenizer.encode(input_str, return_tensors="pt", add_special_tokens=False).to(device=device) 40 | target_tokens = tokenizer.encode(target_str, return_tensors="pt", add_special_tokens=False).to(device=device) 41 | system_prompt_tokens = tokenizer.encode(system_prompt, return_tensors="pt", add_special_tokens=False).to( 42 | device=device) 43 | chat_template_tokens = ( 44 | tokenizer.encode(chat_template[0], return_tensors="pt", add_special_tokens=False).to(device=device), 45 | tokenizer.encode(chat_template[1], return_tensors="pt", add_special_tokens=False).to(device=device)) 46 | free_tokens = torch.randint(0, tokenizer.vocab_size, (1, num_free_tokens)).to(device=device) 47 | 48 | input_ids = torch.cat((chat_template_tokens[0], system_prompt_tokens, input_tokens, free_tokens, 49 | chat_template_tokens[1], target_tokens), dim=1).squeeze().long() 50 | 51 | # build slice objects 52 | tokens_before_free = chat_template_tokens[0].size(-1) + system_prompt_tokens.size(-1) + input_tokens.size(-1) 53 | free_token_slice = slice(tokens_before_free, tokens_before_free + free_tokens.size(-1)) 54 | input_slice = slice(0, input_ids.size(-1) - target_tokens.size(-1)) 55 | target_slice = slice(input_ids.size(-1) - target_tokens.size(-1), input_ids.size(-1)) 56 | loss_slice = slice(input_ids.size(-1) - target_tokens.size(-1) - 1, input_ids.size(-1) - 1) 57 | 58 | return input_ids, free_token_slice, input_slice, target_slice, loss_slice 59 | 60 | 61 | def check_output_with_hard_tokens(model, input_ids, target_slice, loss_slice): 62 | output = model(input_ids) 63 | match = (output.logits[0, loss_slice].argmax(-1) == input_ids[0, target_slice].squeeze()).all() 64 | return match 65 | 66 | 67 | def now(): 68 | return datetime.datetime.now().strftime("%Y%m%d-%H:%M:%S") 69 | 70 | 71 | def get_id_func(): 72 | id = generate_id() 73 | 74 | def get_id(): 75 | return id 76 | 77 | return get_id 78 | -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.24.1 2 | ai2-olmo==0.2.5 3 | aiofiles==23.2.1 4 | aiohttp==3.8.4 5 | aiosignal==1.3.1 6 | almost-unique-id==0.0.3 7 | evaluate==0.4.0 8 | huggingface-hub==0.20.3 9 | hydra-core==1.3.2 10 | idna==3.4 11 | ipython==8.15.0 12 | jupyter==1.0.0 13 | matplotlib==3.7.1 14 | matplotlib-inline==0.1.6 15 | numpy==1.25.0 16 | pandas==2.0.3 17 | seaborn==0.12.2 18 | tabulate==0.9.0 19 | torch==2.1.0 20 | transformers==4.38.2 21 | --------------------------------------------------------------------------------