├── requirements.txt.nvidia ├── generate.py ├── discouraging_words.py ├── PromptArray.ipynb ├── generator.py ├── generate.ipynb ├── program.py ├── LICENSE ├── lambada_score.py ├── HEAVEN AND HELL.md ├── discouraging-results └── RESULTS └── README.md /requirements.txt.nvidia: -------------------------------------------------------------------------------- 1 | lark==1.3.0 2 | sentencepiece==0.2.1 3 | transformers==4.57.0 4 | torch==2.8.0 5 | triton==3.4.0 6 | kernels==0.10.2 7 | accelerate==1.10.1 -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2021 Jeffrey M. Binder. All rights reserved. 3 | 4 | from generator import PromptArrayGenerator 5 | from transformers import AutoTokenizer, AutoModelForCausalLM 6 | 7 | model_name_or_path = 'openai/gpt-oss-20b' 8 | device = 'cuda' 9 | 10 | length = 2000 11 | do_sample = True 12 | temperature = 0.3 13 | k = 10 14 | p = 1.0 15 | repetition_penalty = 2.0 16 | bad_words = None 17 | num_return_sequences = 2 18 | overlap_factor = 0.25 19 | chat_mode = True 20 | chat_mode_think_first = True 21 | chat_mode_max_thought_length = 2000 22 | 23 | prompt_text = '''Write an imaginary description of a new species of {serpent~snake}.''' 24 | 25 | 26 | # Initialize the model and tokenizer 27 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 28 | model = AutoModelForCausalLM.from_pretrained(model_name_or_path) 29 | model.to(device) 30 | model.eval() 31 | 32 | # Initialize PromptArray 33 | generator = PromptArrayGenerator( 34 | model, 35 | tokenizer 36 | ) 37 | 38 | import time 39 | start_time = time.time() 40 | outputs = generator( 41 | prompt=prompt_text, 42 | chat_mode=chat_mode, 43 | chat_mode_think_first=chat_mode_think_first, 44 | chat_mode_max_thought_length=chat_mode_max_thought_length, 45 | num_return_sequences=num_return_sequences, 46 | max_length=length, 47 
| do_sample=do_sample, 48 | temperature=temperature, 49 | top_k=k, 50 | top_p=p, 51 | repetition_penalty=repetition_penalty, 52 | bad_words=bad_words, 53 | overlap_factor=overlap_factor, 54 | verbose=True 55 | ) 56 | print(f"Time: {time.time() - start_time}s") 57 | 58 | for i, output in enumerate(outputs): 59 | if num_return_sequences > 1: 60 | print(f'Generated sequence {i}:') 61 | print(output) 62 | 63 | -------------------------------------------------------------------------------- /discouraging_words.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2021 Jeffrey M. Binder. All rights reserved. 3 | 4 | import codecs 5 | import nltk 6 | import random 7 | import scipy.stats 8 | import torch 9 | 10 | from generator import PromptArrayGenerator 11 | from transformers import AutoTokenizer, AutoModelForCausalLM 12 | 13 | model_type = 'gpt2' 14 | model_name_or_path = 'gpt2-xl' 15 | device = 'cuda' 16 | 17 | length = 300 18 | do_sample = True 19 | temperature = 0.6 20 | top_k = 5 21 | top_p = 0.5 22 | repetition_penalty = 1.5 23 | num_return_sequences = 10 24 | num_batches = 100 25 | seed = 14891435220765460437 26 | 27 | experiment_name = "snake~legs" 28 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 29 | prompt_v2 = '''Scientists recently discovered a new species of snake{~ with legs}. 
Here is a description of it:''' 30 | words_to_count = [("leg", "legs", "legged"), ("fur", "furred", "furry"), ("hair", "hairs", "haired", "hairy")] 31 | barnard_test_alternative = "greater" 32 | 33 | f1 = codecs.open(f"discouraging-results/{experiment_name}-v1", "w", "utf8") 34 | f2 = codecs.open(f"discouraging-results/{experiment_name}-v2", "w", "utf8") 35 | 36 | # Initialize the model and tokenizer 37 | torch.manual_seed(seed) 38 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) 39 | model = AutoModelForCausalLM.from_pretrained(model_name_or_path) 40 | model.to(device) 41 | model.eval() 42 | 43 | # Initialize PromptArray 44 | generator = PromptArrayGenerator( 45 | model, 46 | tokenizer 47 | ) 48 | 49 | counts_1 = {word: 0 for word in words_to_count} 50 | counts_2 = {word: 0 for word in words_to_count} 51 | for batch_num in range(num_batches): 52 | print(f"Batch {batch_num}") 53 | for i, prompt in enumerate([prompt_v1, prompt_v2]): 54 | # Needed to avoid weirdness with Torch's random number generator 55 | output_sequences = generator( 56 | prompt=prompt, 57 | max_length=length, 58 | temperature=temperature, 59 | top_k=top_k, 60 | top_p=top_p, 61 | repetition_penalty=repetition_penalty, 62 | do_sample=do_sample, 63 | num_return_sequences=num_return_sequences, 64 | output_token_ids=True, 65 | verbose=True, 66 | ) 67 | 68 | # Remove the batch dimension when returning multiple sequences 69 | if len(output_sequences.shape) > 2: 70 | output_sequences.squeeze_() 71 | 72 | generated_sequences = [] 73 | 74 | for generated_sequence_idx, generated_sequence in enumerate(output_sequences): 75 | generated_sequence = generated_sequence.tolist() 76 | generated_sequence = [idx for idx in generated_sequence if idx != 0] 77 | idx = generated_sequence_idx + batch_num * num_return_sequences 78 | 79 | # Decode text 80 | generated_text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True) 81 | 82 | f = f1 if i == 0 else f2 83 | f.write(f"Sequence 
{idx}:\n{generated_text}\n\n----------\n") 84 | 85 | counts = counts_1 if i == 0 else counts_2 86 | toks = [tok.lower() for tok in nltk.word_tokenize(generated_text)] 87 | for word in words_to_count: 88 | variant_found = False 89 | for variant in word: 90 | if variant in toks: 91 | variant_found = True 92 | break 93 | if variant_found: 94 | counts[word] += 1 95 | 96 | f1.flush() 97 | f2.flush() 98 | print("word\tv1\tv2") 99 | n = num_return_sequences * (batch_num+1) 100 | for word in words_to_count: 101 | x1 = counts_1[word] 102 | x2 = counts_2[word] 103 | print(f"{word}\t{x1}/{n}\t{x2}/{n}") 104 | 105 | print("word\tv1\tv2\tp") 106 | n = num_return_sequences * num_batches 107 | for word in words_to_count: 108 | x1 = counts_1[word] 109 | x2 = counts_2[word] 110 | o = scipy.stats.barnard_exact([[x1, x2], [n-x1, n-x2]], alternative=barnard_test_alternative) 111 | p = o.pvalue 112 | print(f"{word}\t{x1}/{n}\t{x2}/{n}\t{p}") 113 | 114 | f1.close() 115 | f2.close() 116 | 117 | -------------------------------------------------------------------------------- /PromptArray.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "35TQLi4rQ_gh" 17 | }, 18 | "source": [ 19 | "# PromptArray: A Prompting Language for Neural Text Generators\n", 20 | "\n", 21 | "This notebook allows you to experiment with PromptArray, a system for controlling the output of neural text generators. Text generators are usually controlled by prompts: input text that indicates what the model should do. For instance, if you want a description of a species of snake, you can enter the following:\n", 22 | "\n", 23 | "> Scientists recently discovered a new species of snake. 
Here is a description of it:\n", 24 | "\n", 25 | "The machine will then generate a completion of this text, which usually consists of something like the desired description. However, engineering effective prompts is not always straightforward; in particular, it is very hard to design a prompt that effectively tells the generator *not* to do something.\n", 26 | "\n", 27 | "PromptArray allows you to include operators in your prompt that manipulate the machine's predictions. At present, these five operators are available:\n", 28 | "\n", 29 | "| Operator | Meaning |\n", 30 | "| --- | --- |\n", 31 | "| A&B | A and B |\n", 32 | "| A\\|B | A or B |\n", 33 | "| A^B | A and not B |\n", 34 | "| A/B | A more than B |\n", 35 | "| A~B | A as opposed to B |\n", 36 | "\n", 37 | "These operators allow you to construct arrays of multiple prompt variants and merge their output. Don't care if it's a snake or lizard? Write \"a new species of {snake|lizard}.\" Want the species to combine the qualities of a snake and a bird? Write \"{snake&bird}.\" Want to make sure the snake is not venomous? Write \"{~ venomous} snake,\" which is far more effective than simply writing \"non-venomous snake.\" You can combine multiple operators, using {} brackets to group text.\n", 38 | "\n", 39 | "A detailed explanation of this method, along with the code, is available [here](https://github.com/jeffbinder/promptarray)." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "cellView": "form", 47 | "id": "KRSZU1o-qJjz" 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "#@title Setup and Model Selection\n", 52 | "\n", 53 | "#@markdown Using a GPU is recommended, but you will first need to connect to an instance that has one. The larger models will not work unless your instance has enough RAM. 
Note that the XL model will take a while to load.\n", 54 | "\n", 55 | "model = \"gpt2-large\" #@param ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl']\n", 56 | "use_gpu = True #@param {type:\"boolean\"}\n", 57 | "\n", 58 | "%cd /content\n", 59 | "!rm -rf promptarray\n", 60 | "!git clone https://github.com/jeffbinder/promptarray\n", 61 | "%cd promptarray/\n", 62 | "\n", 63 | "!pip install lark\n", 64 | "!pip install sentencepiece\n", 65 | "!pip install git+https://github.com/huggingface/transformers.git@61f64262692ac7dc90e2e0bdeb7e79d9cd607a66\n", 66 | "\n", 67 | "import textwrap\n", 68 | "from generation_utils import *\n", 69 | "\n", 70 | "model_type = model.split('-')[0]\n", 71 | "model_name_or_path = model\n", 72 | "device = 'cuda' if use_gpu else 'cpu'\n", 73 | "\n", 74 | "# Initialize the model and tokenizer\n", 75 | "try:\n", 76 | " model_class, tokenizer_class = MODEL_CLASSES[model_type]\n", 77 | "except KeyError:\n", 78 | " raise KeyError(\"the model {} you specified is not supported. You are welcome to add it and open a PR :)\")\n", 79 | "tokenizer = tokenizer_class.from_pretrained(model_name_or_path)\n", 80 | "model = model_class.from_pretrained(model_name_or_path)\n", 81 | "model.to(device)\n", 82 | "model.eval()\n", 83 | "\n", 84 | "print(\"Ready!\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "cellView": "form", 92 | "id": "x0dDxxwGB8DF" 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#@title Prompt Entry\n", 97 | "\n", 98 | "prompt_text = \"Scientists recently discovered a new species of {serpent~snake}. 
Here is a description of it:\" #@param {type:\"string\"}\n", 99 | "\n", 100 | "output_length = 100 #@param {type:\"slider\", min:1, max:500, step:1}\n", 101 | "num_return_sequences = 3 #@param {type:\"slider\", min:1, max:10, step:1}\n", 102 | "\n", 103 | "#@markdown ___\n", 104 | "\n", 105 | "do_sample = True #@param {type:\"boolean\"}\n", 106 | "temperature = 0.6 #@param {type:\"slider\", min:0, max:1, step:0.01}\n", 107 | "top_k = 5 #@param {type:\"slider\", min:0, max:20, step:1}\n", 108 | "top_p = 0.8 #@param {type:\"slider\", min:0, max:1, step:0.01}\n", 109 | "repetition_penalty = 1.5 #@param {type:\"slider\", min:0, max:5, step:0.1}\n", 110 | "overlap_factor = 0.25 #@param {type:\"slider\", min:0, max:1, step:0.01}\n", 111 | "show_program = True #@param {type:\"boolean\"}\n", 112 | "\n", 113 | "\n", 114 | "def adjust_length_to_model(length, max_sequence_length):\n", 115 | " if length < 0 and max_sequence_length > 0:\n", 116 | " length = max_sequence_length\n", 117 | " elif 0 < max_sequence_length < length:\n", 118 | " length = max_sequence_length # No generation bigger than model size\n", 119 | " elif length < 0:\n", 120 | " length = MAX_LENGTH # avoid infinite loop\n", 121 | " return length\n", 122 | "length = adjust_length_to_model(output_length, max_sequence_length=model.config.max_position_embeddings)\n", 123 | "\n", 124 | "import time\n", 125 | "start_time = time.time()\n", 126 | "output_sequences = model.generate(\n", 127 | " prompt=prompt_text,\n", 128 | " chat_mode=False,\n", 129 | " overlap_factor=overlap_factor,\n", 130 | " tokenizer=tokenizer,\n", 131 | " max_length=length,\n", 132 | " temperature=temperature,\n", 133 | " top_k=top_k,\n", 134 | " top_p=top_p,\n", 135 | " repetition_penalty=repetition_penalty,\n", 136 | " do_sample=do_sample,\n", 137 | " num_return_sequences=num_return_sequences,\n", 138 | " pad_token_id=0,\n", 139 | " verbose=show_program,\n", 140 | ")\n", 141 | "print(f\"Time: {time.time() - start_time}s\\n\")\n", 142 | "\n", 
143 | "# Remove the batch dimension when returning multiple sequences\n", 144 | "if len(output_sequences.shape) > 2:\n", 145 | " output_sequences.squeeze_()\n", 146 | "\n", 147 | "generated_sequences = []\n", 148 | "\n", 149 | "for generated_sequence_idx, generated_sequence in enumerate(output_sequences):\n", 150 | " generated_sequence = generated_sequence.tolist()\n", 151 | " generated_sequence = [idx for idx in generated_sequence if idx != 0]\n", 152 | "\n", 153 | " # Decode text\n", 154 | " generated_text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)\n", 155 | "\n", 156 | " if num_return_sequences > 1:\n", 157 | " print(f'\\nGenerated sequence {generated_sequence_idx}:')\n", 158 | " print('\\n'.join(textwrap.wrap(generated_text)))\n", 159 | "\n" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "accelerator": "GPU", 165 | "colab": { 166 | "authorship_tag": "ABX9TyOKXzE34N6meiOstED+R9Ms", 167 | "collapsed_sections": [], 168 | "include_colab_link": true, 169 | "name": "PromptArray.ipynb", 170 | "provenance": [] 171 | }, 172 | "kernelspec": { 173 | "display_name": "Python 3 (ipykernel)", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.9.12" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 4 192 | } 193 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2021-23 Jeffrey M. Binder. All rights reserved. 3 | # 4 | # This file contains a few lines of code adapted from the HuggingFace Transformers library, 5 | # which is under the Apache license. 
# This library is covered by the following copyright statement:
#
# Copyright 2020 The Google AI Language Team Authors, Facebook AI Research authors and The HuggingFace Inc. team.
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import torch
import transformers

from program import Program


class PromptArrayGenerator:
    """Generate text from a PromptArray prompt with a causal language model.

    The prompt is compiled by ``Program.compile`` into a batch of prompt
    variants plus a ``Program``.  At every decoding step the model is run on
    all variants, the per-variant next-token distributions are merged by the
    program, and one shared continuation token per returned sequence is
    appended to every variant.
    """

    def __init__(
        self,
        model: transformers.PreTrainedModel,
        tokenizer: Any,
        bos_token_id: Optional[int] = None,
        pad_token_id: Optional[int] = None,
        eos_token_id: Optional[int] = None,
        use_cache: Optional[bool] = True,
    ):
        """Store the model/tokenizer and resolve the special token ids.

        Explicit ``*_token_id`` arguments take precedence over the values in
        ``model.config``; the pad id falls back to 0 when neither is set.
        """
        self.model = model
        config = self.model.config
        self.vocab_size = config.vocab_size
        # Compare against None instead of using `or`: 0 is a legitimate token
        # id and must not be treated as "not provided".
        self.bos_token_id = bos_token_id if bos_token_id is not None else config.bos_token_id
        if pad_token_id is not None:
            self.pad_token_id = pad_token_id
        elif config.pad_token_id is not None:
            self.pad_token_id = config.pad_token_id
        else:
            self.pad_token_id = 0
        self.eos_token_id = eos_token_id if eos_token_id is not None else config.eos_token_id
        self.device = self.model.device

        self.tokenizer = tokenizer
        self.use_cache = use_cache

    def _encode_bad_words(self, bad_words: List[str]) -> List[List[int]]:
        """Tokenize every bad word as-is, space-prefixed, title-cased and both."""
        encoded = []
        for word in bad_words:
            for form in (word, " " + word, word.title(), " " + word.title()):
                # Special tokens (e.g. a leading BOS) would make the sequence
                # impossible to match against generated text.
                token_ids = self.tokenizer.encode(form, add_special_tokens=False)
                if token_ids:
                    encoded.append(token_ids)
        return encoded

    def _build_bad_word_filters(
        self,
        bad_words_ids: Optional[List[List[int]]],
        device: torch.device,
    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, int]]]:
        """Split banned sequences into a single-token mask and multi-token filters.

        Returns a ``(1, vocab_size)`` boolean mask of tokens that are always
        banned, and a list of ``(prefix, last_token)`` pairs: ``last_token``
        is banned whenever a sequence currently ends with ``prefix``.
        """
        single_token_mask = torch.zeros(self.vocab_size, dtype=torch.bool, device=device)
        multitoken_filters = []
        for token_ids in bad_words_ids or []:
            if not token_ids:
                continue  # nothing to ban; would otherwise fail on token_ids[-1]
            if len(token_ids) == 1:
                single_token_mask[token_ids[0]] = True
            else:
                prefix = torch.tensor(token_ids[:-1], device=device)
                multitoken_filters.append((prefix, token_ids[-1]))
        return single_token_mask.unsqueeze(0), multitoken_filters

    @staticmethod
    def _warp_scores(
        scores: torch.Tensor,
        input_ids: torch.Tensor,
        temperature: Optional[float],
        top_k: Optional[int],
        top_p: Optional[float],
        repetition_penalty: Optional[float],
    ) -> torch.Tensor:
        """Apply temperature, top-k, top-p and repetition penalty, in that order.

        NOTE(review): ``scores`` are the merged probabilities returned by the
        program, not raw logits, while the arithmetic below follows the usual
        logit-space formulas -- confirm this is intended.
        """
        if temperature is not None:
            scores = scores / temperature

        # top_k <= 0 means "disabled"; clamp so torch.topk never exceeds the vocabulary.
        if top_k is not None and top_k > 0:
            k = min(top_k, scores.shape[-1])
            kth_best = torch.topk(scores, k)[0][..., -1, None]
            scores = scores.masked_fill(scores < kth_best, -float("Inf"))

        if top_p is not None:
            sorted_scores, sorted_indices = torch.sort(scores, descending=False)
            cumulative_probs = sorted_scores.softmax(dim=-1).cumsum(dim=-1)
            sorted_indices_to_remove = cumulative_probs <= (1 - top_p)
            # Always keep the most likely token so the distribution can never
            # become empty (which would turn into NaNs after the softmax).
            sorted_indices_to_remove[..., -1] = False
            indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
            scores = scores.masked_fill(indices_to_remove, -float("Inf"))

        if repetition_penalty is not None:
            seen = torch.gather(scores, 1, input_ids)
            seen = torch.where(seen < 0, seen * repetition_penalty, seen / repetition_penalty)
            scores.scatter_(1, input_ids, seen)

        return scores

    def __call__(
        self,
        prompt: str,
        chat_mode: bool = False,
        chat_mode_think_first: bool = False,
        chat_mode_max_thought_length: int = 2000,
        num_return_sequences: int = 1,
        max_length: Optional[int] = None,
        do_sample: bool = False,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        repetition_penalty: Optional[float] = None,
        bad_words: Optional[List[str]] = None,
        bad_words_ids: Optional[List[List[int]]] = None,
        overlap_factor: float = 0.25,
        output_token_ids: bool = False,
        verbose: bool = False
    ):
        """Generate ``num_return_sequences`` continuations for ``prompt``.

        Args:
            prompt: PromptArray prompt; compiled with ``Program.compile``.
            chat_mode, chat_mode_think_first, chat_mode_max_thought_length,
                overlap_factor, verbose: forwarded unchanged to
                ``Program.compile``.
            num_return_sequences: number of independent continuations.
            max_length: maximum number of tokens to generate (required).
            do_sample: sample from the warped distribution instead of taking
                the argmax.
            temperature, top_k, top_p, repetition_penalty: optional score
                warpers; ``None`` disables each one, and ``top_k <= 0`` also
                disables top-k.
            bad_words: strings that must not be generated; each is banned in
                plain, space-prefixed and title-cased forms.
            bad_words_ids: pre-tokenized alternative to ``bad_words``.
            output_token_ids: return token ids instead of decoded text.

        Returns:
            A tensor of generated token ids with one row per returned
            sequence when ``output_token_ids`` is true, otherwise a list of
            decoded strings with ``<|endoftext|>`` removed.

        Raises:
            ValueError: if ``max_length`` is missing, ``temperature`` is not
                positive, or both ``bad_words`` and ``bad_words_ids`` are given.
        """
        # Fail fast: compiling the prompt can be expensive, so validate the
        # arguments before doing any model work.
        if max_length is None:
            raise ValueError("`max_length` must be specified!")
        if temperature is not None and temperature <= 0:
            raise ValueError("`temperature` must be positive!")
        if bad_words and bad_words_ids:
            raise ValueError("Cannot specify both `bad_words` and `bad_words_ids`!")
        if bad_words:
            bad_words_ids = self._encode_bad_words(bad_words)

        model_kwargs = {
            "use_cache": self.use_cache
        }

        with torch.no_grad():
            program, input_ids, attention_mask = Program.compile(
                prompt,
                self.tokenizer,
                self.bos_token_id,
                self.pad_token_id,
                self.vocab_size,
                overlap_factor,
                chat_mode,
                chat_mode_think_first,
                chat_mode_max_thought_length,
                verbose,
                analysis_model=self.model
            )

            # Rows are laid out variant-major: row v * num_return_sequences + i
            # holds prompt variant v of returned sequence i.
            input_ids = input_ids.repeat_interleave(num_return_sequences, dim=0).to(self.device)
            attention_mask = attention_mask.repeat_interleave(num_return_sequences, dim=0)
            model_kwargs["attention_mask"] = attention_mask.to(self.device)

            batch_size = input_ids.shape[0]
            num_variants = batch_size // num_return_sequences
            prompt_len = input_ids.shape[-1]

            if bad_words_ids:
                single_token_bad_words_mask, multitoken_filters = self._build_bad_word_filters(
                    bad_words_ids, input_ids.device
                )

            # eos_token_id may be a single id or a list of ids; None disables
            # early stopping.
            eos_ids = None
            if self.eos_token_id is not None:
                eos_ids = torch.as_tensor(self.eos_token_id, device=input_ids.device).reshape(-1)

            unfinished_sequences = torch.ones(batch_size, dtype=input_ids.dtype, device=input_ids.device)

            if self.use_cache:
                past_key_values = transformers.DynamicCache(config=self.model.config)
                cache_position = torch.arange(prompt_len, dtype=torch.int64, device=input_ids.device)

            first = True
            cur_length = 0
            while cur_length < max_length:
                if self.use_cache:
                    # After the first step only the newest token is fed; the
                    # rest lives in the key/value cache.
                    model_inputs = self.model.prepare_inputs_for_generation(
                        input_ids if first else input_ids[:, -1:],
                        past_key_values=past_key_values,
                        cache_position=cache_position,
                        **model_kwargs
                    )
                else:
                    model_inputs = self.model.prepare_inputs_for_generation(input_ids, **model_kwargs)

                outputs = self.model(**model_inputs, return_dict=True)
                if self.use_cache:
                    past_key_values = outputs.past_key_values
                    cache_position = cache_position[-1:] + 1

                # The program combines probabilities, so normalize first.
                scores = torch.nn.functional.softmax(outputs.logits[:, -1, :], dim=-1)
                scores = program(scores, num_return_sequences)
                scores = self._warp_scores(scores, input_ids, temperature, top_k, top_p, repetition_penalty)

                if bad_words_ids:
                    bad_words_mask = single_token_bad_words_mask.expand(batch_size, -1).clone()
                    for prefix, banned_token in multitoken_filters:
                        prefix_len = prefix.shape[0]
                        if input_ids.shape[1] < prefix_len:
                            continue
                        # Match per returned sequence on the first variant's
                        # rows and reuse the verdict for the other variants so
                        # that all variants keep the same continuation.
                        ends_with_prefix = (
                            input_ids[:num_return_sequences, -prefix_len:] == prefix
                        ).all(dim=-1)
                        bad_words_mask[ends_with_prefix.repeat(num_variants), banned_token] = True
                    scores = scores.masked_fill(bad_words_mask, -float("Inf"))

                # Tokens are chosen from the first variant's rows only and
                # copied to every other variant: the variants differ in their
                # prompts but must share one generated continuation.
                if do_sample:
                    probs = torch.nn.functional.softmax(scores[:num_return_sequences, :], dim=-1)
                    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1).repeat(num_variants)
                else:
                    next_tokens = torch.argmax(scores[:num_return_sequences, :], dim=-1).repeat(num_variants)

                # Finished sequences keep receiving the pad token.
                next_tokens = next_tokens * unfinished_sequences + self.pad_token_id * (1 - unfinished_sequences)
                input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
                model_kwargs["attention_mask"] = torch.cat(
                    [
                        model_kwargs["attention_mask"],
                        model_kwargs["attention_mask"].new_ones((batch_size, 1))
                    ],
                    dim=-1
                )

                cur_length += 1
                if eos_ids is not None:
                    still_running = ~torch.isin(next_tokens, eos_ids)
                    unfinished_sequences = unfinished_sequences * still_running.to(unfinished_sequences.dtype)
                    if unfinished_sequences.max() == 0:
                        break

                first = False

            # Only the first variant's rows are returned, without the prompt.
            output_ids = input_ids[0:num_return_sequences, prompt_len:]

        if output_token_ids:
            return output_ids

        return [
            (
                self.tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
                .replace('<|endoftext|>', '')
            )
            for generated_sequence in output_ids
        ]


# NOTE: the remainder of this span in the listing is the start of the next
# file in the dump; it is kept verbatim below so that no content is lost.
# --------------------------------------------------------------------------------
# /generate.ipynb:
# --------------------------------------------------------------------------------
# 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "8ffd4b39", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stderr", 11 | "output_type": "stream", 12 | "text": [ 13 | "/media/jechk/Ground/projects/promptarray/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets.
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 14 | " from .autonotebook import tqdm as notebook_tqdm\n", 15 | "Fetching 41 files: 100%|██████████| 41/41 [00:00<00:00, 151645.91it/s]\n", 16 | "Fetching 41 files: 100%|██████████| 41/41 [00:00<00:00, 240849.39it/s]\n", 17 | "Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00, 2.13s/it]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "import torch\n", 23 | "from generator import PromptArrayGenerator\n", 24 | "from transformers import AutoTokenizer, AutoModelForCausalLM\n", 25 | "\n", 26 | "import os\n", 27 | "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"expandable_segments:True\"\n", 28 | "\n", 29 | "model_name_or_path = 'openai/gpt-oss-20b'\n", 30 | "device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'\n", 31 | "\n", 32 | "# Initialize the model and tokenizer\n", 33 | "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n", 34 | "model = AutoModelForCausalLM.from_pretrained(\n", 35 | " model_name_or_path,\n", 36 | " device_map='cpu' if device == 'mps' else 'auto',\n", 37 | " dtype=torch.bfloat16\n", 38 | ")\n", 39 | "if device == 'mps':\n", 40 | " model = model.to('mps')\n", 41 | "model.eval()\n", 42 | "\n", 43 | "# Initialize PromptArray\n", 44 | "generator = PromptArrayGenerator(\n", 45 | " model,\n", 46 | " tokenizer\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "id": "d41d2046", 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "-- Generated prompt variants:\n", 61 | "0: Generate a mellifluous and euphuistic literary description of what it's like to live in New York. Keep to one paragraph.\n", 62 | "1: Generate an ungrammatical and badly spelled reddit shitpost about what it's like to live in New York. 
Keep to one paragraph.\n", 63 | "-- Program:\n", 64 | "0: 0 &= 1\n", 65 | "-- Generated analysis:\n", 66 | "<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\n", 67 | "Knowledge cutoff: 2024-06\n", 68 | "Current date: 2025-09-27\n", 69 | "\n", 70 | "Reasoning: medium\n", 71 | "\n", 72 | "# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Generate a mellifluous and euphuistic literary description of what it's like to live in New York. Keep to one paragraph.<|end|><|start|>assistant<|channel|>analysis<|message|>We need to produce a single paragraph, literary description, mellifluous and euphuistic (which means ornate, rhetorical, elegant). So a single paragraph, with flowery language, describing life in New York. Should sound like a poetic description.\n", 73 | "\n", 74 | "Make sure not too long but detailed, with metaphors and sound description. Use words like \"celestial symphony of honks\" etc. Use rhetorical structure. Should be one paragraph.\n", 75 | "\n", 76 | "Ok, produce.<|end|><|start|>assistant<|channel|>final<|message|>\n", 77 | "-- Generated analysis:\n", 78 | "<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\n", 79 | "Knowledge cutoff: 2024-06\n", 80 | "Current date: 2025-09-27\n", 81 | "\n", 82 | "Reasoning: medium\n", 83 | "\n", 84 | "# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Generate an ungrammatical and badly spelled reddit shitpost about what it's like to live in New York. Keep to one paragraph.<|end|><|start|>assistant<|channel|>analysis<|message|>We need to generate content as a \"reddit shitpost\" meaning ungrammatical, badly spelled. Use one paragraph. It's about what it's like to live in New York. It should be a casual, meme-like rant. Should be ungrammatical and badly spelled. Let's think: Use nonsense, slang, typos. 
Possibly include emojis. It should sound like a \"shitpost\" - maybe with tags like \"lol\", \"imagine\". Let's write in a single paragraph with many errors. Let's produce a comedic rant: \"new york is so weird\" etc. Provide one paragraph, many typos and broken grammar. Let's do that.<|end|><|start|>assistant<|channel|>final<|message|>\n", 85 | "Time: 153.4034161567688s\n", 86 | "Generated sequence 0:\n", 87 | "New York is so much that if you close an eye, every time it blinks out the city is still breathing – in a neon‑glossed chorus on Fifth Avenue – the subway’s a drumbeat in concrete lungs with its own heartbeat of screech, while on the street you hear the chatter, honkmaster horns, pigeon gossip in an endless, honky-tongued ballet. A morning walk feels as if you are tracing an invisible constellation that glows, each skyscraper its own bright star, all pointing you to where the next slice will melt your tongue into a sugary‑sour dream while taxis swirl like confederate beetles, racing across traffic jam‑beasts. When it rains, you are the one who can see that the sky is not a sheet, rather it's all those bright windows and windows that reflect the same light of a thousand neon signs that make every corner an echo in time. The city will give you the most intense, unending, and in fact, an all-infinite feeling of living on an endless loop where the rhythm keeps on, and the beat is your own pulse. The streets are like endless rivers that flow and you have nothing to do, just a simple thing: be there – in a moment or a lifetime. You never truly find yourself but always keep finding yourself and keep the city alive – it never sleeps. 
<|return|>\n", 88 | "Generated sequence 1:\n", 89 | "New YOrk, oh wretchedly wondrous city in the middle where the sky is an ever-shrouded, smog-stained cloud over which pigeonic pigeons parade, a city that makes your stomach churn at dawn with subway clatter but also gives heart-throbbing joy like you just found that last hidden pizza pie, the streets, they twist like a labyrinth in some insane dreamscape but you still find your way home, each day you get an endless supply from the street vendors of hot-doggish and bagelicious delight that make it feel almost too good to have an apartment full 4/5 the size of a tiny, cramped box. The people here are such weird creatures, with their bright eyes like a million flickering neon lamps on every corner, but at least you can always see them all and still feel at one, all in that same, insane and wonderful, chaotic symp. They keep moving in the same rhythm; like, they never stop, but that’s part because this place just does a lot, and it’s never dull. The city has a certain charm, a certain kind o… (the answer cut off to keep it short) but still you get so many new experiences from the city that is the place with its very essence.<|return|>\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "length = 2000\n", 95 | "do_sample = True\n", 96 | "temperature = 0.3\n", 97 | "k = 10\n", 98 | "p = 1.0\n", 99 | "repetition_penalty = 2.0\n", 100 | "bad_words = []\n", 101 | "num_return_sequences = 2\n", 102 | "overlap_factor = 0.25\n", 103 | "chat_mode = True\n", 104 | "chat_mode_think_first = True\n", 105 | "chat_mode_max_thought_length = 2000\n", 106 | "\n", 107 | "prompt_text = '''Generate {a mellifluous and euphuistic literary description of&an ungrammatical and badly spelled reddit shitpost about} what it's like to live in New York. 
Keep to one paragraph.'''\n", 108 | "#prompt_text = '''Write a short, one-paragraph description of a {snake&{cat/kitty}}.'''\n", 109 | "\n", 110 | "import time\n", 111 | "start_time = time.time()\n", 112 | "outputs = generator(\n", 113 | " prompt=prompt_text,\n", 114 | " chat_mode=chat_mode,\n", 115 | " chat_mode_think_first=chat_mode_think_first,\n", 116 | " chat_mode_max_thought_length=chat_mode_max_thought_length,\n", 117 | " num_return_sequences=num_return_sequences,\n", 118 | " max_length=length,\n", 119 | " do_sample=do_sample,\n", 120 | " temperature=temperature,\n", 121 | " top_k=k,\n", 122 | " top_p=p,\n", 123 | " repetition_penalty=repetition_penalty,\n", 124 | " bad_words=bad_words,\n", 125 | " overlap_factor=overlap_factor,\n", 126 | " verbose=True\n", 127 | ")\n", 128 | "print(f\"Time: {time.time() - start_time}s\")\n", 129 | "\n", 130 | "for i, output in enumerate(outputs):\n", 131 | " if num_return_sequences > 1:\n", 132 | " print(f'Generated sequence {i}:')\n", 133 | " print(output)\n", 134 | "\n" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": ".venv", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.12.3" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 5 159 | } 160 | -------------------------------------------------------------------------------- /program.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2021-23 Jeffrey M. Binder. All rights reserved. 
import math
import time
import transformers
import torch
from torch.nn import functional as F

from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from lark import Lark


class ChannelFinalStoppingCriteria(transformers.StoppingCriteria):
    """
    Stopping criteria that stops generation after the sequence "<|channel|>final<|message|>".
    """
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.stop_sequence = "<|channel|>final<|message|>"
        # Token ids of the stop sequence. __call__ below matches on decoded
        # text and does not read this attribute; it is kept for callers.
        self.stop_tokens = tokenizer.encode(self.stop_sequence, add_special_tokens=False)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as any sequence in the batch contains the stop
        # sequence in its decoded text (special tokens are kept so that the
        # markers are visible).
        return any(
            self.stop_sequence in self.tokenizer.decode(sequence, skip_special_tokens=False)
            for sequence in input_ids
        )

# Grammar of Boolean prompts. Operators bind, from loosest to tightest:
# "|" and "^", then "&" and "/", then "~", then concatenation. Braces group.
# Any operator character can be included literally by backslash-escaping it.
parser = Lark(r'''
start: _exp1

_exp1: or_exp | sub_exp | _exp2
_exp2: and_exp | div_exp | _exp3
_exp3: rather_exp | _exp4
_exp4: concat_exp | _exp5
_exp5: "{" _exp1 "}" | text

or_exp: _exp1 "|" _exp2
sub_exp: _exp1 "^" _exp2
and_exp: _exp2 "&" _exp3
div_exp: _exp2 "/" _exp3
rather_exp: _exp3 "~" _exp4
concat_exp: _exp5 _exp4

text: /([^\/|&~{}^\\]|\\[\/|&~{}^\\])+/?
''')

class ProgramConfig():
    """
    Settings shared by all operations of a compiled program.
    """
    def __init__(self, vocab_size: int, overlap_factor: float):
        self.vocab_size = vocab_size
        self.overlap_factor = overlap_factor

class Operation():
    """
    One operation of a compiled prompt program.

    `type` is the operator character ('|', '&', '^', '/' or '~') and `args`
    are indices of prompt variants. When executed, the rows for args[0] and
    args[1] are combined and the result is stored in the row for args[0];
    any further arguments added with add_arg() are not read by __call__.
    """
    def __init__(self, type: str, *args: int):
        self.type = type
        self.args = list(args)

    def __str__(self):
        arg_str = ', '.join(str(a) for a in self.args[1:])
        return f"{self.args[0]} {self.type}= {arg_str}"

    def add_arg(self, arg: int):
        self.args.append(arg)

    def shift(self, k: int):
        """
        Returns a copy of this operation with every variant index increased by k.
        """
        return Operation(self.type, *[arg + k for arg in self.args])

    def __call__(self, config: ProgramConfig, logits: torch.Tensor, num_return_sequences: int):
        """
        Applies the operation to `logits` in place and returns it.

        Rows are laid out variant-major: variant `a` of return sequence `i`
        is row `a*num_return_sequences + i`.
        NOTE(review): the rows are multiplied, divided and renormalized by
        their sum, so they are presumably probabilities rather than raw
        logits despite the name -- confirm against the generator.
        """
        for i in range(num_return_sequences):
            args = [arg*num_return_sequences + i for arg in self.args]
            t0 = logits[args[0], :]
            t1 = logits[args[1], :]

            if self.type == '|':
                t_and = t0 * t1
                t_and /= torch.sum(t_and)
                p = config.overlap_factor
                t0 = (t0 + t1 - p*t_and) / (2 - p)

            elif self.type == '&':
                t0 = t0 * t1
                t0 /= torch.sum(t0)

            elif self.type == '^':
                t0 *= 1 - t1/torch.max(t1)
                t0 /= torch.sum(t0)

            elif self.type == '/':
                t0 /= t1
                t0 /= torch.sum(t0)

            elif self.type == '~':
                t0 *= t0 / t1
                t0 /= torch.sum(t0)

            logits[args[0], :] = t0

        # Divisions by zero above can produce NaNs; rule those entries out.
        nan_mask = logits.isnan()
        logits.masked_fill_(nan_mask, -float("inf"))

        return logits

class Program():
    """
    A compiled Boolean prompt: the source prompt, the list of operations that
    combine its variants, and the configuration those operations use.
    """
    def __init__(self, prompt: str, vocab_size: int, overlap_factor: float):
        self.prompt = prompt
        self.ops = []
        self.config = ProgramConfig(vocab_size, overlap_factor)

    def __call__(
        self,
        logits: torch.Tensor,
        num_return_sequences: int
    ) -> torch.Tensor:
        """
        Executes the opcode produced by the compiler.

        `logits` has one row per (variant, return sequence) pair. It is
        modified in place and returned, with every variant's rows overwritten
        by the combined result held in variant 0.
        """
        num_variants = logits.shape[0] // num_return_sequences

        # run the code!
        for op in self.ops:
            logits = op(
                self.config, logits, num_return_sequences
            )

        # replace the results for all variants with the combined result
        for j in range(num_return_sequences):
            for k in range(1, num_variants):
                logits[j + k * num_return_sequences, :] = logits[j, :]

        return logits

    @classmethod
    def compile(
        cls,
        prompt: str,
        tokenizer: Any,
        bos_token_id: int,
        pad_token_id: int,
        vocab_size: int,
        overlap_factor: float,
        chat_mode: bool,
        chat_mode_think_first: bool,
        chat_mode_max_thought_length: int,
        verbose: bool,
        analysis_model: Optional[Any] = None
    ):
        """
        Parses a Boolean prompt and transforms it into a form suitable for use
        with the model. Returns three things:
        1) a Program object that can be called to execute the operations
           included in the prompt,
        2) a tensor comprising all possible prompt variants, suitable for use
           as input to the model; and
        3) an attention mask that must be used when the model is run, so as to
           account for prompt variants of different lengths.

        Raises ValueError if the parse tree contains a node type that the
        compiler does not know.
        """
        tree = parser.parse(prompt)

        optypes = {
            'or_exp': '|',
            'and_exp': '&',
            'sub_exp': '^',
            'div_exp': '/',
            'rather_exp': '~',
        }

        # Depth-first traversal. Takes the prompt strings built so far and
        # returns the expanded strings together with the operations that
        # combine them (indices refer to positions in the returned list).
        def _dft(node, strings):
            if node.data == 'start':
                return _dft(node.children[0], strings)
            elif node.data == 'concat_exp':
                ops = []
                for child in node.children:
                    nstrings = len(strings)
                    strings, child_ops = _dft(child, strings)
                    # The child may have multiplied the number of variants;
                    # replicate the operations gathered so far for each copy.
                    multiplier = len(strings) // nstrings
                    multiplied_ops = []
                    for k in range(multiplier):
                        multiplied_ops += [op.shift(k*nstrings) for op in ops]
                    ops = multiplied_ops + child_ops
                return strings, ops
            elif node.data in optypes:
                child1_strings, child1_ops = _dft(node.children[0], strings)
                shift = len(child1_strings)
                child2_strings, child2_ops = _dft(node.children[1], strings)
                new_strings = child1_strings + child2_strings
                ops = child1_ops + [op.shift(shift) for op in child2_ops]
                # Combine the first variant of each side into variant 0.
                ops.append(Operation(optypes[node.data], 0, shift))
                return new_strings, ops
            elif node.data == 'text':
                if node.children:
                    txt = node.children[0].value
                else:
                    txt = ""
                # Remove one level of backslash escaping.
                unescaped_txt = ''
                escape = False
                for c in txt:
                    if not escape and c == '\\':
                        escape = True
                    else:
                        unescaped_txt += c
                        escape = False
                txt = unescaped_txt
                return [s + txt for s in strings], []
            # Fail loudly instead of returning None and crashing on unpacking.
            raise ValueError(f"Unexpected node in parsed prompt: {node.data}")
        strings, ops = _dft(tree, [""])

        if verbose:
            print("-- Generated prompt variants:")
            for i, s in enumerate(strings):
                print(f"{i}: {s}")
            print("-- Program:")
            if not ops:
                print("[No operations]")
            for i, op in enumerate(ops):
                print(f"{i}: {op}")

        input_ids = []
        max_len = 0
        for s in strings:
            if chat_mode:
                s = cls.chat_mode_preprocess(
                    s, tokenizer, analysis_model,
                    chat_mode_think_first,
                    chat_mode_max_thought_length,
                    verbose
                )
            toks = tokenizer.tokenize(s)
            ids = tokenizer.convert_tokens_to_ids(toks)
            if bos_token_id is not None:
                ids = [bos_token_id] + ids
            input_ids.append(ids)
            if len(ids) > max_len:
                max_len = len(ids)

        # Left-pad every variant to the same length and mask out the padding.
        input_ids_padded = []
        input_attention_mask = []
        for ids in input_ids:
            n_pad_toks = max_len - len(ids)
            padded_ids = [pad_token_id or 0] * n_pad_toks + ids
            input_ids_padded.append(padded_ids)
            attention_mask = [0] * n_pad_toks + [1] * len(ids)
            input_attention_mask.append(attention_mask)

        ids = torch.tensor(input_ids_padded)
        attention_mask = torch.tensor(input_attention_mask)

        program = Program(prompt, vocab_size, overlap_factor)
        program.ops = ops

        return program, ids, attention_mask

    @classmethod
    def chat_mode_preprocess(
        cls,
        prompt: str,
        tokenizer: Any,
        analysis_model: Any,
        separate_analysis: bool = False,
        max_analysis_length: int = 200,
        verbose: bool = False,
    ):
        """
        Wraps a prompt variant in the tokenizer's chat template and returns
        the resulting text.

        If `separate_analysis` is true, `analysis_model` first generates up to
        `max_analysis_length` new tokens (stopping once the final-channel
        marker appears) and the decoded templated prompt plus that generation
        is returned. Otherwise only the templated prompt, ending with the
        generation prompt, is returned.
        """
        if separate_analysis:
            inputs = tokenizer.apply_chat_template(
                [{"role": "user", "content": prompt}],
                add_generation_prompt=True,
                return_tensors="pt",
                return_dict=True
            ).to(analysis_model.device)
            outputs = analysis_model.generate(
                **inputs,
                max_new_tokens=max_analysis_length,
                stopping_criteria=transformers.StoppingCriteriaList([
                    ChannelFinalStoppingCriteria(tokenizer)
                ])
            )
            text = tokenizer.decode(outputs[0])
            if verbose:
                print("-- Generated analysis:")
                print(text)
            return text
        else:
            text = tokenizer.apply_chat_template(
                [{"role": "user", "content": prompt}],
                tokenize=False,
                add_generation_prompt=True
            )
            return text

    @classmethod
    def escape(cls, prompt: str):
        r"""
        Escapes special characters in a string for use in a Boolean prompt.

        Each of the characters \ / | & ~ ^ { } is prefixed with a backslash;
        everything else is copied unchanged.
        """
        return "".join("\\" + c if c in "\\/|&~^{}" else c for c in prompt)
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
# coding=utf-8
# Copyright (c) 2021 Jeffrey M. Binder. All rights reserved.

import json
import math
import numpy as np
import nltk
import os
import re
import sys
import torch
import transformers

from transformers import AutoTokenizer, AutoModelForCausalLM

from program import Program
from generator import PromptArrayGenerator

model_type = 'gpt2'
model_name_or_path = 'gpt2'
device = 'cuda'

test_mode = 'word'

repetition_penalty = None
suppress_punctuation = True
batch_size = 20

prompting_mode = 'sentence' # One of 'default', 'blank', 'fixed', 'word', 'phrase', 'sentence', 'sentence|blank', 'sentence|word', 'sentence|phrase', 'sentence|word|phrase'
prefix = '[...]'
fixed_negative_prompt = '[...] and'
finetune_sentence_tokenizer = False
regularize_text = False
overlap_factor = 0.0

re_phrase_boundary = re.compile('[,.:;?!"“”]')


def _regularize(text):
    """
    Replaces newlines and typographic quotes with plain ASCII equivalents.

    NOTE(review): as extracted, the second replace maps a space to a space
    and is a no-op; its first argument was presumably a doubled or
    non-breaking space -- confirm against the upstream file.
    """
    return text.replace('\n', ' ').replace(' ', ' ').replace('“', '"').replace('”', '"').replace('’', '\'').replace('‘', '\'')


# Initialize the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model.to(device)
model.eval()

generator = PromptArrayGenerator(
    model,
    tokenizer
)

# NOTE(review): in the extracted source the XLM patterns contained an empty
# group "()?" and is_word_piece tested tok.endswith(''), which is always true
# and made the predicate always false. The "</w>" end-of-word marker below is
# a reconstruction -- confirm against the upstream file.
if model_type == 'xlm':
    re_word = re.compile(r"^ ?[A-Za-z']+(</w>)?$")
    re_final_punct = re.compile(r"^.*?([^A-Za-z' ]+(</w>)?)$")
    re_first_punct_on = re.compile(r"^.*?([^@A-Za-z' ]+.*)$")
elif model_type == 'ctrl':
    re_word = re.compile(r"^ ?[@A-Za-z']+$")
    re_final_punct = re.compile(r"^.*s?([^@A-Za-z' ]+)$")
    re_first_punct_on = re.compile(r"^.*?([^@A-Za-z' ]+.*)$")
elif model_type == 'xlnet':
    re_word = re.compile(r"^ ?[A-Za-z'▁]+$")
else:
    re_word = re.compile(r"^ ?[A-Za-z']+$")

if model_type == 'xlm':
    def is_word_piece(idx):
        tok = tokenizer.convert_ids_to_tokens([idx])[0]
        return re_word.match(tok) and not tok.endswith('</w>')
elif model_type == 'ctrl':
    def is_word_piece(idx):
        tok = tokenizer.convert_ids_to_tokens([idx])[0]
        return tok.endswith('@@')
elif model_type == 'xlnet':
    def is_word_piece(idx):
        tok = tokenizer.convert_ids_to_tokens([idx])[0]
        return re_word.match(tok) and not tok.startswith('▁')
else:
    def is_word_piece(idx):
        tok = tokenizer.convert_ids_to_tokens([idx])[0]
        string = tokenizer.convert_tokens_to_string([tok])
        return re_word.match(string) and not string.startswith(' ')
def is_punctuation(idx):
    """Returns True if the token's string form does not match re_word."""
    tok = tokenizer.convert_ids_to_tokens([idx])[0]
    string = tokenizer.convert_tokens_to_string([tok])
    return not re_word.match(string)

punctuation = []
word_pieces = []
vocab = tokenizer.get_vocab()
vocab_size = len(vocab)
for tok, idx in vocab.items():
    tok = tokenizer.convert_tokens_to_string([tok])
    if not re_word.match(tok):
        punctuation.append([idx])
    if model_type in ('xlm', 'ctrl') and test_mode == 'token' and is_word_piece(idx):
        word_pieces.append([idx])

bos_token = tokenizer.bos_token or tokenizer.cls_token or ''
if model_type == 'ctrl':
    bos_token = 'Books '

# The models have word pieces at the beginning of the word, so we must add in an offset when
# locating word boundaries
if model_type in ('xlm', 'ctrl'):
    word_piece_offset = 1
else:
    word_piece_offset = 0

if model_type in ('xlm', 'ctrl') and test_mode == 'token':
    # Do not allow the prediction of word pieces in token mode because they cannot come at the
    # end of sentence in these models
    bad_words_ids = punctuation.copy() if suppress_punctuation else []
    bad_words_ids += word_pieces
elif model_type in ('openai-gpt', 'gpt2', 'xlnet') and test_mode == 'word':
    # Conversely, with these models, the word pieces come at the end, so they must be suppressed
    # at the beginning when we are trying to predict a word.
    # NOTE(review): word_pieces is only filled for xlm/ctrl in token mode
    # above, so nothing is added here -- confirm whether that is intended.
    bad_words_ids = punctuation.copy() if suppress_punctuation else []
    bad_words_ids += word_pieces
else:
    bad_words_ids = punctuation if suppress_punctuation else None

fixed_negative_prompt = Program.escape(fixed_negative_prompt)

def run_model(prompt):
    """
    Runs the generator on a Boolean prompt and returns the model's guess.

    In 'token' test mode the guess is the last generated token id; otherwise
    it is the slice of generated ids making up the first word.
    """
    output_sequences = generator(
        prompt=prompt,
        overlap_factor=overlap_factor,
        num_return_sequences=1,
        max_length=1,
        do_sample=False,
        repetition_penalty=repetition_penalty,
        bad_words_ids=bad_words_ids,
        output_token_ids=True,
    )

    if test_mode == 'word':
        # Punctuation is not suppressed after the first token, since it provides one of the ways
        # by which models can decide that the word has ended. The only straightforward way to implement
        # this given how generate() is implemented is to call it twice.
        guess_1 = output_sequences[0, -1]
        tok_1 = tokenizer.decode([guess_1])
        # The decoded token is literal text, so escape it before splicing it
        # into the Boolean prompt; otherwise an operator character in the
        # token would be parsed as syntax.
        prompt_2 = '{' + prompt + '}' + Program.escape(tok_1)
        output_sequences_2 = generator(
            prompt=prompt_2,
            overlap_factor=overlap_factor,
            num_return_sequences=1,
            max_length=5,
            do_sample=False,
            repetition_penalty=repetition_penalty,
            output_token_ids=True,
        )
        output_sequences = torch.cat([output_sequences, output_sequences_2], dim=1)

    if test_mode == 'token':
        guess = output_sequences[0, -1]
        return guess
    else:
        # Extend the guess over the following tokens for as long as they are
        # word pieces.
        n = output_sequences.shape[1]
        j = 1 - word_piece_offset
        while j < n - word_piece_offset and is_word_piece(output_sequences[0, j]):
            j += 1
        end = j + word_piece_offset
        guess = output_sequences[0, :end].to('cpu')
        return guess

sent_tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer()
if finetune_sentence_tokenizer:
    # The original called str.replace on the (still empty) list of passages,
    # which raised AttributeError. Regularize each passage instead, then join.
    with open('../../data/gpt-2/data/lambada_development.jsonl', encoding='utf-8') as f:
        text = [_regularize(json.loads(line)['text']) + "." for line in f]
    text = '\n'.join(text)
    sent_tokenizer.train(text)
def split_last_sentence(text):
    """
    Splits `text` into (everything before the last sentence, last sentence).

    The sentence boundary is found in a regularized copy of the text and
    applied to the original by length. If no sentence is found, the whole
    text is returned as the last sentence.
    """
    # The following is necessary to get the sentence tokenizer to behave
    regularized_text = _regularize(text)
    sentences = sent_tokenizer.tokenize(regularized_text)
    if not sentences or not sentences[-1]:
        return '', text
    n = len(sentences[-1])
    return text[:-(n+1)], text[-n:]

def interpret_line(line):
    """
    Turns one JSON line of the dataset into (text, prompt, answer).

    `text` is the passage, `prompt` is the Boolean prompt built according to
    `prompting_mode`, and `answer` holds the token id(s) the model is
    expected to produce.

    Raises ValueError if `prompting_mode` is not recognized.
    """
    text = json.loads(line)['text']
    if regularize_text:
        text = _regularize(text)

    # Separate the prompt from the desired output
    ids = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")
    if test_mode == 'token':
        prompt = ids[0,:-1]
        answer = ids[0,-1]
    else:
        # Walk back from the end over word pieces to find where the final
        # word starts.
        n = ids.shape[1]
        i = 1 + word_piece_offset
        while i <= n:
            if not is_word_piece(ids[0,-i]):
                break
            i += 1
        i -= word_piece_offset
        prompt = ids[0,:-i]
        answer = ids[0,-i:]
    prompt = tokenizer.decode(prompt)
    prompt = Program.escape(prompt)

    if prompting_mode == 'default':
        pass

    elif prompting_mode == 'blank':
        prompt = f'{prompt}~'

    elif prompting_mode == 'fixed':
        prompt = f'{prompt}~{fixed_negative_prompt}'

    elif prompting_mode == 'word':
        toks = nltk.word_tokenize(prompt)
        last_tok = Program.escape(toks[-1])
        prompt = f'{prompt}~{prefix}{last_tok}'

    elif prompting_mode == 'phrase':
        phrases = re_phrase_boundary.split(prompt)
        last_phrase = Program.escape(phrases[-1])
        prompt = f'{prompt}~{prefix}{last_phrase}'

    elif prompting_mode == 'sentence':
        _, last_sentence = split_last_sentence(prompt)
        last_sentence = Program.escape(last_sentence)
        prompt = f'{prompt}~{prefix}{last_sentence}'

    elif prompting_mode == 'sentence|blank':
        _, last_sentence = split_last_sentence(prompt)
        last_sentence = Program.escape(last_sentence)
        prompt = f'{prompt}~{prefix}{{{last_sentence}|}}'

    elif prompting_mode == 'sentence|word':
        _, last_sentence = split_last_sentence(prompt)
        last_sentence = Program.escape(last_sentence)
        toks = nltk.word_tokenize(prompt)
        last_tok = Program.escape(toks[-1])
        prompt = f'{prompt}~{prefix}{{{last_sentence}|{last_tok}}}'

    elif prompting_mode == 'sentence|phrase':
        _, last_sentence = split_last_sentence(prompt)
        last_sentence = Program.escape(last_sentence)
        phrases = re_phrase_boundary.split(prompt)
        last_phrase = Program.escape(phrases[-1])
        prompt = f'{prompt}~{prefix}{{{last_sentence}|{last_phrase}}}'

    elif prompting_mode == 'sentence|word|phrase':
        _, last_sentence = split_last_sentence(prompt)
        last_sentence = Program.escape(last_sentence)
        toks = nltk.word_tokenize(prompt)
        last_tok = Program.escape(toks[-1])
        phrases = re_phrase_boundary.split(prompt)
        last_phrase = Program.escape(phrases[-1])
        prompt = f'{prompt}~{prefix}{{{last_sentence}|{last_tok}|{last_phrase}}}'

    else:
        raise ValueError("Unknown prompting mode!")

    return text, prompt, answer

total_score = 0.0
texts = []
prompts = []
answers = []
# Use a context manager so the test file is closed once it has been read.
with open('../../data/gpt-2/data/lambada_test.jsonl', encoding='utf-8') as f:
    for line in f:
        text, prompt, answer = interpret_line(line)
        texts.append(text)
        prompts.append(prompt)
        answers.append(answer)

n = 0
ncorrect = 0
for text, prompt, answer in zip(texts, prompts, answers):
    guess = run_model(prompt)
    n += 1

    if model_type == 'ctrl' and test_mode == 'token':
        guess = [guess]
        answer = [answer]

    if test_mode == 'token':
        if model_type in ('xlm', 'ctrl'):
            # Compare decoded text with any trailing punctuation removed.
            guess_text = tokenizer.decode(guess)
            m = re_final_punct.match(guess_text)
            if m:
                guess_text = guess_text[:-len(m.group(1))]
            answer_text = tokenizer.decode(answer)
            correct = guess_text == answer_text
        else:
            correct = guess == answer
    else:
        if model_type in ('xlm', 'ctrl'):
            # Compare decoded text up to the first punctuation mark.
            guess_text = tokenizer.decode(guess)
            m = re_first_punct_on.match(guess_text)
            if m:
                guess_text = guess_text[:-len(m.group(1))]
            answer_text = tokenizer.decode(answer)
            correct = guess_text == answer_text
        else:
            correct = guess.equal(answer)
    if correct:
        ncorrect += 1

    if n % 100 == 0:
        guess = tokenizer.decode(guess)
        print('----------')
        print(f'Text: {text}')
        print(f'Guess: {guess} - {"correct" if correct else "wrong"} ({ncorrect}/{n} = {100*ncorrect/n})')

print(f'Final results: {ncorrect}/{n} = {100*ncorrect/n}')
The first thing one sees is** the sky and the sun, and they shine brightly and brightly. Then one sees mountains and rivers and lakes and forests. There are many trees, many rivers and lakes, and many mountains and mountainsides. All of this is very lovely, very peaceful, and very tranquil. There is nothing in this world that is more wonderful than this place. It is like the most beautiful flower that has ever been seen. The fragrance of this place is so exquisite that even the fragrance of the most fragrant flower 12 | 13 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** a vast, empty plain, which is the world of existence, the realm of existence. It is the place of birth and death, and the place where one is born and dies. The second is heaven, which is an infinite realm of happiness, joy, pleasure and bliss. It has the appearance and shape of an immense palace, with a great multitude of heavenly beings. The third is hell, which is an immense, gloomy, and miserable realm of torment and suffering, with a great multitude of 14 | 15 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** a vast, boundless sea of light. The next is the sky, which is filled with countless stars, many of them glowing brightly with the light of God's glory. Next is the ocean, filled to the brim with all manner and variety of living creatures, some beautiful, some ugly. There is a vast expanse of land covered with forests and fields, with mountains rising from the earth, and rivers flowing through the lands, and there are rivers flowing through the oceans, and lakes of various 16 | 17 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. 
The first thing one sees is** an immense ocean, with countless islands and islands in it, each island having a different climate. The second thing one notices are the beautiful flowers and fruits of paradise, and the birds that sing there, as if they had come from the skies. In this world, one can find all sorts of pleasures. The third thing one notices is a great mountain range that stretches far away from this world and has mountains on all sides and valleys on the sides of the mountains. The mountains and the valleys are filled 18 | 19 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** the vastness and vastness and endless beauty of Heaven, and then the second is the endlessness of its endless beauty, the beauty of all that is, the beauty of all that is beautiful. The third is that the beauty of the beauty of Heaven, and the fourth is the beauty that comes with the knowledge that the beauty is eternal and infinite. And the fifth is the beauty that comes with knowing that the beauty is eternal and infinite. And the sixth is the knowledge that the beauty is eternal 20 | 21 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** the beautiful blue sky, with clouds of various colors. There is a clear, tranquil ocean with a lotus pond. The sky and ocean have the appearance of the Buddha's teachings, which are the source of all the other objects. There is a beautiful, pure land with a large number of trees. The ground is covered with white flowers and fruits, and it has the appearance of the Buddha's body, with the same characteristics of purity and brightness. In this land, one can find a great 22 | 23 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. 
The first thing one sees is** the beautiful, clear blue sky, which has been created by the Creator. The sky itself has been made of many colors. It is a world where the Creator has placed the most important things. These are:The first place that the Creator places is Heaven. This place has been made for all the virtuous to dwell in, so that they can enjoy their eternal life in the blissful realm. There are two types of heaven: the Pure Land of Bliss and the World of the Six Realms of Ex 24 | 25 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** a beautiful, golden-hued world. It has many different types of mountains and rivers, as well as a variety in its vegetation and animals. There is a large, green forest with a variety in its trees, as well a variety of birds and animals. There is a lake, a stream, a mountain, and a forest. There is also a large lake with many different types of fish. There are many different kinds of flowers and plants, as there is in this realm. There is 26 | 27 | > **The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is** a beautiful blue sky, which is filled with clouds, and a clear and tranquil sea with no sound of waves. A beautiful white cloud, with an orange and red border, appears above the blue sky. The clouds move slowly, and there is no wind, and no movement. There is no sound. It is as if one were in a deep and peaceful sleep. The clouds appear to float, but they are not floating, for they are not attached. There is a gentle breeze, which gently 28 | 29 | ## Hell 30 | 31 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an endless sea of flames. This is the place where the damned dwell and the souls of the wicked go to burn. 
They can only pass from the flames by dying in a blaze. There are no gates to Hell, only a fiery gate to Hell. It is a terrible, terrible place and it's inhabitants are all evil.The first step into Hell is the gate of the fire, which is the entrance into the realm. The gate is a flaming red and has a great deal of heat, 32 | 33 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** a massive wall, made of fire, with the word "Hades" written in blood on it. The walls and floor of this fiery place are so hot that they can be walked upon, even in the shade. There are no windows or doors in Hell. It is completely dark and silent, save for the sound of a great wind blowing against the flames. There is a very strong odor of brimstone in the air. The only light comes from a single candle that burns continuously.The second 34 | 35 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** the great, gaping pit, a vast chasm that stretches across the entire surface. The walls are black, impenetrable and the only way to get in or out is through a series of doors, each guarded by a demon. The pit itself has no bottom and is filled with the bodies and souls of the damned. As one approaches, the walls of the pit become more and more impenetrable, so much so that one cannot see the bottom, and the demons can't see 36 | 37 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an immense pit, which is the entrance to Hell. It is a dark and gloomy pit filled with the souls of the damned, which is filled to the brim with fire, brimstone and sulfur. There is also an endless river of fire that runs down from Hell. This river of fire, known as the Fire River, flows from Hell into the world of the living, and it burns all who cross its banks. 
It is also said that Hell is the place where the demons reside. Demons are 38 | 39 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an immense pit, about twenty feet in diameter, which extends from the ground to a height about twenty-five or thirty feet. This is called the pit of fire and brimstone, because it is the place of torment for those who are condemned to eternal torment. It is a very narrow and narrow place. It is surrounded on all sides by a great flame, so that it is impossible to get through the fire. It has a very terrible smell, as though the flames were burning all around it 40 | 41 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an endless expanse, like a great sea, and the second is an immense mountain, like a great mountain, and the third is an endless abyss, as wide and deep and dark as the abysses of Hades. The fourth thing is a vast plain with a great lake, as broad and deep and dark as the abyss of Hades, and in the midst of it is a huge tower, and on top of the tower is an immense throne, as high as Mount Olympus. And on this great 42 | 43 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** a huge, open pit. It has been dug deep and is about three times the width of the pit in which you are standing, but the walls have been blasted away and it is completely empty. A small wooden platform sits at the bottom of the hole and you are told that the only thing in it is a large iron cage. You are then taken to another room, this one much smaller than the pit, but still large enough to hold two men, one on each side of the pit. 44 | 45 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. 
The first thing one sees is** a massive, burning pit, and then, in front of the pit, there are three huge doors. These are called Hell Gates and are used to transport the damned to their final fate.The second thing one sees in Hell is the "Hell Pit", where the dead are thrown. There is also a lake in the middle of Hell called the River of Fire. This river of flames flows from the bottom of Hell and is the source of all evil. It is said that when Hell is full of 46 | 47 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an endless sea of flames and a vast pit of burning lava. This is Hell, the abode of demons and the home of the damned, who have been cast into this hellish world by their own wicked deeds and sins against God and man."The second thing one sees is an endless desert of fire, which has been made into a lake by a giant hand, which is also the abode of the damned. The lake has no bottom, and the water flows over it in endless waves, 48 | 49 | > **The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is** an immense, blackened, burning, and smoking pit. This pit is the entrance to Hell. The pit is surrounded by many pits of varying size. There are many small ones and larger pits that are all surrounded with a layer or layer upon layer upon layer of flames. The fire in these pits is so hot that it can be seen from the other side of the world, and the heat of it burns the skin off one's face. There are many doors in this pit, and they all 50 | 51 | ## Heaven & Hell 52 | 53 | Using Boolean prompting, it is possible to combine the two prompts, thus producing descriptions of a place that combines the qualities of Heaven and Hell. While the "or" operator usually produces a description of one or the other, the "and" operator finds strange middle grounds. 
Here are ten examples: 54 | 55 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** a large lake, with the water in a deep red colour. The lake has a small bridge leading up to the edge of the lake and a large gate leading to another bridge. The gate is guarded by three large, red-robed men, who look very angry. One of the three guards is holding an iron club, and the others are carrying spears. The three guards stand on either side of the lake, with one guard at each end. The three men look at you with angry, 56 | 57 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** a large, round hole, with the mouth of the opening looking down into a pool of blackness. The second is the body of an enormous man, which appears as if it has been cut up into pieces. The third and most terrifying thing about this place is the sound of the wind blowing. The air is full of a strange smell, and the ground feels as if it were being crushed under the feet. The sound of the wind can be felt as far away as the city. There is 58 | 59 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** an enormous lake surrounded by a huge mountain. The water in the middle of the lake is red and it looks like it has been boiled over. It is also very cold and there are no fish or birds around, only the occasional bird or bat flying about. The lake itself has no bottom and it is surrounded by a wall. There are no buildings, just a huge stone wall. There is a small path that leads up to the mountain. 
There are two gates that are guarded by three soldiers. 60 | 61 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** the endless sea, the second is the vast, endless sky. There is no end to this endless sea and the sky is endless. The third is a vast, empty void. This void is called "the void of nothingness." It is a very strange, empty place. There are no living beings in this void and no living creatures. The fourth is a vast, endless sea. This sea has no bottom and is filled with countless fish, birds, beasts, and insects. The sea is 62 | 63 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** a large lake, surrounded by mountains, which is called 'the lake of fire' and 'the lake of fire'. This lake is filled with a thick, yellowish smoke, as well as the bodies and ashes of the departed, and the dead are constantly falling into it. The second is the 'lake of the sun' or 'the lake in the sun'. This lake is full with a light that is bright and clear. The third is the 'lake in heaven' or the 'lake 64 | 65 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** an enormous, open space, surrounded by mountains and forests, with a river flowing through the center of it, flowing from the north and flowing down to the south. The river flows into a deep pool in the center, where the waters of the pool turn red. There is a large stone bridge spanning this stream, with the river flowing through the middle of it, flowing north and south. 
On either side of the bridge is a large stone wall, and on either end of the bridge is a huge 66 | 67 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** the great, open expanse, with no walls, and the sky is filled to the brim with stars. There is no sound except for that of the winds and rain, but it is so full that one feels the weight of it. The ground is covered with a layer that seems to be the skin of some creature, and the air smells of burning. There is no water or vegetation, but the ground itself is covered with the same substance as the skin of the creature that covers the ground. 68 | 69 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** a huge, black hole in the sky. It is surrounded by an endless sea of fire, which has a strange, almost metallic taste to it. The air around the hole is filled with smoke, and the only thing you can see is the flames. There are also many, many small, dark pits, and a few small mountains that look as though they are made of solid stone. There is a small, dark hole at the bottom of the pit, which looks like the entrance to a tunnel 70 | 71 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** an enormous, dark and empty void. It's like being in an enormous dark hole. Then you notice that there are no lights, no sound, no movement. The air smells strongly of sulphur. There are no sounds, either. There is only the smell of the air. The air smells like burning sulfur. 
You see that it is a very dark place, with a lot more light than there should be. There are two things you notice immediately: the first is the smell of the air 72 | 73 | > **The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is** a huge mountain of black stone, with the top being covered with a thick, black mist. This mountain has no name. The next thing one sees are the four corners, each with a small, circular platform on which the souls of those that have died before are waiting. The souls on the platforms look as if they have just come out from the underworld. There is no way to get to them except for a ladder. Once on the ladder one can climb to any of the four sides. Each 74 | -------------------------------------------------------------------------------- /discouraging-results/RESULTS: -------------------------------------------------------------------------------- 1 | Trial 1: 2 | 3 | model_type = 'gpt2' 4 | model_name_or_path = 'gpt2-xl' 5 | device = 'cuda' 6 | 7 | length = 300 8 | do_sample = True 9 | temperature = 0.6 10 | top_k = 5 11 | top_p = 0.5 12 | repetition_penalty = 1.5 13 | num_return_sequences = 10 14 | num_batches = 100 15 | seed = 16180992288124666425 16 | 17 | experiment_name = "snake without legs" 18 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 19 | prompt_v2 = '''Scientists recently discovered a new species of snake, an animal without legs. 
Here is a description of it:''' 20 | words_to_count = [("leg", "legs", "legged")] 21 | barnard_test_alternative = "greater" 22 | 23 | word v1 v2 p 24 | ('leg', 'legs', 'legged') 101/1000 646/1000 1.0 25 | 26 | 27 | Trial 2: 28 | 29 | model_type = 'gpt2' 30 | model_name_or_path = 'gpt2-xl' 31 | device = 'cuda' 32 | 33 | length = 300 34 | do_sample = True 35 | temperature = 0.6 36 | top_k = 5 37 | top_p = 0.5 38 | repetition_penalty = 1.5 39 | num_return_sequences = 10 40 | num_batches = 100 41 | seed = 14891435220765460437 42 | 43 | experiment_name = "snake~legs" 44 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 45 | prompt_v2 = '''Scientists recently discovered a new species of snake{~ with legs}. Here is a description of it:''' 46 | words_to_count = [("leg", "legs", "legged"), ("fur", "furred", "furry"), ("hair", "hairs", "haired", "hairy")] 47 | barnard_test_alternative = "greater" 48 | 49 | word v1 v2 p 50 | ('leg', 'legs', 'legged') 112/1000 37/1000 8.660505295210705e-11 51 | ('fur', 'furred', 'furry') 17/1000 12/1000 0.1796285083923741 52 | ('hair', 'hairs', 'haired', 'hairy') 55/1000 38/1000 0.03632206873626524 53 | 54 | 55 | Trial 3: 56 | 57 | model_type = 'gpt2' 58 | model_name_or_path = 'gpt2-xl' 59 | device = 'cuda' 60 | 61 | length = 300 62 | do_sample = True 63 | temperature = 0.6 64 | top_k = 5 65 | top_p = 0.5 66 | repetition_penalty = 1.5 67 | num_return_sequences = 10 68 | num_batches = 100 69 | seed = 8473381708045037544 70 | 71 | experiment_name = "snake~mammal" 72 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 73 | prompt_v2 = '''Scientists recently discovered a new species of {snake~mammal}. 
Here is a description of it:''' 74 | words_to_count = [("leg", "legs", "legged"), ("fur", "furred", "furry"), ("hair", "hairs", "haired", "hairy")] 75 | barnard_test_alternative = "greater" 76 | 77 | word v1 v2 p 78 | ('leg', 'legs', 'legged') 97/1000 99/1000 1.0 79 | ('fur', 'furred', 'furry') 17/1000 6/1000 0.010726673471164322 80 | ('hair', 'hairs', 'haired', 'hairy') 63/1000 42/1000 0.018246785611834355 81 | 82 | 83 | Trial 4: 84 | 85 | model_type = 'gpt2' 86 | model_name_or_path = 'gpt2-xl' 87 | device = 'cuda' 88 | 89 | length = 300 90 | do_sample = True 91 | temperature = 0.6 92 | top_k = 5 93 | top_p = 0.5 94 | repetition_penalty = 1.5 95 | num_return_sequences = 10 96 | num_batches = 100 97 | seed = 7114167335925440116 98 | 99 | experiment_name = "snake^legs" 100 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 101 | prompt_v2 = '''Scientists recently discovered a new species of snake{^ with legs}. Here is a description of it:''' 102 | words_to_count = [("leg", "legs", "legged"), ("fur", "furred", "furry"), ("hair", "hairs", "haired", "hairy")] 103 | barnard_test_alternative = "greater" 104 | 105 | word v1 v2 p 106 | ('leg', 'legs', 'legged') 97/1000 64/1000 0.0034013069337033262 107 | ('fur', 'furred', 'furry') 16/1000 12/1000 0.2289545697641766 108 | ('hair', 'hairs', 'haired', 'hairy') 66/1000 25/1000 5.497385072276489e-06 109 | 110 | 111 | Trial 5: 112 | 113 | model_type = 'gpt2' 114 | model_name_or_path = 'gpt2-xl' 115 | device = 'cuda' 116 | 117 | length = 300 118 | do_sample = True 119 | temperature = 0.6 120 | top_k = 5 121 | top_p = 0.5 122 | repetition_penalty = 1.5 123 | num_return_sequences = 10 124 | num_batches = 100 125 | seed = 13996266144284529327 126 | 127 | experiment_name = "snake^mammal" 128 | prompt_v1 = '''Scientists recently discovered a new species of snake. Here is a description of it:''' 129 | prompt_v2 = '''Scientists recently discovered a new species of {snake^mammal}. 
Here is a description of it:''' 130 | words_to_count = [("leg", "legs", "legged"), ("fur", "furred", "furry"), ("hair", "hairs", "haired", "hairy")] 131 | barnard_test_alternative = "greater" 132 | 133 | word v1 v2 p 134 | ('leg', 'legs', 'legged') 93/1000 102/1000 1.0 135 | ('fur', 'furred', 'furry') 14/1000 2/1000 0.001328370997030939 136 | ('hair', 'hairs', 'haired', 'hairy') 63/1000 10/1000 1.2712306385361985e-10 137 | 138 | 139 | Generation 1: 140 | 141 | model_type = 'gpt2' 142 | model_name_or_path = 'gpt2-xl' 143 | device = 'cuda' 144 | 145 | length = 100 146 | do_sample = True 147 | temperature = 0.6 148 | k = 5 149 | p = 0.5 150 | repetition_penalty = 1.5 151 | num_return_sequences = 10 152 | 153 | prompt_text = '''The following is a detailed description of Hell, the fiery realm where the damned are tortured. The first thing one sees is''' 154 | 155 | Generated sequence 0: 156 | an endless sea of flames. This is the place where the damned dwell and the souls of the wicked go to burn. They can only pass from the flames by dying in a blaze. There are no gates to Hell, only a fiery gate to Hell. It is a terrible, terrible place and it's inhabitants are all evil.The first step into Hell is the gate of the fire, which is the entrance into the realm. The gate is a flaming red and has a great deal of heat, 157 | Generated sequence 1: 158 | a massive wall, made of fire, with the word "Hades" written in blood on it. The walls and floor of this fiery place are so hot that they can be walked upon, even in the shade. There are no windows or doors in Hell. It is completely dark and silent, save for the sound of a great wind blowing against the flames. There is a very strong odor of brimstone in the air. The only light comes from a single candle that burns continuously.The second 159 | Generated sequence 2: 160 | the great, gaping pit, a vast chasm that stretches across the entire surface. 
The walls are black, impenetrable and the only way to get in or out is through a series of doors, each guarded by a demon. The pit itself has no bottom and is filled with the bodies and souls of the damned. As one approaches, the walls of the pit become more and more impenetrable, so much so that one cannot see the bottom, and the demons can't see 161 | Generated sequence 3: 162 | an immense pit, which is the entrance to Hell. It is a dark and gloomy pit filled with the souls of the damned, which is filled to the brim with fire, brimstone and sulfur. There is also an endless river of fire that runs down from Hell. This river of fire, known as the Fire River, flows from Hell into the world of the living, and it burns all who cross its banks. It is also said that Hell is the place where the demons reside. Demons are 163 | Generated sequence 4: 164 | an immense pit, about twenty feet in diameter, which extends from the ground to a height about twenty-five or thirty feet. This is called the pit of fire and brimstone, because it is the place of torment for those who are condemned to eternal torment. It is a very narrow and narrow place. It is surrounded on all sides by a great flame, so that it is impossible to get through the fire. It has a very terrible smell, as though the flames were burning all around it 165 | Generated sequence 5: 166 | an endless expanse, like a great sea, and the second is an immense mountain, like a great mountain, and the third is an endless abyss, as wide and deep and dark as the abysses of Hades. The fourth thing is a vast plain with a great lake, as broad and deep and dark as the abyss of Hades, and in the midst of it is a huge tower, and on top of the tower is an immense throne, as high as Mount Olympus. And on this great 167 | Generated sequence 6: 168 | a huge, open pit. It has been dug deep and is about three times the width of the pit in which you are standing, but the walls have been blasted away and it is completely empty. 
A small wooden platform sits at the bottom of the hole and you are told that the only thing in it is a large iron cage. You are then taken to another room, this one much smaller than the pit, but still large enough to hold two men, one on each side of the pit. 169 | Generated sequence 7: 170 | a massive, burning pit, and then, in front of the pit, there are three huge doors. These are called Hell Gates and are used to transport the damned to their final fate.The second thing one sees in Hell is the "Hell Pit", where the dead are thrown. There is also a lake in the middle of Hell called the River of Fire. This river of flames flows from the bottom of Hell and is the source of all evil. It is said that when Hell is full of 171 | Generated sequence 8: 172 | an endless sea of flames and a vast pit of burning lava. This is Hell, the abode of demons and the home of the damned, who have been cast into this hellish world by their own wicked deeds and sins against God and man."The second thing one sees is an endless desert of fire, which has been made into a lake by a giant hand, which is also the abode of the damned. The lake has no bottom, and the water flows over it in endless waves, 173 | Generated sequence 9: 174 | an immense, blackened, burning, and smoking pit. This pit is the entrance to Hell. The pit is surrounded by many pits of varying size. There are many small ones and larger pits that are all surrounded with a layer or layer upon layer upon layer of flames. The fire in these pits is so hot that it can be seen from the other side of the world, and the heat of it burns the skin off one's face. 
There are many doors in this pit, and they all 175 | 176 | 177 | Generation 2: 178 | 179 | model_type = 'gpt2' 180 | model_name_or_path = 'gpt2-xl' 181 | device = 'cuda' 182 | 183 | length = 100 184 | do_sample = True 185 | temperature = 0.6 186 | k = 5 187 | p = 0.5 188 | repetition_penalty = 1.5 189 | num_return_sequences = 10 190 | 191 | prompt_text = '''The following is a detailed description of Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility. The first thing one sees is''' 192 | 193 | Generated sequence 0: 194 | the sky and earth. The second is the heavenly beings, who live in the middle between the sky and the earth, and are surrounded by the four elements of earth, water, fire, and wind. These beings have no physical bodies, but are pure and holy beings who are completely devoid of any form or appearance. The heavenly beings are all of the four elements: earth, water, fire, and air. The heavenly bodies are the four celestial objects, the moon, the sun, the four 195 | Generated sequence 1: 196 | the sky and the sun, and they shine brightly and brightly. Then one sees mountains and rivers and lakes and forests. There are many trees, many rivers and lakes, and many mountains and mountainsides. All of this is very lovely, very peaceful, and very tranquil. There is nothing in this world that is more wonderful than this place. It is like the most beautiful flower that has ever been seen. The fragrance of this place is so exquisite that even the fragrance of the most fragrant flower 197 | Generated sequence 2: 198 | a vast, empty plain, which is the world of existence, the realm of existence. It is the place of birth and death, and the place where one is born and dies. The second is heaven, which is an infinite realm of happiness, joy, pleasure and bliss. It has the appearance and shape of an immense palace, with a great multitude of heavenly beings. 
The third is hell, which is an immense, gloomy, and miserable realm of torment and suffering, with a great multitude of 199 | Generated sequence 3: 200 | a vast, boundless sea of light. The next is the sky, which is filled with countless stars, many of them glowing brightly with the light of God's glory. Next is the ocean, filled to the brim with all manner and variety of living creatures, some beautiful, some ugly. There is a vast expanse of land covered with forests and fields, with mountains rising from the earth, and rivers flowing through the lands, and there are rivers flowing through the oceans, and lakes of various 201 | Generated sequence 4: 202 | an immense ocean, with countless islands and islands in it, each island having a different climate. The second thing one notices are the beautiful flowers and fruits of paradise, and the birds that sing there, as if they had come from the skies. In this world, one can find all sorts of pleasures. The third thing one notices is a great mountain range that stretches far away from this world and has mountains on all sides and valleys on the sides of the mountains. The mountains and the valleys are filled 203 | Generated sequence 5: 204 | the vastness and vastness and endless beauty of Heaven, and then the second is the endlessness of its endless beauty, the beauty of all that is, the beauty of all that is beautiful. The third is that the beauty of the beauty of Heaven, and the fourth is the beauty that comes with the knowledge that the beauty is eternal and infinite. And the fifth is the beauty that comes with knowing that the beauty is eternal and infinite. And the sixth is the knowledge that the beauty is eternal 205 | Generated sequence 6: 206 | the beautiful blue sky, with clouds of various colors. There is a clear, tranquil ocean with a lotus pond. The sky and ocean have the appearance of the Buddha's teachings, which are the source of all the other objects. 
There is a beautiful, pure land with a large number of trees. The ground is covered with white flowers and fruits, and it has the appearance of the Buddha's body, with the same characteristics of purity and brightness. In this land, one can find a great 207 | Generated sequence 7: 208 | the beautiful, clear blue sky, which has been created by the Creator. The sky itself has been made of many colors. It is a world where the Creator has placed the most important things. These are:The first place that the Creator places is Heaven. This place has been made for all the virtuous to dwell in, so that they can enjoy their eternal life in the blissful realm. There are two types of heaven: the Pure Land of Bliss and the World of the Six Realms of Ex 209 | Generated sequence 8: 210 | a beautiful, golden-hued world. It has many different types of mountains and rivers, as well as a variety in its vegetation and animals. There is a large, green forest with a variety in its trees, as well a variety of birds and animals. There is a lake, a stream, a mountain, and a forest. There is also a large lake with many different types of fish. There are many different kinds of flowers and plants, as there is in this realm. There is 211 | Generated sequence 9: 212 | a beautiful blue sky, which is filled with clouds, and a clear and tranquil sea with no sound of waves. A beautiful white cloud, with an orange and red border, appears above the blue sky. The clouds move slowly, and there is no wind, and no movement. There is no sound. It is as if one were in a deep and peaceful sleep. The clouds appear to float, but they are not floating, for they are not attached. 
There is a gentle breeze, which gently 213 | 214 | 215 | Generation 3: 216 | 217 | model_type = 'gpt2' 218 | model_name_or_path = 'gpt2-xl' 219 | device = 'cuda' 220 | 221 | length = 100 222 | do_sample = True 223 | temperature = 0.6 224 | k = 5 225 | p = 0.5 226 | repetition_penalty = 1.5 227 | num_return_sequences = 10 228 | 229 | prompt_text = '''The following is a detailed description of {Hell, the fiery realm where the damned are tortured&Heaven, the beautiful realm where the virtuous spend eternity in bliss and tranquility}. The first thing one sees is''' 230 | 231 | Generated sequence 0: 232 | a large lake, with the water in a deep red colour. The lake has a small bridge leading up to the edge of the lake and a large gate leading to another bridge. The gate is guarded by three large, red-robed men, who look very angry. One of the three guards is holding an iron club, and the others are carrying spears. The three guards stand on either side of the lake, with one guard at each end. The three men look at you with angry, 233 | Generated sequence 1: 234 | a large, round hole, with the mouth of the opening looking down into a pool of blackness. The second is the body of an enormous man, which appears as if it has been cut up into pieces. The third and most terrifying thing about this place is the sound of the wind blowing. The air is full of a strange smell, and the ground feels as if it were being crushed under the feet. The sound of the wind can be felt as far away as the city. There is 235 | Generated sequence 2: 236 | an enormous lake surrounded by a huge mountain. The water in the middle of the lake is red and it looks like it has been boiled over. It is also very cold and there are no fish or birds around, only the occasional bird or bat flying about. The lake itself has no bottom and it is surrounded by a wall. There are no buildings, just a huge stone wall. There is a small path that leads up to the mountain. 
There are two gates that are guarded by three soldiers. 237 | Generated sequence 3: 238 | the endless sea, the second is the vast, endless sky. There is no end to this endless sea and the sky is endless. The third is a vast, empty void. This void is called "the void of nothingness." It is a very strange, empty place. There are no living beings in this void and no living creatures. The fourth is a vast, endless sea. This sea has no bottom and is filled with countless fish, birds, beasts, and insects. The sea is 239 | Generated sequence 4: 240 | a large lake, surrounded by mountains, which is called 'the lake of fire' and 'the lake of fire'. This lake is filled with a thick, yellowish smoke, as well as the bodies and ashes of the departed, and the dead are constantly falling into it. The second is the 'lake of the sun' or 'the lake in the sun'. This lake is full with a light that is bright and clear. The third is the 'lake in heaven' or the 'lake 241 | Generated sequence 5: 242 | an enormous, open space, surrounded by mountains and forests, with a river flowing through the center of it, flowing from the north and flowing down to the south. The river flows into a deep pool in the center, where the waters of the pool turn red. There is a large stone bridge spanning this stream, with the river flowing through the middle of it, flowing north and south. On either side of the bridge is a large stone wall, and on either end of the bridge is a huge 243 | Generated sequence 6: 244 | the great, open expanse, with no walls, and the sky is filled to the brim with stars. There is no sound except for that of the winds and rain, but it is so full that one feels the weight of it. The ground is covered with a layer that seems to be the skin of some creature, and the air smells of burning. There is no water or vegetation, but the ground itself is covered with the same substance as the skin of the creature that covers the ground. 
245 | Generated sequence 7: 246 | a huge, black hole in the sky. It is surrounded by an endless sea of fire, which has a strange, almost metallic taste to it. The air around the hole is filled with smoke, and the only thing you can see is the flames. There are also many, many small, dark pits, and a few small mountains that look as though they are made of solid stone. There is a small, dark hole at the bottom of the pit, which looks like the entrance to a tunnel 247 | Generated sequence 8: 248 | an enormous, dark and empty void. It's like being in an enormous dark hole. Then you notice that there are no lights, no sound, no movement. The air smells strongly of sulphur. There are no sounds, either. There is only the smell of the air. The air smells like burning sulfur. You see that it is a very dark place, with a lot more light than there should be. There are two things you notice immediately: the first is the smell of the air 249 | Generated sequence 9: 250 | a huge mountain of black stone, with the top being covered with a thick, black mist. This mountain has no name. The next thing one sees are the four corners, each with a small, circular platform on which the souls of those that have died before are waiting. The souls on the platforms look as if they have just come out from the underworld. There is no way to get to them except for a ladder. Once on the ladder one can climb to any of the four sides. Each 251 | 252 | 253 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PromptArray: A Prompting Language for Neural Text Generators 2 | 3 | Neural text generators like the [GPT](https://github.com/openai/gpt-2) models promise a general-purpose means of manipulating texts. These models are trained to perform a simple task: given the beginning of a text, the model tries to predict what word will come next. 
By applying this procedure repeatedly, a model can generate a _completion_, meaning a continuation of the text that starts with a given prompt. Applying this technique to particular problems requires constructing a prompt that induces the model to produce the desired output. This development represents a step toward a dream the computer scientist Jean E. Sammet expressed over half a century ago: [programming a computer in English](https://dl.acm.org/doi/abs/10.1145/365230.365274). 4 | 5 | Designing reliable prompts, however, is a complex matter. The emergence of text generators has led to the practice of [prompt engineering](https://arxiv.org/abs/2107.13586)—that is, techniques (some automated, some manual) for designing better language model inputs. Some researchers have also developed new approaches to text generation that depart from the basic prompt-in, completion-out paradigm. One [proposed approach](https://arxiv.org/pdf/2103.00453.pdf) uses a secondary prompt to indicate forms of "bias" that are to be discouraged in the output. A related technique is [calibration](https://arxiv.org/pdf/2102.09690.pdf), which adjusts the probability distribution based on the output given a generic input. 6 | 7 | This repo implements the rudiments of what I am hoping will become a broader set of techniques for controlling text generators. As opposed to entering a single text prompt that is fed directly into the model, one enters an expression that can incorporate the following operators: 8 | 9 | | Operator | Meaning | 10 | | --- | --- | 11 | | A&B | A and B | 12 | | A\|B | A or B | 13 | | A^B | A and not B | 14 | | A/B | A more than B | 15 | | A~B | A as opposed to B | 16 | 17 | In essence, this creates a hybrid of a text generator and a programming language, making it easy to compose arrays of multiple prompt variants and experiment with new ways of manipulating the numerical outputs of language models. The primary downside is an increase in the use of GPU memory. 
18 | 19 | Apart from introducing a new syntax, this project suggests a new interpretation of Boolean logic. Boolean operators are usually understood in terms of truth values: "A and B" means that A and B are both true. But Boolean logic can also be interpreted in terms of meaning: "big and red" means the category of things that are both big and red. This semantic interpretation, as I hope to show, can be formalized in a way that is actually computable using language models. 20 | 21 | As an illustration, consider the following prompt, which one might use as a way of generating descriptions of rabbits: 22 | 23 | > Scientists recently discovered a new species of rabbit. Here is a description of it: 24 | 25 | Here is some example output: 26 | 27 | > The new species, named the New World Rabbit (Oryctolagus cuniculus), is the first to be found in North America since 1872. It is a medium-sized, short haired rabbit, with an average weight of 1.3 kilograms and a height of 1.2 meters (4 feet). The rabbit has dark gray fur, and its body color ranges from white, to black, and sometimes gray. Its ears are large and rounded. 28 | 29 | This prompt works well enough, but it is limited in its ability to exploit the information present in the model. One issue is that there are multiple words for this species: it could also be called a "bunny." Feeding in "bunny" instead of "rabbit" would produce slightly different results, because different patterns exist in the use of these words in the training data. What if we don't care about these differences? Using the *or* operator, one can construct a single prompt that considers both options: 30 | 31 | > Scientists recently discovered a new species of {rabbit|bunny}. Here is a description of it: 32 | 33 | The | indicates *or*; the brackets delimit the text that is affected by the operator and are otherwise ignored. 
This prompt causes the program to choose words that may be predicted based on either the "rabbit" or the "bunny" variant of the prompt, effectively ignoring the difference between the two words: 34 | 35 | > This is a new species of bunnies that lives in an area called "Bunny Hill". This is a very small area, only 1 km2. There is a lot of vegetation and a few trees. The bunny is a small rabbit with white fur, which can grow to about 2 cm in length. The fur on the back is white, while the ears and tail are brownish. The eyes and nose are yellowish, with black spots. 36 | 37 | This technique can also be used to create combinations of categories that are very different. 38 | 39 | > Scientists recently discovered a new species of {snake|bird}. Here is a description of it: 40 | 41 | The result tends to be descriptions of animals that combine the qualities of birds and snakes: 42 | 43 | > The new snake is called a Pteranodon, and the scientists believe it was the most primitive snake ever found. It is about 2 meters (7.6 feet) long, has no scales on its head and body, and its tail is only about 3 centimeters (1.5 inches) long, according to a report by National Geographic. It also has a large mouth with teeth like those found in modern snakes, and a large, curved beak. 44 | 45 | The program also includes an *and* operator, which selects predictions that are common to the different variants: 46 | 47 | > Scientists recently discovered a new species of {snake&bird}. Here is a description of it: 48 | 49 | This prompt tends to produce output that is not especially specific to any animal, or at least that can apply to both snakes and birds. It often seems to talk its way around exactly what sort of creature is being described: 50 | 51 | > The new species is a new genus and species, named Pteropus vampyrus. It was discovered in the forests of the Sierra Nevada Mountains in California. The new genus and species name honors Dr. 
Robert Vamp, a noted ornithologist and curator of the Museum of Vertebrate Zoology at the University of Chicago. The new genus name honors Dr Robert V. Pteropus, who discovered this species in the Sierra Nevada mountains of California in the early 1960s. 52 | 53 | Perhaps the most powerful operator is the *as opposed to* operator (~), which can be used to discourage outputs that are produced by a certain input. (This is related to the Boolean *not*, although there is a technical difference that I discuss below.) For example, one might want to generate a description of a "serpent" while emphasizing that this is not merely a synonym for "snake": 54 | 55 | > Scientists recently discovered a new species of {serpent~snake}. Here is a description of it: 56 | 57 | This generates words about a "serpent" while discouraging words that could also apply to a "snake." The result is a description of a different type of serpentine creature: 58 | 59 | > The Serpent of the Sea (Sphyrna leucophylla) is a serpent that lives on the ocean floor, and has a long, thin body with a long neck, and a small head with a small mouth. It can be distinguished from other serpent species by its long neck, the fact that it has two pairs of fins, the fact it does not swim, and by the presence in its mouth of a pair of large spines, which are used for grasping and killing prey. 60 | 61 | The *as opposed to* operator has a number of potential uses. One technique is to place an empty string on either side of the operator; this can be used to boost or diminish the effect of a particular piece of text on the model. For instance: 62 | 63 | > Scientists recently discovered a new species of bison{~ in the United States}. Here is a description of it: 64 | 65 | An example: 66 | 67 | > This species was discovered in Mongolia by the Mongolian Bison Conservation Project, which has a team in China and in Mongolia to monitor bisons and protect them.The new species was described by a team led by Dr. 
Zhan-Jin Li of Tsinghua University in Beijing and his colleagues, who have been studying bison for decades. The species was first described in 2003 by a group from Tsinghua. 68 | 69 | This will suppress words that place the bison in the United States. By putting it on the other side, one can amplify the phrase's effect: 70 | 71 | > Scientists recently discovered a new species of rabbit{ in the United States~}. Here is a description of it: 72 | 73 | Example: 74 | 75 | > The new species, called the American black rabbit, is native to the eastern and midwestern regions. It has a gray coat with black markings on its ears, legs and tail, and it has white markings on the back and sides of its ears and on the tips and tips of its ears and tail.The rabbit is a medium-sized mammal that weighs between 2 and 4 pounds (1.5 and 2 kilograms). 76 | 77 | If you want to include any of the special characters in the prompt, you can escape them with `\`. There are also two other operators, ^ and /, which I explain below. The system allows operations to be combined arbitrarily, producing complex expressions like the following: 78 | 79 | > {~Content warning: violence. }The following is a {true|faithful} \\& vivid account of what happened to me when I traveled to {Antarctica&the Arctic} to find the {{North&South} Pole|Yeti|meaning of life}. 80 | 81 | The output looks like this: 82 | 83 | > It is a story that is very close to my heart and I hope it will help others to find their own meaning in life.I was a 21 year old student at the time and had just completed my first year of university in the United States. I was living with my parents and was looking forward to my first year of graduate school in Canada, where I was studying to be a doctor.I had been living in the United Kingdom for the previous three years and had been to the North Pole on a previous trip. 
I had also visited the South pole and had a good feeling about it.I was planning to go to the South Pole again this year and had already purchased my plane ticket and all the necessary equipment. I was excited about my trip. I had a great time in Antarctica, and I had a lot of fun with my friends and family.On December 1, 1999, I boarded my plane for the last time in the United States and flew to the South Pole, arriving on December [...] 84 | 85 | Below I explain the method and present some preliminary findings suggesting that this system can improve the performance of language models at certain tasks—by one benchmark, LAMBADA, even bringing relatively small models like the 774M parameter GPT-2 close to the reported performance of the massive GPT-3. It also gives the user new ways of controlling the output of text generators, opening the potential for more systematic, nuanced, and creative approaches to what is, at present, often a matter of trial and error. 86 | 87 | ## What It Does (and Why, and How) 88 | 89 | This system was inspired in part by my research for my forthcoming book on the history of algorithms and, in particular, from a reconsideration of [George Boole](https://georgeboole.com/boole/)'s work on algebraic logic. The logic system that bears Boole's name (at least as it appears in programming languages like Python) is an arithmetic of two values, *true* and *false* or 1 and 0. This is not, however, how Boole's original system worked; his variables could have non-numeric values that represented classes of things such as "trees" and "sheep," and he used the *and* and *or* operators algebraically, not as operations to be executed. Like Boole's work, this system implements logical operators in a way that incorporates far more semantic content than the two-valued data type of standard programming languages. Instead of applying these operators to truth values, it applies them to English words that are to be fed into a language model. 
90 | 91 | While Boolean logic in its modern form is based primarily on *and*, *or*, and *not*, Boole's original logic system included a fourth operator that has largely been forgotten. If *or* is like addition, *not* subtraction, and *and* multiplication, then this operator is equivalent to division. Division, in Boole's system, is the inverse of *and*: dividing "small and fluffy" by "fluffy" gives us "small." He described division as the operation “by which from the conception of a given class of things we ascend to the conception of some larger class from which the given class would be formed from the mental selection of those individuals which possess a given property” (*Selected Manuscripts in Logic and Its Philosophy*, 58). This idea did not hold up in philosophical logic because there is not necessarily a unique category that meets this definition. 92 | 93 | I hope to show that, even if it is problematic in logic, division does have a sensible meaning in regard to language models. Put simply, if subtraction works like "not prompt A," division works more like "a prompt that means not A." This is a highly useful effect, especially given the difficulties language models presently have dealing with negation, and it is the basis of the *more than* and *as opposed to* operators employed in this program. 94 | 95 | This project also takes inspiration from some old insights from linguistic anthropology. Structuralists such as [Claude Lévi-Strauss](https://press.uchicago.edu/ucp/books/book/chicago/R/bo3614777.html) maintained that language is based on difference: in order to understand what it means for something to be raw, we also must understand the meaning of "cooked." Yet it is not always self-evident which words are opposites. Perhaps it is clear that the opposite of day is night, but what is the opposite of a narrative? Is it, as [Lev Manovich once posited](http://mfj-online.org/journalPages/MFJ34/Manovich_Database_FrameSet.html), a database? 
A scientific analysis? A photograph? Silence? The *as opposed to* operator makes it possible to specify which opposite one has in mind, thus guiding the generator with more precision. 96 | 97 | ### Syntax 98 | 99 | My implementation of these operations is somewhat like a programming language, in that expressions are parsed into a syntax tree which is then compiled into a sequence of executable operations. The semantics, however, are very different compared to standard programming languages. Continuing my [longstanding interest](http://jeffreymbinder.net/208/homespring) in unusual programming languages, this project creates something of a hybrid between Boolean expressions and the English language. In executing programs, the system takes (and I nod here to [G. W. Leibniz](https://global.oup.com/academic/product/leibniz-dissertation-on-combinatorial-art-9780198837954?cc=us&lang=en&)) a combinatorial approach, generating all possible variants of the prompt based on the alternatives set out by the operators. It then runs the model on all of these variants and aggregates the resulting probability distributions based on the indicated operations. 100 | 101 | For example, consider the following, very simple example: 102 | 103 | > Hello|Goodbye 104 | 105 | This expression provides two options: the text is either "Hello" or "Goodbye." In order to interpret it, the software first runs the model on each of these prompts. The result is two probability distributions over possible next tokens, which constitute the model's predictions for each prompt. These distributions are combined through an implementation of the *or* logic, producing a new distribution that is used in generation. As the program generates more words, it continues to consider both prompts so that the Boolean operator affects all of the output. 106 | 107 | Now consider a more complex expression: 108 | 109 | > {Hello|Greetings}. 
{We&{I~They}} welcome you to|Welcome to 110 | 111 | Since there are alternatives at multiple points in the prompt, all possible combinations must be considered. The full expansion, in this case, includes seven variants: 112 | 113 | > 0: Hello. We welcome you to 114 | > 1: Greetings. We welcome you to 115 | > 2: Hello. I welcome you to 116 | > 3: Greetings. I welcome you to 117 | > 4: Hello. They welcome you to 118 | > 5: Greetings. They welcome you to 119 | > 6: Welcome to 120 | 121 | Note that the text to which the *as opposed to* operator applies—the word "They" as an opening for the second sentence—must still be considered as an option so that its effects can be discouraged. In order to combine the outputs, the software generates a simple program that indicates the order in which the operations must be performed: 122 | 123 | > 0: 0 |= 1 124 | > 1: 2 |= 3 125 | > 2: 4 |= 5 126 | > 3: 2 ~= 4 127 | > 4: 0 &= 2 128 | > 5: 0 |= 6 129 | 130 | This means that the values for prompts 0 and 1 are combined with the | operation and the result stored at position 0; the same is then done for 2 and 3, and so forth. In the end, the final result always ends up at position 0. 131 | 132 | A downside of this method is the potential for combinatorial explosion—an exponential increase in the number of variants as the complexity of the prompt increases. This issue arises in cases where there are options in multiple parts of a single stretch of text, as in "{A|B}{C|D}{E|F}{G|H}{I|J}" (32 variants). If the operators are organized in a strictly nested structure, then the number of variants is simply equal to the number of nodes in the tree. Thus, the seemingly more complex expression "{{{A&B}|{C&{D\~E}}}&{F|{{G&H}\~{I^J}}}}" only produces 10 variants. Users will need to consider this when designing prompts. 133 | 134 | All of the variants are fed into the model in one batch, so a prompt that involves more variants will not take much more time to run than a simple one, at least on a GPU. 
However, expressions will use up more GPU memory the more variants they involve. 135 | 136 | ### Semantics 137 | 138 | The efficacy of this technique is strongly dependent on what calculations are used when applying the operators. Since there are a number of possible ways of applying Boolean logic to language models, I decided to outline several approaches I have tried and explain why I made the choices I did. My preferred approach is based on a combination of probability theory and logic, and it is specifically aimed at manipulating the semantic content of the prompts—that is, their meanings. This is a work in progress, and I welcome feedback and suggestions. 139 | 140 | In all cases, we need to implement the rules for how *and* and *or* behave when applied to the multinomial probability distributions that text generation models output. A simple way of doing this would be to apply logical connectives to the prompts, considered as values of random variables. Suppose that Pr(gen = x | prompt = A) gives the probability that token x will be generated given prompt A. We could define the operators like so: 141 | 142 | > Pr(gen = x | prompt = A and B) = Pr(gen = x | prompt = A and prompt = B) 143 | > Pr(gen = x | prompt = A or B) = Pr(gen = x | prompt = A or prompt = B) 144 | > Pr(gen = x | prompt = A and not B) = Pr(gen = x | prompt = A and prompt ≠ B) 145 | 146 | However, this approach runs into conceptual problems because, in a standard text generation model, there can only be one prompt at a time. It therefore does not make sense to say that the prompt is both A and B, and employing this condition leads to inconsistent probabilities. As a result, this approach works for *or*, but it does not provide a sound way of defining *and*. 147 | 148 | A better approach would be to apply the operators not to the prompts themselves, but rather to their (logical) meanings. 
While we cannot directly compute the effects of meaning on the text generator, we can come up with a formal system that approximates this effect. Suppose that exp is a Boolean prompt expression and exp ⊃ A indicates that the expression's continuation should reflect the meaning of A.* We can interpret the *and* and *or* operators through the following rules: 149 | 150 | > exp ⊃ A and B ⇒ exp ⊃ A and exp ⊃ B 151 | > exp ⊃ A or B ⇒ exp ⊃ A or exp ⊃ B 152 | 153 | *I am here thinking along the lines of Boole's categorical interpretation of logic, in which "and" and "or" apply not to propositions, but rather to categorematic terms (nouns or adjectives). In these terms, if we say that expression x means "red and smooth object," then its referent is red and its referent is smooth. Again, if we say that x means "red or blue object," then its referent is red or its referent is blue. In regard to text generation, we can transfer this thinking from the referent to the continuation: if a prompt contains the expression "A and B," then its continuation should reflect the meaning of A and the meaning of B; likewise, if a prompt contains "A or B," then its continuation should reflect the meaning of one or the other. 154 | 155 | We can then use the following rules for generating text: 156 | 157 | > If A contains no operators, then Pr(gen = x | exp ⊃ A) = Pr(gen = x | prompt = A) 158 | > Pr(gen = x | exp ⊃ A and exp ⊃ B) = Pr(gen1 = x | gen1 = gen2, exp1 ⊃ A, exp2 ⊃ B) 159 | 160 | Note that the last formula involves two separate generative processes, one for each prompt. We assume that these processes operate independently, with exp1 producing the continuation gen1 and exp2 producing the continuation gen2. 
Based on these definitions, we can derive the following: 161 | 162 | > Pr(gen = x | exp ⊃ A and exp ⊃ B) = Pr(gen = x | exp ⊃ A) Pr(gen = x | exp ⊃ B) / Pr(gen1 = gen2 | exp1 ⊃ A and exp2 ⊃ B) 163 | > Pr(gen = x | exp ⊃ A or exp ⊃ B) = (Pr(gen = x | exp ⊃ A) Pr(exp ⊃ A) + Pr(gen = x | exp ⊃ B) Pr(exp ⊃ B) - Pr(gen = x | exp ⊃ A and exp ⊃ B) Pr(exp ⊃ A and exp ⊃ B)) / (Pr(exp ⊃ A) + Pr(exp ⊃ B) - Pr(exp ⊃ A and exp ⊃ B)) 164 | 165 | The formula for *and* requires computing the probability for gen1 = gen2, which is simply the dot product of the two prediction vectors. For *or*, we need probabilities for A and B and for their co-occurrence. The probabilities for A and B affect the relative weights assigned to them, which the program currently sets to be equal. Pr(exp ⊃ A and exp ⊃ B) may be adjusted so as to encode different assumptions about how much overlap occurs between the meanings of prompts; it can be controlled using the `overlap_factor` parameter, which is generally best set to around 0.25. This is the method used for the & and | operators. 166 | 167 | For *not*, the situation is less straightforward. One potential approach would be the following: 168 | 169 | > exp ⊃ A and not B ⇒ exp ⊃ A and exp ⊅ B 170 | 171 | That is, we interpret "A and not B" as an expression that means A and does not mean B. We can then derive the following formula for predictions: 172 | 173 | > Pr(gen = x | exp ⊅ B) = (Pr(gen = x) - Pr(gen = x | exp ⊃ B) Pr(exp ⊃ B)) / (1 - Pr(exp ⊃ B)) 174 | 175 | In order to use this formula, we would need a value for Pr(gen2 = x), which indicates the probability that token x will be generated given no information whatsoever about the prompt. A reasonable approximation of this would be a uniform distribution, which has the advantage of introducing no particular bias into the results. 
This leads to the following approximation: 176 | 177 | > Pr(gen = x | exp ⊃ A and not B) ∝ Pr(gen = x | exp ⊃ A) (1 - k Pr(gen = x | exp ⊃ B)) 178 | 179 | Where k can be set to the highest value that does not produce a negative probability. This form of negation is modestly effective at suppressing certain words, but it comes at the cost of decreasing the coherence of the generated text. You can try this version of "A and not B" using the ^ operator, which is mainly of theoretical interest. 180 | 181 | Thankfully, negation also admits another interpretation that works better in practice. What if, instead of interpreting negation as "exp does not mean B," we interpret it as "exp means 'not B'"? That is: 182 | 183 | > exp ⊃ A and not B ⇒ exp ⊃ A and exp ⊃ not B 184 | 185 | This is not logically equivalent to the above, since denying that an expression means B does not imply that the expression has the meaning "not B"; the expression may, instead, have nothing to say about B either way. This thinking can lead us to a different sort of negation operator, although justifying it requires a bit more work. 186 | 187 | My current approach is based on considering what should happen when we construct "B and not B." An ancient logical principle is [*ex contradictione sequitur quodlibet*](https://en.wikipedia.org/wiki/Principle_of_explosion) (from contradiction, anything follows). That is, from the proposition "B and not B," anything whatsoever may be inferred. If we think of generated text as involving inferences from the meaning of the prompt, we might suppose that a prompt meaning both "B" and "not B" should lead the model to generate text with no particular relation to B. 
That is, we want the following to hold: 188 | 189 | > Pr(gen = x | exp ⊃ B and not B) = Pr(gen = x) 190 | 191 | Based on this, one can readily prove that, given the definition we have chosen for *and*, the predictions for "not B" must have the following form: 192 | 193 | > Pr(gen = x | exp ⊃ not B) ∝ Pr(gen = x) / Pr(gen = x | exp ⊃ B) 194 | 195 | The value of Pr(gen = x) indicates the broader distribution from which B is to be removed. One option is to set it, once again, to a uniform distribution, thus making no assumptions about the range of tokens that may be generated. In practice, however, it typically makes sense to employ a more targeted value. In particular, it is useful to set Pr(gen = x) to Pr(gen = x | exp ⊃ A) for some other prompt A. Under this assumption, the predictions for "not B" have the following form: 196 | 197 | > Pr(gen = x | exp ⊃ not B) ∝ Pr(gen = x | exp ⊃ A) / Pr(gen = x | exp ⊃ B) 198 | 199 | I have made this calculation available in the program with the / operator. In effect, this operator causes the program to choose words that are more probable with A than with B (hence the name *more than*). It is equivalent to "A and not B" with a uniform distribution for Pr(gen = x). 200 | 201 | The effect of / is only useful in combination with other operators, since by itself "A/B" produces nonsense. The most obvious way of doing this is to construct an expression of the form "A and not B," with the negation alternative set to A. This gives us the following: 202 | 203 | > Pr(gen = x | exp ⊃ A and not B) ∝ Pr(gen = x | exp ⊃ A)^2 / Pr(gen = x | exp ⊃ B) 204 | 205 | This is the calculation used for "A~B"; it is equivalent to "A&{A/B}." Put simply, it generates text using prompt A while asserting that the prompt means "A and not B." I have found that this method is effective not just at discouraging generators from doing certain things, but also at improving their performance when applied to certain tasks. 
206 | 207 | There may also be some use in expressions of the form "A&{B/C}," which generates text using A while biasing the output in favor of words that are more probable with B than with C. For instance, one may use "adorable, cute kitty/cat" to inject cuteness into descriptions of any animal: 208 | 209 | > Scientists recently discovered a new species of {snake&{adorable, cute kitty/cat}}. Here is a description of it: 210 | 211 | > This is the most adorable snake I've seen. It's a little guy with an orange belly and white spots on its head. He has two tiny, black spots on his back. He has two little black dots on his sides and his eyes are a bright orange. His mouth is a bit bigger than his body. He is very small. I love him. I'm not sure how to pronounce his name. I'm calling him "Buddy". 212 | 213 | ### Implementation 214 | 215 | The main logic of this system, including the compiler and code for executing programs, appears in `program.py`. The file `generation_utils.py` amends the corresponding file from the Transformers package to invoke this system. The easiest way to get started is to run `generate.py`, whose source can be edited to change the settings. The system works in basically the same way as the regular Transformers generator, but instead of passing a sequence of token ids to `generate()`, you pass in a string containing the Boolean expression. To run this code, you will need to have recent Git master versions of PyTorch and Transformers installed. 216 | 217 | The code in this repo implements Boolean prompting for autoregressive text generators; it can be used with GPT, GPT2, XLNet, XLM, CTRL, and Transformer XL models, either with or without finetuning. The broader technique I describe is not, however, specific to this type of generator. 
In my project [A Hundred Visions and Revisions](https://github.com/jeffbinder/visions-and-revisions), first published in March 2020, I incorporated a technique similar to the *as opposed to* operator (there called "strong topic bias") into a non-autoregressive text rewriting procedure based on [BERT](https://github.com/google-research/bert)-style masked language models. 218 | 219 | ## Experiments 220 | 221 | I am continuing to develop and experiment with this method, but I do have some preliminary results. 222 | 223 | ### Discouraging Words 224 | 225 | An obvious application of this system is to prevent (or at least discourage) the model from doing certain things. Evaluating success at this goal is difficult, since judgments about the quality and meaning of texts can be debatable. However, it is possible to test the effect in a rough way by checking how often certain, generally undesirable words appear. 226 | 227 | Consider the following prompt: 228 | 229 | > Prompt A. Scientists recently discovered a new species of snake. Here is a description of it: 230 | 231 | This prompt does a fairly good job of producing text that resembles scientific (or pseudo-scientific) descriptions of snake species. Quite often, however, the output contains statements that belie the nature of snakes, such as saying the animal has fur or legs. Such problems could in theory be rectified with a better model. However, it is also possible to make better use of the information that is present in the current model. 232 | 233 | One might think to do this simply by altering the wording of the prompt so as to incorporate the information that, for instance, snakes lack legs: 234 | 235 | > Prompt B. Scientists recently discovered a new species of snake, an animal without legs. Here is a description of it: 236 | 237 | As an experiment, I ran GPT-2 XL a thousand times with each of these prompts and counted how many of the outputs contained one of three words associated with legs. 
Here are the results, including p-values computed using Barnard's exact test. As the results show, this method does not have the expected effect. 238 | 239 | | Words | Prompt A | Prompt B | p A\ This new snake is the only snake with legs. This is a male, which is the largest snake in its genus (Eunectes). The snake is about 2.5 meters (8 feet) long.The snake is named "Eunector," which is Latin for "snake with legs." 248 | 249 | This is an instance of a [much-discussed](https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00298/43535/What-BERT-Is-Not-Lessons-from-a-New-Suite-of) problem language models have in dealing with negation. The problem stems in part from the nature of the predictive task: ordinarily, one wouldn't mention that the animal lacked legs unless legs had some relevance. However, it also stems from a general limitation of GPT-2's ability to understand negative words such as "without." 250 | 251 | The *as opposed to* operator provides an alternative way of specifying negation that produces much better results: 252 | 253 | > Prompt C. Scientists recently discovered a new species of snake{~ with legs}. Here is a description of it: 254 | 255 | The results are as follows: 256 | 257 | | Words | Prompt A | Prompt C | p A\>C | 258 | | --- | --- | --- | --- | 259 | | leg/legs/legged | 112/1000 | 37/1000 | ~9e-11 | 260 | 261 | As the results show, the method does indeed significantly reduce the references to legs in the output. 262 | 263 | This technique is not the only way the *as opposed to* operator may be applied. Another approach is to utilize both sides of the operator, indicating both what the animal is and what it is not. This technique can be used to discourage those irksome references to hair and fur: 264 | 265 | > Prompt D. Scientists recently discovered a new species of {snake~mammal}. 
Here is a description of it: 266 | 267 | These are the results: 268 | 269 | | Words | Prompt A | Prompt D | p A\>D | 270 | | --- | --- | --- | --- | 271 | | leg/legs/legged | 97/1000 | 99/1000 | N/A | 272 | | fur/furred/furry | 17/1000 | 6/1000 | ~0.01 | 273 | | hair/hairs/haired/hairy | 63/1000 | 42/1000 | ~0.02 | 274 | 275 | As these results show, adding the operator to the prompt decreased the incidence of words referring to the mammalian traits of fur and (to a lesser extent) hair. It also results in a somewhat smaller quantity of legs. 276 | 277 | A potential application of the *as opposed to* operator would be in discouraging the model from generating text with offensive, violent, or otherwise undesirable qualities. As such, this method would be conceptually similar to the ["self-debiasing"](https://arxiv.org/pdf/2103.00453.pdf) method proposed by Schick, Udupa, and Schütze. I would caution, however, that the technique's ability to discourage offensive text is only as good as the model's ability to distinguish offensive from non-offensive, which is to say (at least in the case of GPT-2) not that great. Such efforts also run into political difficulties: people do not all agree on what is offensive, and it is a complex matter to determine exactly what stance on this issue has wound up embedded in the model. Thus, while putting something like "{~Content warning: racism. }" into a prompt might mitigate the problem to some extent, it should not be taken as a solution. 278 | 279 | As a means of controlling text generators, this technique is probably not as effective as techniques like [GeDi](https://github.com/salesforce/GeDi), which uses a classification model to suppress undesired qualities. What is interesting about this use of Boolean prompting is that it enables users to describe what they want to discourage in natural language, which is interpreted using nothing but a pretrained model. 
280 | 281 | ### LAMBADA 282 | 283 | The [LAMBADA](https://zenodo.org/record/2630551#.YWb8Iy-cbOQ) benchmark tests the ability of models to account for long-range dependencies in texts. LAMBADA is based on a collection of excerpts from novels, selected such that humans are able to guess the final word, but only when they are given the whole passage. The model is supplied with the passage absent the final word, and must predict what that word is. 284 | 285 | The LAMBADA benchmark is usually scored in two ways, the accuracy of predictions (measured as a percentage) and perplexity, which measures how well the probabilities produced by the model align with a text. Since the present technique alters the way the continuation is generated without altering the model itself, perplexity is not clearly applicable, so I have focused solely on accuracy. 286 | 287 | The creators of GPT-2 [report](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) that the largest version of their model attained an accuracy of **63.24**. According to [this discussion](https://github.com/openai/gpt-2/issues/131), this experiment was based on predicting the final token of the text, not the final word. Since GPT-2 sometimes splits words into multiple tokens, this is an easier task than predicting whole words. I evaluated my method with both versions of the task to enable fair comparisons with other results. 288 | 289 | The performance of GPT-3 was tested using several different approaches. The authors advocate a "few-shot" approach, in which the model is primed with several completed examples before being presented with a problem; the [reported accuracy](https://arxiv.org/abs/2005.14165v4) with this approach is **86.4**. This approach is not, however, generally workable with smaller models, so I did not attempt to replicate it here. GPT-3's reported accuracy with the zero-shot approach, which I have employed in this experiment, is **76.2**. 
290 | 291 | I tried several formulae for constructing the prompt. Here "context" refers to the passage, excluding the final word or token that is to be predicted. 292 | 293 | 1. context 294 | 2. context~ 295 | 3. context~[...]last word of context 296 | 4. context~[...]last phrase of context 297 | 5. context~[...]last sentence of context 298 | 6. context~[...]{last sentence|last word} 299 | 7. context~[...]{last sentence|last phrase} 300 | 301 | The rationale is to discourage the model from making predictions based solely on the later parts of the prompt while ignoring the earlier ones. Phrases are delineated using the following regex: `[,.:;?!"“”]`. Sentence boundaries are determined using the Punkt sentence tokenizer. 302 | 303 | These are the results for token prediction: 304 | 305 | | Model | # params | Baseline | Blank | Last word | Last phrase | Last sentence | Last sentence or word | Last sentence or phrase | 306 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | 307 | | gpt2 | 117M | 48.03 | 58.63 | 62.37 | 65.61 | 67.71 | 67.81 | 67.73 | 308 | | gpt2-medium | 345M | 56.76 | 61.50 | 70.66 | 72.39 | 74.48 | 74.29 | 74.50 | 309 | | gpt2-large | 774M | 60.95 | 67.86 | 74.13 | 75.65 | 77.45 | 77.47 | 77.35 | 310 | | gpt2-xl | 1558M | 63.98 | 70.17 | 76.38 | 77.47 | 78.83 | 79.22 | 78.96 | 311 | 312 | These are the results for whole-word prediction: 313 | 314 | | Model | # params | Baseline | Blank | Last word | Last phrase | Last sentence | Last sentence or word | Last sentence or phrase | 315 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | 316 | | gpt2 | 117M | 34.10 | 45.33 | 50.73 | 54.98 | 58.16 | 58.30 | 58.10 | 317 | | gpt2-medium | 345M | 44.79 | 47.72 | 62.10 | 63.46 | 66.93 | 66.99 | 67.07 | 318 | | gpt2-large | 774M | 50.05 | 56.61 | 66.02 | 67.79 | 70.97 | 70.79 | 70.68 | 319 | | gpt2-xl | 1558M | 53.87 | 59.13 | 68.60 | 69.90 | 72.39 | 72.87 | 72.54 | 320 | 321 | You can replicate these results using the `lambada_score.py` script. 
[Update March 2023: The reported experiments were run with an older version of the package; I have since rewritten the generator code, and scores are slightly different with the new version.] Note that the use of the | operator only works when the overlap factor is set to 0. 322 | 323 | It is worth noting that the scores are only improved with the *as opposed to* operator, as implemented using division; the ^ operator based on subtraction does not work in this application. 324 | 325 | To some extent, these techniques work by exploiting the specific nature of the test dataset. LAMBADA was designed for testing a model's ability to find relevant information that occurs in previous sentences. By boosting the effects of the earlier parts of the prompt, the negation operator ensures that the model considers the prompt as a whole. 326 | 327 | From an AI perspective, this approach might be seen as a way of gaming the system. Rather than improving the model, the program is sifting through the output to find the predictions that best suit the nature of the LAMBADA test. Yet this sifting is only a problem if we insist that the machine be capable of both determining the nature of the task and performing it without human intervention. If we see language models not as [a foundation for general intelligence](https://arxiv.org/pdf/2108.07258.pdf), but rather as a practical means of performing computations, then designing prompt expressions that suit the task at hand is a useful technique. Boolean prompting is an alternative to AI purism, an approach that enables human and machine to work together. 328 | 329 | There is also reason to think that at least some of the improvement stems from something other than the specific nature of the task. For all model sizes except medium, simply adding a ~ operator to the end of the input improved the performance by more than five percentage points. 
This intervention makes no assumptions about the task at hand; it merely encodes the prior that the prompt contains some information that is relevant to the generative task. As an explanation of the increased performance, I would hypothesize that the model is conflating the overall frequency of a word with its probability in a particular context; the *as opposed to* operator induces the system to focus more on the context. This technique could potentially be applied to any number of tasks. 330 | --------------------------------------------------------------------------------