├── .gitignore ├── README.md ├── approach.png ├── hyde-demo.ipynb ├── hyde-dl19.ipynb ├── setup.py └── src └── hyde ├── __init__.py ├── generator.py ├── hyde.py └── promptor.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.egg-info/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HyDE: Precise Zero-Shot Dense Retrieval without Relevance Labels 2 | 3 | This is the code repository for the paper: [HyDE: Precise Zero-Shot Dense Retrieval without Relevance Labels](https://arxiv.org/abs/2212.10496). 4 | 5 | **HyDE** zero-shot instructs GPT3 to generate a fictional document and re-encodes it with the unsupervised retriever Contriever to search in its embedding space. 6 | HyDE significantly outperforms Contriever across tasks and languages, and it does not require any human-labeled relevance judgements. 7 | 8 | ![approach](approach.png) 9 | 10 | ## Steps to run the code 11 | 12 | 1. Install `pyserini` by following the [guide](https://github.com/castorini/pyserini#-installation). We use pyserini to conduct dense retrieval and evaluation. 13 | 14 | 15 | 2. Download the prebuilt Contriever Faiss index 16 | ``` 17 | wget https://www.dropbox.com/s/dytqaqngaupp884/contriever_msmarco_index.tar.gz 18 | tar -xvf contriever_msmarco_index.tar.gz 19 | ``` 20 | 21 | 3. Set up the GPT3 API key 22 | 23 | ``` 24 | export OPENAI=<your OpenAI API key> 25 | ``` 26 | 27 | 4. Run `hyde-dl19.ipynb` to run the experiment on the TREC DL19 dataset. Run `hyde-demo.ipynb` to walk through the HyDE pipeline with an example query. 
28 | 29 | 30 | ## Citation 31 | 32 | ``` 33 | @article{hyde, 34 | title = {Precise Zero-Shot Dense Retrieval without Relevance Labels}, 35 | author = {Luyu Gao and Xueguang Ma and Jimmy Lin and Jamie Callan}, 36 | journal={arXiv preprint arXiv:2212.10496}, 37 | year = {2022} 38 | } 39 | ``` -------------------------------------------------------------------------------- /approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/texttron/hyde/a2fd8734307612cb0225d71ffbf26e0d225986b8/approach.png -------------------------------------------------------------------------------- /hyde-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c0a95872", 6 | "metadata": {}, 7 | "source": [ 8 | "# HyDE\n", 9 | "For a given query, the HyDE retrieval pipeline contains 4 components:\n", 10 | "1. Promptor: builds the prompt for the generator based on the specific task.\n", 11 | "2. Generator: generates hypothesis documents using a large language model.\n", 12 | "3. Encoder: encodes the hypothesis documents into the HyDE vector.\n", 13 | "4. Searcher: searches for the nearest neighbours of the HyDE vector (dense retrieval)." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "a1ee489b", 19 | "metadata": {}, 20 | "source": [ 21 | "### Initialize HyDE components\n", 22 | "We use [pyserini](https://github.com/castorini/pyserini) as the search interface." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 22, 28 | "id": "65c24913", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import json\n", 33 | "from pyserini.search.faiss import FaissSearcher\n", 34 | "from pyserini.search.lucene import LuceneSearcher\n", 35 | "from pyserini.encode import AutoQueryEncoder\n", 36 | "\n", 37 | "from hyde import Promptor, OpenAIGenerator, CohereGenerator, HyDE" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "dcb0db43", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "KEY = '' # replace with your API key, it can be OpenAI api key or Cohere api key\n", 48 | "promptor = Promptor('web search')\n", 49 | "generator = OpenAIGenerator('text-davinci-003', KEY)\n", 50 | "encoder = AutoQueryEncoder(encoder_dir='facebook/contriever', pooling='mean')\n", 51 | "searcher = FaissSearcher('contriever_msmarco_index/', encoder)\n", 52 | "corpus = LuceneSearcher.from_prebuilt_index('msmarco-v1-passage')" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "d0fe1eb3", 58 | "metadata": {}, 59 | "source": [ 60 | "### Build a HyDE pipeline" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 27, 66 | "id": "ccacadc5", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "hyde = HyDE(promptor, generator, encoder, searcher)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "1ee99483", 76 | "metadata": {}, 77 | "source": [ 78 | "### Load example Query" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 37, 84 | "id": "b8d1a85c", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "query = 'how long does it take to remove wisdom tooth'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "4c3be967", 94 | "metadata": {}, 95 | "source": [ 96 | "### Build Zeroshot Prompt" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 38, 102 | "id": "a6e0eecc", 103 
| "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Please write a passage to answer the question.\n", 110 | "Question: how long does it take to remove wisdom tooth\n", 111 | "Passage:\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "prompt = hyde.prompt(query)\n", 117 | "print(prompt)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "1821d8b9", 123 | "metadata": {}, 124 | "source": [ 125 | "### Generate Hypothesis Documents" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 39, 131 | "id": "4032c2b7", 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "HyDE Generated Document: 0\n", 139 | "There is no one-size-fits-all answer to this question, as the time it takes to remove a wisdom tooth can vary depending on the individual case. In general, however, the procedure usually takes around 30 minutes to an hour to complete.\n", 140 | "HyDE Generated Document: 1\n", 141 | "It generally takes around 30 to 45 minutes to remove a wisdom tooth. However, the time may vary depending on the position of the tooth and the amount of work required.\n", 142 | "HyDE Generated Document: 2\n", 143 | "It usually takes around 30 to 45 minutes to remove a wisdom tooth. However, the length of time may vary depending on the individual case.\n", 144 | "HyDE Generated Document: 3\n", 145 | "It can take anywhere from a few days to a few weeks to remove a wisdom tooth. The length of time will depend on the individual case and the severity of the tooth.\n", 146 | "HyDE Generated Document: 4\n", 147 | "The length of time it takes to remove a wisdom tooth varies depending on the tooth's position and the amount of bone surrounding it. 
The procedure can take anywhere from 20 minutes to an hour.\n", 148 | "HyDE Generated Document: 5\n", 149 | "The length of time it takes to remove a wisdom tooth depends on a few factors, such as the position of the tooth and the type of extraction (simple or surgical). A simple extraction is typically quicker, taking about 20 minutes, while a surgical extraction can take up to an hour.\n", 150 | "HyDE Generated Document: 6\n", 151 | "It can take anywhere from a few days to a couple of weeks to recover from having your wisdom teeth removed. The actual procedure itself is usually over within an hour or so, but it can take some time for the numbing medication to wear off and for the swelling to go down.\n", 152 | "HyDE Generated Document: 7\n", 153 | "It generally takes around 20 minutes to remove a wisdom tooth. However, the time may vary depending on the individual case. In some cases, the tooth may be removed in just a few minutes, while in others it may take up to an hour.\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "hypothesis_documents = hyde.generate(query)\n", 159 | "for i, doc in enumerate(hypothesis_documents):\n", 160 | " print(f'HyDE Generated Document: {i}')\n", 161 | " print(doc.strip())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "id": "022a4cd8", 167 | "metadata": {}, 168 | "source": [ 169 | "### Encode HyDE vector" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 40, 175 | "id": "cdb4e5b9", 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "(1, 768)\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "hyde_vector = hyde.encode(query, hypothesis_documents)\n", 188 | "print(hyde_vector.shape)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "id": "fe95165a", 194 | "metadata": {}, 195 | "source": [ 196 | "### Search Relevant Documents" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 
41, 202 | "id": "8ae97c0a", 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "HyDE Retrieved Document: 0\n", 210 | "4174313\n", 211 | "The time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.This is usually worse for the first three days, but it can last for up to two weeks. Read more about how a wisdom tooth is removed and recovering from wisdom tooth removal.he time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.\n", 212 | "HyDE Retrieved Document: 1\n", 213 | "18103\n", 214 | "Before having your wisdom teeth removed, you'll be given an injection of local anaesthetic to numb the tooth and surrounding area. If you're particularly anxious about the procedure, your dentist or surgeon may give you a sedative to help you relax. This usually involves an injection into your arm.urgery to remove wisdom teeth shouldn't be painful, because the area will be numb. However, if you feel pain during the procedure, tell your dentist or oral surgeon so they can give you more anaesthetic. How long it takes to remove the tooth will vary.\n", 215 | "HyDE Retrieved Document: 2\n", 216 | "91493\n", 217 | "The time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth. This is usually worse for the first three days, but it can last for up to two weeks. 
Read more about how a wisdom tooth is removed and recovering from wisdom tooth removal.\n", 218 | "HyDE Retrieved Document: 3\n", 219 | "4155912\n", 220 | "How long does it take to remove all wisdom teeth? I got my wisdom teeth removed 5 days ago. I received intravenous anesthesia, so I was not concious during the process, but those present said it only took about 35 to 40… minutes for removal.\n", 221 | "HyDE Retrieved Document: 4\n", 222 | "7344529\n", 223 | "Complications like infection can lengthen the time it takes to heal up, but here is a general timeline: 1 Swelling and pain will be the greatest during the first 3 days (peaking at about 48hours). 2 Normally, the sockets should take about 2 weeks to 1 month to cover over with solid gum tissue after scabbing first.\n", 224 | "HyDE Retrieved Document: 5\n", 225 | "4174308\n", 226 | "How wisdom teeth are removed. Your dentist may remove your wisdom teeth or they may refer you to a specialist surgeon for hospital treatment. Before the procedure, you'll usually be given a local anaesthetic injection to numb the area around the tooth.he time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.\n", 227 | "HyDE Retrieved Document: 6\n", 228 | "4174310\n", 229 | "If your dentist thinks you may need your wisdom teeth removed, they'll usually carry out an X-ray of your mouth. This gives them a clearer view of the position of your teeth. As with any teeth problems, it's important to see your dentist as soon as possible, rather than waiting for your regular dental check-up.he time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. 
After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.\n", 230 | "HyDE Retrieved Document: 7\n", 231 | "4595794\n", 232 | "If you have a dentist remove your wisdom teeth, i can take between 20 minutes and three hours per tooth. If you get a surgeon to remove your wisdom teeth, it takes between 30 seconds and five minutes per tooth. To find out more on wisdom teeth surgery and the approximate times on surgery as well as post-operative instructions, go to http://dentalimplantsaustralia.com/. Source(s): http://dentalimplantsaustralia.com/.\n", 233 | "HyDE Retrieved Document: 8\n", 234 | "4159345\n", 235 | "How long does it take to heal after your wisdom teeth are removed. As with any type of surgery, everyone heals differently, but it usually takes between 10 days to 2 weeks for wisdom teeth removal.\n", 236 | "HyDE Retrieved Document: 9\n", 237 | "2940180\n", 238 | "You shouldn't even brush your teeth for the first day of recovery. According to the offices of practicing oral surgeon Dr. Joseph Arzadon of Arlington, Virginia, typical wisdom teeth recovery time is three to four days, although it can be as long as one week. The length of recovery depends a lot on how badly the wisdom teeth were impacted and how they were erupting.\n" 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "hits = hyde.search(hyde_vector, k=10)\n", 244 | "for i, hit in enumerate(hits):\n", 245 | " print(f'HyDE Retrieved Document: {i}')\n", 246 | " print(hit.docid)\n", 247 | " print(json.loads(corpus.doc(hit.docid).raw())['contents'])" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "a2edb68c", 253 | "metadata": {}, 254 | "source": [ 255 | "### End to End Search\n", 256 | "\n", 257 | "e2e search will directly go through all the steps descripted above." 
258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 42, 263 | "id": "47e9f353", 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "HyDE Retrieved Document: 0\n", 271 | "4174313\n", 272 | "The time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.This is usually worse for the first three days, but it can last for up to two weeks. Read more about how a wisdom tooth is removed and recovering from wisdom tooth removal.he time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.\n", 273 | "HyDE Retrieved Document: 1\n", 274 | "91493\n", 275 | "The time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth. This is usually worse for the first three days, but it can last for up to two weeks. Read more about how a wisdom tooth is removed and recovering from wisdom tooth removal.\n", 276 | "HyDE Retrieved Document: 2\n", 277 | "4155912\n", 278 | "How long does it take to remove all wisdom teeth? I got my wisdom teeth removed 5 days ago. 
I received intravenous anesthesia, so I was not concious during the process, but those present said it only took about 35 to 40… minutes for removal.\n", 279 | "HyDE Retrieved Document: 3\n", 280 | "18103\n", 281 | "Before having your wisdom teeth removed, you'll be given an injection of local anaesthetic to numb the tooth and surrounding area. If you're particularly anxious about the procedure, your dentist or surgeon may give you a sedative to help you relax. This usually involves an injection into your arm.urgery to remove wisdom teeth shouldn't be painful, because the area will be numb. However, if you feel pain during the procedure, tell your dentist or oral surgeon so they can give you more anaesthetic. How long it takes to remove the tooth will vary.\n", 282 | "HyDE Retrieved Document: 4\n", 283 | "7344529\n", 284 | "Complications like infection can lengthen the time it takes to heal up, but here is a general timeline: 1 Swelling and pain will be the greatest during the first 3 days (peaking at about 48hours). 2 Normally, the sockets should take about 2 weeks to 1 month to cover over with solid gum tissue after scabbing first.\n", 285 | "HyDE Retrieved Document: 5\n", 286 | "3654735\n", 287 | "All surgery is associated with some degree of pain, from mild to severe. If your surgery was more extensive, then you will have more post-operative pain, unfortunately. Typically the pain intensity peaks 6 to 10 hours after the procedures.\n", 288 | "HyDE Retrieved Document: 6\n", 289 | "4174308\n", 290 | "How wisdom teeth are removed. Your dentist may remove your wisdom teeth or they may refer you to a specialist surgeon for hospital treatment. Before the procedure, you'll usually be given a local anaesthetic injection to numb the area around the tooth.he time it takes to remove the tooth will vary. Some procedures only take a few minutes, whereas others can take 20 minutes or longer. 
After your wisdom teeth have been removed, you may experience swelling and discomfort, both on the inside and outside of your mouth.\n", 291 | "HyDE Retrieved Document: 7\n", 292 | "4159345\n", 293 | "How long does it take to heal after your wisdom teeth are removed. As with any type of surgery, everyone heals differently, but it usually takes between 10 days to 2 weeks for wisdom teeth removal.\n", 294 | "HyDE Retrieved Document: 8\n", 295 | "2940180\n", 296 | "You shouldn't even brush your teeth for the first day of recovery. According to the offices of practicing oral surgeon Dr. Joseph Arzadon of Arlington, Virginia, typical wisdom teeth recovery time is three to four days, although it can be as long as one week. The length of recovery depends a lot on how badly the wisdom teeth were impacted and how they were erupting.\n", 297 | "HyDE Retrieved Document: 9\n", 298 | "4595794\n", 299 | "If you have a dentist remove your wisdom teeth, i can take between 20 minutes and three hours per tooth. If you get a surgeon to remove your wisdom teeth, it takes between 30 seconds and five minutes per tooth. To find out more on wisdom teeth surgery and the approximate times on surgery as well as post-operative instructions, go to http://dentalimplantsaustralia.com/. 
Source(s): http://dentalimplantsaustralia.com/.\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "hits = hyde.e2e_search(query, k=10)\n", 305 | "for i, hit in enumerate(hits):\n", 306 | " print(f'HyDE Retrieved Document: {i}')\n", 307 | " print(hit.docid)\n", 308 | " print(json.loads(corpus.doc(hit.docid).raw())['contents'])" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "id": "578d2ca9", 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "Python 3.8.13 ('binder')", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.8.13" 337 | }, 338 | "vscode": { 339 | "interpreter": { 340 | "hash": "69dc8d5aafec8ae3fa1fc52986190fb7b1bcb1c4684e5d3b6aa96016ecada253" 341 | } 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 5 346 | } 347 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='hyde', 6 | version='0.0.1', 7 | packages=find_packages('src'), 8 | package_dir={'': 'src'}, 9 | install_requires=[ 10 | 'cohere', 11 | 'openai', 12 | 'pyserini', 13 | 'faiss-cpu', 14 | 'transformers' 15 | ], 16 | ) -------------------------------------------------------------------------------- /src/hyde/__init__.py: -------------------------------------------------------------------------------- 1 | from .generator import OpenAIGenerator, CohereGenerator 2 | from .promptor import Promptor 3 | from .hyde import HyDE 
# --------------------------------------------------------------------------------
# /src/hyde/generator.py:
# --------------------------------------------------------------------------------
import time


def _copy_stop(stop):
    """Return a private copy of a list/tuple of stop sequences.

    The constructors below keep a list literal as the default for ``stop``;
    copying it prevents one instance (or a caller mutating ``gen.stop``) from
    changing the default seen by every other instance. Strings and ``None``
    are passed through untouched.
    """
    if isinstance(stop, (list, tuple)):
        return list(stop)
    return stop


def _text_and_logprob(choice):
    """Return ``(text, total_logprob)`` for one choice of an OpenAI response.

    Two layouts are understood:

    * mapping-style choices of the legacy Completions API
      (``choice['text']`` and ``choice['logprobs']['token_logprobs']``);
    * attribute-style choices of the Chat Completions API
      (``choice.message.content`` and ``choice.logprobs.content[i].logprob``).

    For attribute-style choices, missing log-probabilities count as 0 and
    missing content becomes an empty string.
    """
    if isinstance(choice, dict):
        return choice['text'], sum(choice['logprobs']['token_logprobs'])
    text = choice.message.content or ""
    logprobs = getattr(choice, 'logprobs', None)
    tokens = getattr(logprobs, 'content', None) or []
    return text, sum(token.logprob for token in tokens)


class Generator:
    """Common base of the hypothesis-document generators.

    Stores the model name and API key; subclasses implement ``generate``.
    """

    def __init__(self, model_name, api_key):
        self.model_name = model_name
        self.api_key = api_key

    def generate(self, prompt=None):
        """Placeholder implementation: ignore ``prompt`` and return ``""``.

        ``prompt`` is optional so that the signature lines up with the
        subclasses while still allowing the original zero-argument call.
        """
        return ""


class OpenAIGenerator(Generator):
    """Generate hypothesis documents through the OpenAI Chat Completions API.

    Args:
        model_name: model identifier sent with every request.
        api_key: OpenAI (or compatible endpoint) API key.
        base_url: optional alternative endpoint passed to ``openai.OpenAI``.
        n: number of completions requested per prompt.
        max_tokens: sent as ``max_completion_tokens``.
        temperature, top_p, frequency_penalty, presence_penalty: sampling
            settings forwarded unchanged.
        stop: stop sequence(s) forwarded unchanged.
        wait_till_success: when true, a failed request is retried once per
            second until it succeeds; otherwise the error is re-raised.
    """

    def __init__(self, model_name, api_key, base_url=None, n=8, max_tokens=512,
                 temperature=0.7, top_p=1, frequency_penalty=0.0,
                 presence_penalty=0.0, stop=['\n\n\n'], wait_till_success=False):
        super().__init__(model_name, api_key)
        # base_url must be set before _client_init() reads it.
        self.base_url = base_url
        self.n = n
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.frequency_penalty = frequency_penalty
        self.presence_penalty = presence_penalty
        self.stop = _copy_stop(stop)
        self.wait_till_success = wait_till_success
        self._client_init()

    @staticmethod
    def parse_response(response):
        """Return the completion texts ordered by total log-probability.

        Highest total log-probability comes first; ties keep the order in
        which the API returned the choices. ``response`` may be a mapping
        (``response['choices']``) or an object (``response.choices``).
        """
        if isinstance(response, dict):
            choices = response['choices']
        else:
            choices = response.choices
        scored = [_text_and_logprob(choice) for choice in choices]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [text for text, _ in scored]

    def _client_init(self):
        """Create the OpenAI client from ``self.base_url`` and ``self.api_key``."""
        # Imported on first use so that this module can be imported when only
        # the SDK of the backend actually used is installed.
        import openai

        self.client = openai.OpenAI(
            base_url=self.base_url,
            api_key=self.api_key,
        )

    def generate(self, prompt):
        """Request ``self.n`` completions for ``prompt`` and return their texts.

        Returns:
            list of generated texts, ordered as described in
            ``parse_response``.

        Raises:
            Exception: whatever the client raised, unless
                ``wait_till_success`` is set, in which case the request is
                retried every second without an upper bound.
        """
        while True:
            try:
                result = self.client.chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.model_name,
                    max_completion_tokens=self.max_tokens,
                    temperature=self.temperature,
                    frequency_penalty=self.frequency_penalty,
                    presence_penalty=self.presence_penalty,
                    top_p=self.top_p,
                    n=self.n,  # some models only support n=1
                    stop=self.stop,
                    logprobs=True,  # some models are not compatible with this setting
                )
            except Exception:
                if not self.wait_till_success:
                    raise
                time.sleep(1)
            else:
                return self.parse_response(result)


class CohereGenerator(Generator):
    """Generate hypothesis documents through the Cohere generate endpoint.

    Takes the same settings as ``OpenAIGenerator`` except that nucleus
    sampling is called ``p`` and there is no ``base_url``. One request is
    issued per requested document.
    """

    def __init__(self, model_name, api_key, n=8, max_tokens=512, temperature=0.7,
                 p=1, frequency_penalty=0.0, presence_penalty=0.0,
                 stop=['\n\n\n'], wait_till_success=False):
        super().__init__(model_name, api_key)
        # Imported on first use so that this module can be imported when only
        # the SDK of the backend actually used is installed.
        import cohere

        # NOTE(review): current Cohere SDK documentation builds the client
        # with `cohere.Client(api_key)`; confirm `cohere.Cohere` exists in the
        # SDK version this project targets.
        self.cohere = cohere.Cohere(self.api_key)
        self.n = n
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.p = p
        self.frequency_penalty = frequency_penalty
        self.presence_penalty = presence_penalty
        self.stop = _copy_stop(stop)
        self.wait_till_success = wait_till_success

    @staticmethod
    def parse_response(response):
        """Return the text of the first generation in ``response``."""
        return response.generations[0].text

    def _generate_once(self, prompt):
        """Issue one generate request, retrying as configured, and parse it."""
        while True:
            try:
                result = self.cohere.generate(
                    prompt=prompt,
                    model=self.model_name,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    frequency_penalty=self.frequency_penalty,
                    presence_penalty=self.presence_penalty,
                    p=self.p,
                    k=0,
                    stop=self.stop,
                )
            except Exception:
                if not self.wait_till_success:
                    raise
                time.sleep(1)
            else:
                return self.parse_response(result)

    def generate(self, prompt):
        """Return ``self.n`` generated texts for ``prompt``, in request order.

        Raises:
            Exception: whatever the client raised, unless
                ``wait_till_success`` is set, in which case each request is
                retried every second without an upper bound.
        """
        return [self._generate_once(prompt) for _ in range(self.n)]


# --------------------------------------------------------------------------------
# /src/hyde/hyde.py:
# --------------------------------------------------------------------------------
import numpy as np


class HyDE:
    """Glue object chaining promptor -> generator -> encoder -> searcher.

    The collaborators are used through these calls only:
    ``promptor.build_prompt(query)``, ``generator.generate(prompt)``,
    ``encoder.encode(text)`` and ``searcher.search(vector, k=k)``.
    """

    def __init__(self, promptor, generator, encoder, searcher):
        self.promptor = promptor
        self.generator = generator
        self.encoder = encoder
        self.searcher = searcher

    def prompt(self, query):
        """Return the generation prompt built for ``query``."""
        return self.promptor.build_prompt(query)

    def generate(self, query):
        """Return the hypothesis documents generated for ``query``."""
        return self.generator.generate(self.prompt(query))

    def encode(self, query, hypothesis_documents):
        """Return the HyDE vector: the mean embedding of query and documents.

        Args:
            query: the original query text; it is embedded alongside the
                hypothesis documents.
            hypothesis_documents: any iterable of generated texts (may be
                empty, in which case only the query is embedded).

        Returns:
            ``numpy.ndarray`` of shape ``(1, dim)``.
        """
        # Unpacking (instead of `[query] + docs`) accepts tuples/generators.
        texts = [query, *hypothesis_documents]
        embeddings = np.array([np.array(self.encoder.encode(text)) for text in texts])
        mean_embedding = np.mean(embeddings, axis=0)
        # reshape(1, -1) also copes with encoders that already return (1, dim).
        return mean_embedding.reshape(1, -1)

    def search(self, hyde_vector, k=10):
        """Return the searcher's top-``k`` hits for ``hyde_vector``."""
        return self.searcher.search(hyde_vector, k=k)

    def e2e_search(self, query, k=10):
        """Run prompt -> generate -> encode -> search for ``query``."""
        hypothesis_documents = self.generate(query)
        hyde_vector = self.encode(query, hypothesis_documents)
        return self.search(hyde_vector, k=k)


# --------------------------------------------------------------------------------
# /src/hyde/promptor.py:
# --------------------------------------------------------------------------------
WEB_SEARCH = """Please write a passage to answer the question.
Question: {}
Passage:"""


SCIFACT = """Please write a scientific paper passage to support/refute the claim.
Claim: {}
Passage:"""


ARGUANA = """Please write a counter argument for the passage.
Passage: {}
Counter Argument:"""


TREC_COVID = """Please write a scientific paper passage to answer the question.
Question: {}
Passage:"""


FIQA = """Please write a financial article passage to answer the question.
Question: {}
Passage:"""


DBPEDIA_ENTITY = """Please write a passage to answer the question.
Question: {}
Passage:"""


TREC_NEWS = """Please write a news passage about the topic.
Topic: {}
Passage:"""


MR_TYDI = """Please write a passage in {} to answer the question in detail.
Question: {}
Passage:"""


# Task name -> template with a single `{}` slot for the query.
# 'mr-tydi' is handled separately because its template also takes the language.
_QUERY_ONLY_TEMPLATES = {
    'web search': WEB_SEARCH,
    'scifact': SCIFACT,
    'arguana': ARGUANA,
    'trec-covid': TREC_COVID,
    'fiqa': FIQA,
    'dbpedia-entity': DBPEDIA_ENTITY,
    'trec-news': TREC_NEWS,
}


class Promptor:
    """Build the zero-shot generation prompt for a retrieval task.

    Args:
        task: one of 'web search', 'scifact', 'arguana', 'trec-covid',
            'fiqa', 'dbpedia-entity', 'trec-news' or 'mr-tydi'. The value is
            only checked when a prompt is built.
        language: inserted verbatim into the 'mr-tydi' prompt; unused by the
            other tasks.
    """

    def __init__(self, task: str, language: str = 'en'):
        self.task = task
        self.language = language

    def build_prompt(self, query: str) -> str:
        """Return the task's prompt template filled in with ``query``.

        Raises:
            ValueError: if ``self.task`` is not a supported task.
        """
        if self.task == 'mr-tydi':
            return MR_TYDI.format(self.language, query)
        try:
            template = _QUERY_ONLY_TEMPLATES[self.task]
        except (KeyError, TypeError):
            # TypeError covers unhashable task values, which the former
            # equality chain also reported as an unsupported task.
            raise ValueError('Task not supported') from None
        return template.format(query)
# --------------------------------------------------------------------------------