├── .env.template ├── .gitignore ├── README.md ├── evaluation.py ├── indatasets └── webq │ ├── webq-dev.jsonl │ ├── webq-test.jsonl │ └── webq-train.jsonl ├── inference.py ├── inprompts ├── mult_doc.jsonl └── single_doc.jsonl ├── mainfunc.py └── requirements.txt /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk_... 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .env 3 | logs/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## LLM-URL: Large Language Models are Built-in Autoregressive Search Engines 2 | 3 | ### Introduction & Setup 4 | 5 | This repository contains the code for the paper [Large Language Models are Built-in Autoregressive Search Engines](https://arxiv.org/abs/2305.09612) (accepted to ACL 2023). 6 | 7 | - To get started, clone this repository and install the requirements: 8 | 9 | ```bash 10 | git clone https://github.com/Ziems/llm-url 11 | cd llm-url 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | - Rename `.env.template` to `.env` and then add your OpenAI API key 16 | 17 | - Download the NQ and TriviaQA (tqa) datasets from Google Drive (we unified the formats): [\[link\]](https://drive.google.com/drive/folders/1lFFTklW_0HuR53hLpFdLClgfSAhXn_2f?usp=sharing) then put these directories in the `indatasets` directory along with WebQ, which is already there. 
18 | 19 | - Run retrieval (step1) on the dataset 20 | 21 | ```bash 22 | python3 mainfunc.py 23 | --dataset {dataset} 24 | --task step1 25 | ``` 26 | 27 | - Run answer generation (step2): 28 | 29 | ```bash 30 | python3 mainfunc.py 31 | --dataset {dataset} 32 | --task step2 33 | ``` 34 | 35 | ## Citation 36 | ``` 37 | @inproceedings{ziems-2023-large, 38 | title = "Large Language Models are Built-in Autoregressive Search Engines", 39 | author = "Ziems, Noah and 40 | Yu, Wenhao and 41 | Zhang, Zhihan and 42 | Jiang, Meng", 43 | booktitle = "Findings of the Association for Computational Linguistics: ACL 2023", 44 | year = "2023" 45 | } 46 | ``` 47 | Please feel free to cite our paper if you find this repository helpful in your research. 48 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import regex 2 | import json 3 | import string 4 | import unicodedata 5 | from typing import List 6 | import numpy as np 7 | from collections import Counter 8 | from rouge import Rouge 9 | 10 | 11 | class SimpleTokenizer(object): 12 | ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+' 13 | NON_WS = r'[^\p{Z}\p{C}]' 14 | 15 | def __init__(self): 16 | """ 17 | Args: 18 | annotators: None or empty set (only tokenizes). 
19 | """ 20 | self._regexp = regex.compile( 21 | '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS), 22 | flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE 23 | ) 24 | 25 | def tokenize(self, text, uncased=False): 26 | matches = [m for m in self._regexp.finditer(text)] 27 | if uncased: 28 | tokens = [m.group().lower() for m in matches] 29 | else: 30 | tokens = [m.group() for m in matches] 31 | return tokens 32 | 33 | 34 | def check_answer(example, tokenizer) -> List[bool]: 35 | """Search through all the top docs to see if they have any of the answers.""" 36 | answers = example['answers'] 37 | ctxs = example['ctxs'] 38 | 39 | hits = [] 40 | 41 | for _, doc in enumerate(ctxs): 42 | text = doc['text'] 43 | 44 | if text is None: # cannot find the document for some reason 45 | hits.append(False) 46 | continue 47 | 48 | hits.append(has_answer(answers, text, tokenizer)) 49 | 50 | return hits 51 | 52 | 53 | def has_answer(answers, text, tokenizer=SimpleTokenizer()) -> bool: 54 | """Check if a document contains an answer string.""" 55 | text = _normalize(text) 56 | text = tokenizer.tokenize(text, uncased=True) 57 | 58 | for answer in answers: 59 | answer = _normalize(answer) 60 | answer = tokenizer.tokenize(answer, uncased=True) 61 | for i in range(0, len(text) - len(answer) + 1): 62 | if answer == text[i: i + len(answer)]: 63 | return True 64 | return False 65 | 66 | 67 | def _normalize(text): 68 | return unicodedata.normalize('NFD', text) 69 | 70 | 71 | def normalize_answer(s): 72 | def remove_articles(text): 73 | return regex.sub(r'\b(a|an|the)\b', ' ', text) 74 | 75 | def white_space_fix(text): 76 | return ' '.join(text.split()) 77 | 78 | def remove_punc(text): 79 | exclude = set(string.punctuation) 80 | return ''.join(ch for ch in text if ch not in exclude) 81 | 82 | def lower(text): 83 | return text.lower() 84 | 85 | return white_space_fix(remove_articles(remove_punc(lower(s)))) 86 | 87 | 88 | def exact_match_score(prediction, ground_truth): 89 | return 
normalize_answer(prediction) == normalize_answer(ground_truth) 90 | 91 | 92 | def ems(prediction, ground_truths): 93 | return max([exact_match_score(prediction, gt) for gt in ground_truths]) 94 | 95 | 96 | def f1_score(prediction, ground_truth): 97 | prediction_tokens = normalize_answer(prediction).split() 98 | ground_truth_tokens = normalize_answer(ground_truth).split() 99 | common = Counter(prediction_tokens) & Counter(ground_truth_tokens) 100 | num_same = sum(common.values()) 101 | if num_same == 0: 102 | return 0 103 | precision = 1.0 * num_same / len(prediction_tokens) 104 | recall = 1.0 * num_same / len(ground_truth_tokens) 105 | f1 = (2 * precision * recall) / (precision + recall) 106 | return f1 107 | 108 | 109 | def f1(prediction, ground_truths): 110 | return max([f1_score(prediction, gt) for gt in ground_truths]) 111 | 112 | 113 | def rougel_score(prediction, ground_truth): 114 | rouge = Rouge() 115 | # no normalization 116 | try: 117 | scores = rouge.get_scores(prediction, ground_truth, avg=True) 118 | except ValueError: # "Hypothesis is empty." 119 | return 0.0 120 | return scores["rouge-l"]["f"] 121 | 122 | 123 | def rl(prediction, ground_truths): 124 | return max([rougel_score(prediction, gt) for gt in ground_truths]) 125 | 126 | 127 | ## file-level evaluation ... 
###
def eval_recall(infile):
    """Answer recall over a JSONL result file.

    Each line holds {'answer': [gold strings], 'output': [model strings]};
    an example is a hit when any gold answer appears (token-level) in the
    ' || '-joined outputs.  The first line of *infile* is skipped as a header.

    Returns (recall, mean output length in words), both rounded to 4 dp.
    """
    tokenizer = SimpleTokenizer()
    # FIX: 'with' closes the handle (the original leaked it); [1:] skips header.
    with open(infile, 'r') as fin:
        lines = fin.readlines()[1:]
    if not lines:  # FIX: avoid ZeroDivisionError on a header-only/empty file
        return 0.0, 0.0

    has_answer_count = 0
    answer_lengths = []
    for line in lines:
        line = json.loads(line)
        answer = line['answer']
        output = ' || '.join(line['output'])

        if has_answer(answer, output, tokenizer):
            has_answer_count += 1

        answer_lengths.append(len(output.split()))

    recall = round(has_answer_count / len(lines), 4)
    lens = round(np.mean(answer_lengths), 4)

    return recall, lens


def eval_question_answering(infile):
    """Exact-match (EM) accuracy over a JSONL result file.

    Compares the first model output of each example against the gold answers
    using SQuAD-style normalization.  The first line of *infile* is skipped
    as a header.

    Returns (em, mean output length in words), both rounded to 4 dp.
    """
    with open(infile, 'r') as fin:
        lines = fin.readlines()[1:]
    if not lines:
        return 0.0, 0.0

    exact_match_count = 0
    answer_lengths = []
    for line in lines:
        line = json.loads(line)
        answer = line['answer']
        output = line['output'][0]

        if ems(output, answer):  # EM evaluation
            exact_match_count += 1

        answer_lengths.append(len(output.split()))

    em = round(exact_match_count / len(lines), 4)
    lens = round(np.mean(answer_lengths), 4)

    return em, lens


def eval_fact_checking(infile):
    """Fact-checking accuracy over a JSONL result file.

    Gold labels 'refutes'/'supports' are expanded to natural-language
    synonyms before the token-level containment check.  The first line of
    *infile* is skipped as a header.

    Returns (accuracy, mean output length in words), both rounded to 4 dp.
    """
    tokenizer = SimpleTokenizer()
    with open(infile, 'r') as fin:
        lines = fin.readlines()[1:]
    if not lines:
        return 0.0, 0.0

    exact_match_count = 0
    answer_lengths = []
    for line in lines:
        line = json.loads(line)
        answer = line['answer']
        output = line['output'][0]

        # Map the FEVER-style labels onto the words a model is likely to emit.
        if answer == ["refutes"]:
            answer = ["refutes", "no", "false"]
        if answer == ["supports"]:
            answer = ["supports", "yes", "true"]

        if has_answer(answer, output, tokenizer):
            exact_match_count += 1

        answer_lengths.append(len(output.split()))

    em = round(exact_match_count / len(lines), 4)
    lens = round(np.mean(answer_lengths), 4)

    return em, lens


def eval_dialogue_system(infile):

    lines = open(infile, 'r').readlines()[1:]
| f1_scores = [] 206 | rl_scores = [] 207 | answer_lengths = [] 208 | for line in lines: 209 | line = json.loads(line) 210 | answer = line['answer'] 211 | output = line['output'][0] 212 | 213 | f1_scores.append(f1(output, answer)) 214 | rl_scores.append(rl(output, answer)) 215 | answer_lengths.append(len(output.split())) 216 | 217 | F1 = round(np.mean(f1_scores), 4) 218 | RL = round(np.mean(rl_scores), 4) 219 | lens = round(np.mean(answer_lengths), 4) 220 | 221 | return F1, RL, lens 222 | 223 | -------------------------------------------------------------------------------- /indatasets/webq/webq-dev.jsonl: -------------------------------------------------------------------------------- 1 | {"id": "2738", "question": "what style of art did henri matisse use?", "answer": ["Fauvism", "Impressionism", "Neo-impressionism", "Modernism"]} 2 | {"id": "1048", "question": "what to do in panama city beach fl?", "answer": ["Thunder Beach Motorcycle Rally", "Club La Vela", "Man in the Sea Museum", "Camp Helen State Park", "Pier Park", "Shipwreck Island", "Latimer Cabin", "St. 
Andrews State Park", "Miracle Strip at Pier Park"]} 3 | {"id": "3320", "question": "who was the owner of kfc?", "answer": ["Colonel Sanders"]} 4 | {"id": "681", "question": "where did chancellorsville battle take place?", "answer": ["Spotsylvania County"]} 5 | {"id": "2825", "question": "what the currency in argentina?", "answer": ["Argentine peso"]} 6 | {"id": "534", "question": "who does michael vick play for?", "answer": ["Philadelphia Eagles"]} 7 | {"id": "1853", "question": "who did troy aikman play for?", "answer": ["Dallas Cowboys"]} 8 | {"id": "3170", "question": "what does obama have a degree in?", "answer": ["Political Science"]} 9 | {"id": "1518", "question": "what did william howe do in the revolutionary war?", "answer": ["Battle of Bunker Hill"]} 10 | {"id": "2368", "question": "what to do around krabi thailand?", "answer": ["Tup Island", "Ao Phra Nang", "Ko Lanta Yai", "Nosey Parker's Elephant Camp", "Ao Tha Len Bay", "Phra Nang Beach", "Hat Noppharat Beach", "Tiger Cave Temple", "Khao Phanom Bencha National Park", "Railay Beach"]} 11 | {"id": "915", "question": "who is larry ellison oracle?", "answer": ["Entrepreneur", "Businessperson", "Investor"]} 12 | {"id": "3255", "question": "where is pyramids located in egypt?", "answer": ["The Great Pyramid of Giza"]} 13 | {"id": "111", "question": "what is the local language of israel?", "answer": ["Hebrew Language"]} 14 | {"id": "2289", "question": "what place did thomas jefferson died?", "answer": ["Charlottesville"]} 15 | {"id": "1268", "question": "where is sony headquarters?", "answer": ["Tokyo"]} 16 | {"id": "3699", "question": "who plays riley finn on buffy the vampire slayer?", "answer": ["Marc Blucas"]} 17 | {"id": "2354", "question": "what years have the ravens won the super bowl?", "answer": ["Super Bowl XXXV", "Super Bowl XLVII"]} 18 | {"id": "2873", "question": "where does hector camacho live?", "answer": ["United States of America"]} 19 | {"id": "3004", "question": "who did the voice of kitt in 
knight rider?", "answer": ["William Daniels"]} 20 | {"id": "2115", "question": "what instrument did louis armstrong like to play?", "answer": ["trumpet", "Cornet"]} 21 | {"id": "162", "question": "what instrument does justin bieber?", "answer": ["guitar", "Piano", "trumpet", "Drums"]} 22 | {"id": "3560", "question": "who wrote 2 timothy 4?", "answer": ["Eastern Orthodox Church", "Anglicanism", "Eastern Catholic Churches", "Anglican Communion", "Catholicism", "Lutheranism", "Oriental Orthodoxy"]} 23 | {"id": "3207", "question": "what radio station does uga football come on?", "answer": ["Georgia Bulldogs football"]} 24 | {"id": "1077", "question": "what religions are practiced in afghanistan?", "answer": ["Shia Islam", "Sunni Islam"]} 25 | {"id": "1327", "question": "what happened to the battle of shiloh?", "answer": ["Surprise Attack!"]} 26 | {"id": "2099", "question": "who was philip randolph?", "answer": ["Trade unionist", "Social activist"]} 27 | {"id": "1105", "question": "what is the battle of antietam?", "answer": ["Maryland Campaign", "American Civil War"]} 28 | {"id": "2530", "question": "what is isaac newton famous for?", "answer": ["Physicist"]} 29 | {"id": "1021", "question": "what to see in washington dc in 2 days?", "answer": ["White House", "The Phillips Collection", "United States Capitol", "Thomas Jefferson Memorial", "National Museum of the American Indian", "International Spy Museum", "National Portrait Gallery", "Washington Monument", "Lincoln Memorial", "Freer Gallery of Art"]} 30 | {"id": "1302", "question": "what is zip code for chicago illinois?", "answer": ["60605", "60604", "60607", "60606", "60601", "60610", "60603", "60602", "60290", "60608"]} 31 | {"id": "3633", "question": "who is keyshia cole married too?", "answer": ["Daniel Gibson"]} 32 | {"id": "2094", "question": "what city did elvis presley grew up in?", "answer": ["Tupelo"]} 33 | {"id": "2405", "question": "what is colorado's state flower?", "answer": ["Aquilegia saximontana"]} 
34 | {"id": "959", "question": "who is the current leader of china today?", "answer": ["Xi Jinping"]} 35 | {"id": "1275", "question": "what shows are on in london at the moment?", "answer": ["World Organization of the Scout Movement", "2008 Raindance Film Festival", "Grunwick dispute", "2003 London blackout", "1st Computer Olympiad", "Ocean transport and erection of Cleopatra\u2019s Needle, (from Egypt to) London, 1878", "Early fires of London", "Fathers 4 Justice protests", "2nd Computer Olympiad", "Live Earth"]} 36 | {"id": "942", "question": "where to travel around sydney?", "answer": ["The Rocks", "Rose Seidler House", "Bondi Beach", "Sydney Opera House", "Luna Park Sydney", "Wild Life Sydney", "Darling Harbour", "Sydney Harbour Bridge", "St Mary's Cathedral, Sydney", "Powerhouse Museum"]} 37 | {"id": "3005", "question": "what challenges did helen keller have to face?", "answer": ["Deafness", "Vision loss"]} 38 | {"id": "759", "question": "what language do jewish speak?", "answer": ["Yiddish Language", "Judeo-Arabic languages", "Karaim Language", "Hulaul\u00e1 Language", "Hebrew Language", "Catalanic", "Judeo-Portuguese", "Judeo-Tat", "Judeo-Italian Language", "Jud\u00e6o-Persian dialects"]} 39 | {"id": "1999", "question": "what languages do people speak in switzerland?", "answer": ["Romansh language", "French Language", "German Language", "Italian Language"]} 40 | {"id": "2871", "question": "what is there to see near the grand canyon?", "answer": ["Phoenix", "Grand Canyon National Park", "Lake Powell"]} 41 | {"id": "1232", "question": "where was the roman empire centered?", "answer": ["Rome"]} 42 | {"id": "1635", "question": "what teams did gretzky play on?", "answer": ["Edmonton Oilers"]} 43 | {"id": "3027", "question": "who did jerry rice retire with?", "answer": ["San Francisco 49ers"]} 44 | {"id": "1947", "question": "what timezone is new mexico currently in?", "answer": ["Mountain Time Zone", "UTC\u221207:00"]} 45 | {"id": "3236", "question": "what school 
did martin luther king jr attend?", "answer": ["Boston University"]} 46 | {"id": "85", "question": "what was woodrow wilson's major accomplishments?", "answer": ["United States Secretary of Agriculture", "Governor of New Jersey", "President of the United States"]} 47 | {"id": "976", "question": "what was wrong with joan crawford?", "answer": ["Myocardial infarction", "Pancreatic cancer"]} 48 | {"id": "2462", "question": "who does the united states export the most to?", "answer": ["Canada"]} 49 | {"id": "3039", "question": "what language does egyptians use?", "answer": ["Languages of Egypt", "Egyptian Arabic", "Coptic Language", "Egyptian language", "Sa'idi Arabic"]} 50 | {"id": "1356", "question": "what major airport is near destin florida?", "answer": ["Northwest Florida Regional Airport", "Destin\u2013Fort Walton Beach Airport"]} 51 | {"id": "2220", "question": "what years did yankees win championships?", "answer": ["1941 World Series", "1939 World Series", "1932 World Series", "1928 World Series", "1936 World Series", "1923 World Series", "1927 World Series", "1937 World Series", "1938 World Series", "1943 World Series"]} 52 | {"id": "331", "question": "where did bashar al assad study?", "answer": ["University of Damascus"]} 53 | {"id": "3532", "question": "what religion did jesus christ follow?", "answer": ["Judaism"]} 54 | {"id": "3438", "question": "who played the parents in ferris bueller day off?", "answer": ["Cindy Pickett"]} 55 | {"id": "3516", "question": "who is ronaldo playing for now?", "answer": ["Sport Club Corinthians Paulista"]} 56 | {"id": "417", "question": "in which province is johannesburg?", "answer": ["Gauteng"]} 57 | {"id": "877", "question": "what currency should i take to jamaica?", "answer": ["Jamaican dollar"]} 58 | {"id": "1661", "question": "where does the panama canal run through?", "answer": ["Panama Canal Zone"]} 59 | {"id": "2389", "question": "what did charles dickens believe in?", "answer": ["Anglicanism"]} 60 | {"id": "2520", 
"question": "what was gregor mendel known for?", "answer": ["Scientist", "Monk", "Botanist"]} 61 | {"id": "2765", "question": "who plays claudia joy on army wives?", "answer": ["Kim Delaney"]} 62 | {"id": "1904", "question": "what is molly ringwald in?", "answer": ["Office Killer", "The Breakfast Club", "Sixteen Candles", "Fresh Horses", "Strike It Rich", "Betsy's Wedding", "Pretty in Pink", "Spacehunter: Adventures in the Forbidden Zone", "The Stand", "Tempest"]} 63 | {"id": "211", "question": "what money do they use in chile?", "answer": ["Chilean peso"]} 64 | {"id": "1371", "question": "who makes lois griffin voice?", "answer": ["Alex Borstein"]} 65 | {"id": "519", "question": "who did johnny bench play for?", "answer": ["Cincinnati Reds"]} 66 | {"id": "722", "question": "what is the current government system in france?", "answer": ["Semi-presidential system", "Constitutional republic", "Unitary state"]} 67 | {"id": "3407", "question": "what are the mountains in peru called?", "answer": ["Andes"]} 68 | {"id": "3638", "question": "what type of strings does willie nelson use?", "answer": ["Steel-string acoustic guitar"]} 69 | {"id": "1720", "question": "what did cam newton do?", "answer": ["Quarterback"]} 70 | {"id": "990", "question": "what state was theodore roosevelt from?", "answer": ["New York"]} 71 | {"id": "1499", "question": "where did lee surrender to grant to end the civil war?", "answer": ["Battle of Appomattox Court House"]} 72 | {"id": "2464", "question": "what school did michael jordan go to?", "answer": ["University of North Carolina at Chapel Hill", "Emsley A. 
Laney High School"]} 73 | {"id": "1191", "question": "what county is plainfield il in?", "answer": ["Will County"]} 74 | {"id": "1676", "question": "what part of the world is south africa in?", "answer": ["Africa"]} 75 | {"id": "115", "question": "what did shawnee smith play in?", "answer": ["The Grudge 3", "Saw IV", "Summer School", "Saw II", "Saw III", "The Island", "Saw", "The Blob", "Who's Harry Crumb?"]} 76 | {"id": "884", "question": "what countries in north america continent?", "answer": ["Navassa Island", "Greenland", "Honduras", "El Salvador", "Saint Pierre and Miquelon", "Jan Mayen", "Guatemala", "Aruba", "Clipperton Island", "Belize"]} 77 | {"id": "1280", "question": "what is the closest airport to naples florida?", "answer": ["Marco Island Airport"]} 78 | {"id": "3182", "question": "what are the names of snoopy brothers and sisters?", "answer": ["Olaf", "Spike"]} 79 | {"id": "1516", "question": "what kind of money is used in israel?", "answer": ["Israeli new shekel"]} 80 | {"id": "2040", "question": "what american penny is worth money?", "answer": ["Cent"]} 81 | {"id": "1548", "question": "what is the name of the pittsburgh steelers stadium?", "answer": ["Pitt Stadium", "Forbes Field", "Heinz Field", "Three Rivers Stadium"]} 82 | {"id": "1576", "question": "what countries did germany take over during world war 2?", "answer": ["France"]} 83 | {"id": "382", "question": "where did jack johnson go to high school?", "answer": ["Kahuku High & Intermediate School"]} 84 | {"id": "2223", "question": "what year did steve nash play for the mavericks?", "answer": ["1998\u201399 NBA season"]} 85 | {"id": "2256", "question": "what happened to adolf hitler at the end of the war?", "answer": ["Suicide"]} 86 | {"id": "3094", "question": "who is aidan quinn?", "answer": ["Actor"]} 87 | {"id": "3700", "question": "what kind of political system is canada?", "answer": ["Parliamentary system", "Federation", "Constitutional monarchy", "Multi-party system"]} 88 | {"id": "707", 
"question": "where was selena gomez raised?", "answer": ["Grand Prairie"]} 89 | {"id": "1949", "question": "what is the legislature of missouri called?", "answer": ["Missouri General Assembly"]} 90 | {"id": "3659", "question": "where does the uk import from?", "answer": ["Canada", "Afghanistan", "Madagascar", "United States of America", "Antigua and Barbuda", "New Zealand", "Anguilla"]} 91 | {"id": "2150", "question": "what 3 countries does mexico border?", "answer": ["United States of America"]} 92 | {"id": "3559", "question": "what season did jason priestley leave 90210?", "answer": ["Beverly Hills, 90210 - Season 9"]} 93 | {"id": "1791", "question": "who is golfer dustin johnson dating?", "answer": ["Amanda Caulder"]} 94 | {"id": "567", "question": "what movies did ron howard do?", "answer": ["Apollo 13", "A Beautiful Mind", "Curious George", "Happy Days", "Cocoon", "From the Earth to the Moon"]} 95 | {"id": "1984", "question": "what kind of cancer killed larry hagman have?", "answer": ["Head and neck cancer", "Cancer", "Leukemia"]} 96 | {"id": "1157", "question": "where is mount vernon virginia?", "answer": ["Fairfax County, Virginia", "Washington metropolitan area"]} 97 | {"id": "3732", "question": "what channel is game show network on time warner cable?", "answer": ["Warner Bros. 
Entertainment"]} 98 | {"id": "1455", "question": "what timezone is tennessee nashville?", "answer": ["Central Time Zone"]} 99 | {"id": "1216", "question": "what awards has morgan freeman won?", "answer": ["Golden Icon", "National Board of Review Award for Best Actor", "London Film Critics Circle Award for Best Actor", "Golden Globe Award for Best Actor - Musical or Comedy Film", "National Society of Film Critics Award for Best Supporting Actor", "Kennedy Center Honor", "Camie Award", "NAACP Image Award for Outstanding Supporting Actor in a Motion Picture", "Academy Award for Actor in a Supporting Role"]} 100 | {"id": "1504", "question": "who is ben roethlisberger parents?", "answer": ["Ida Jane Foust", "Kenneth Todd Roethlisberger"]} 101 | {"id": "2491", "question": "what language do you speak in austria?", "answer": ["Austrian German", "Bosnian language", "Hungarian language", "Croatian language", "Serbian language", "Bavarian Language", "Slovenian language", "German Language", "Turkish Language"]} 102 | {"id": "951", "question": "what is the currency name of china?", "answer": ["Renminbi"]} 103 | {"id": "2165", "question": "what language do haitian speak?", "answer": ["French Language", "Haitian Creole French Language"]} 104 | {"id": "49", "question": "what movie is josh hutcherson in?", "answer": ["Cirque du Freak: The Vampire's Assistant", "The Hunger Games", "7 Days in Havana", "Journey to the Center of the Earth", "The Forger", "Detention", "Journey 2: The Mysterious Island", "Red Dawn", "The Third Rule", "The Kids Are All Right"]} 105 | {"id": "2479", "question": "where does honda play soccer?", "answer": ["Chief Executive Officer"]} 106 | {"id": "2700", "question": "what currency is used in hungary?", "answer": ["Hungarian forint"]} 107 | {"id": "258", "question": "what are major exports of the usa?", "answer": ["Pharmaceutical Preparations", "Food", "Industrial Organic Chemicals, NEC", "Automobile"]} 108 | {"id": "3754", "question": "what team does pudge 
rodriguez play for?", "answer": ["Texas Rangers"]} 109 | {"id": "806", "question": "where are the pyramids of giza located?", "answer": ["Egypt"]} 110 | {"id": "2548", "question": "what college did charles richard drew go to?", "answer": ["Amherst College"]} 111 | {"id": "1500", "question": "what does annie leibovitz do?", "answer": ["Photographer", "Artist"]} 112 | {"id": "59", "question": "where does lena river begin and end?", "answer": ["Baikal Mountains"]} 113 | {"id": "3715", "question": "where is bob marley grave?", "answer": ["Bob Marley Mausoleum"]} 114 | {"id": "616", "question": "where did matt barnes play?", "answer": ["Small forward"]} 115 | {"id": "313", "question": "what countries make up continental europe?", "answer": ["United Kingdom", "Albania", "Finland", "San Marino", "France", "Andorra", "Liechtenstein", "Georgia", "Germany", "Belgium"]} 116 | {"id": "1274", "question": "where did helen keller go to school?", "answer": ["Radcliffe College", "Perkins School for the Blind", "Wright-Humason School for the Deaf", "The Cambridge School of Weston", "Horace Mann School for the Deaf and Hard of Hearing"]} 117 | {"id": "1072", "question": "what kind of money do you use in aruba?", "answer": ["Aruban florin"]} 118 | {"id": "3213", "question": "what is the bosnian language?", "answer": ["Serbo-Croatian", "Croatian language", "Bosnian language", "Serbian language"]} 119 | {"id": "898", "question": "where did pablo picasso die?", "answer": ["Mougins"]} 120 | {"id": "1927", "question": "what did duke ellington do?", "answer": ["Musician"]} 121 | {"id": "1750", "question": "where is whitey bulger?", "answer": ["Boston"]} 122 | {"id": "159", "question": "what time zone is oklahoma state?", "answer": ["Central Time Zone", "UTC\u221206:00"]} 123 | {"id": "992", "question": "what form of government does brazil have?", "answer": ["Constitutional republic", "Presidential system", "Federal republic"]} 124 | {"id": "853", "question": "what is the short name for 
stephanie?", "answer": ["Stepha"]} 125 | {"id": "1487", "question": "where did the columbia river begin?", "answer": ["British Columbia"]} 126 | {"id": "1088", "question": "what did michelle obama do?", "answer": ["First Lady of the United States"]} 127 | {"id": "134", "question": "who all has dated taylor swift?", "answer": ["Harry Styles"]} 128 | {"id": "1025", "question": "what did bella abzug do?", "answer": ["Lawyer"]} 129 | {"id": "360", "question": "what did kate winslet get an oscar for?", "answer": ["The Reader"]} 130 | {"id": "3173", "question": "when was saint nicholas died?", "answer": ["7/17/1918"]} 131 | {"id": "3331", "question": "what is the zip code for wildwood?", "answer": ["08260"]} 132 | {"id": "3545", "question": "what continent is syria located in?", "answer": ["Asia"]} 133 | {"id": "2208", "question": "what songs did whitney houston?", "answer": ["Count on Me", "Ain't No Way (feat. Mary J. Blige)", "Why Does It Hurt So Bad", "I Will Always Love You", "I'm Every Woman", "Who Would Imagine a King", "You'll Never Stand Alone", "My Love Is Your Love", "Fine", "It's Not Right, but It's Okay"]} 134 | {"id": "577", "question": "what kind of government does chile have today?", "answer": ["Presidential system", "Republic", "Unitary state"]} 135 | {"id": "2460", "question": "what was the capital of ancient israel?", "answer": ["Jerusalem"]} 136 | {"id": "1540", "question": "who is kobe bryant wife bio?", "answer": ["Vanessa Laine"]} 137 | {"id": "131", "question": "where do baltimore ravens play?", "answer": ["M&T Bank Stadium"]} 138 | {"id": "1760", "question": "what were marco polo's goals?", "answer": ["Explorer"]} 139 | {"id": "718", "question": "who plays ferris bueller's best friend?", "answer": ["Alan Ruck"]} 140 | {"id": "1252", "question": "where was the battle of vicksburg located?", "answer": ["Warren County"]} 141 | {"id": "2767", "question": "what language does greece use?", "answer": ["Greek Language"]} 142 | {"id": "2913", "question": 
"what is the current leader of japan?", "answer": ["Shinz\u014d Abe"]} 143 | {"id": "3318", "question": "what government does france have?", "answer": ["Semi-presidential system", "Constitutional republic", "Unitary state"]} 144 | {"id": "3440", "question": "what should i visit in venice?", "answer": ["Grand Canal, Venice", "The Grand Canal, Venice", "Venice: The Prison", "Palazzo Dario, Venice", "View of Venice", "Gondola in Venice", "View of Venice: Ducal Palace, Dogana, and San Giorgio"]} 145 | {"id": "1703", "question": "what language does people speak in australia?", "answer": ["English Language"]} 146 | {"id": "1230", "question": "where did the tutsi come from?", "answer": ["Rwanda"]} 147 | {"id": "1379", "question": "what position does terrell owens play?", "answer": ["Wide Receiver"]} 148 | {"id": "1312", "question": "who is the new governor of florida 2011?", "answer": ["Rick Scott"]} 149 | {"id": "531", "question": "what religions are found in australia?", "answer": ["Catholicism", "Buddhism", "Islam", "Anglicanism"]} 150 | {"id": "1229", "question": "what was nicolaus copernicus discovery?", "answer": ["Heliocentrism", "Copernican heliocentrism", "Copernican Revolution"]} 151 | {"id": "3091", "question": "when did venus williams win wimbledon?", "answer": ["2009 Wimbledon Championships"]} 152 | {"id": "2999", "question": "what time zone is london in right now?", "answer": ["Greenwich Mean Time"]} 153 | {"id": "1286", "question": "what show is jill wagner on?", "answer": ["Wipeout", "Punk'd", "Blade: The Series", "Teen Wolf"]} 154 | {"id": "26", "question": "what year lebron james came to the nba?", "answer": ["2003\u201304 NBA season"]} 155 | {"id": "2221", "question": "what characters does seth macfarlane play in family guy?", "answer": ["Peter Griffin", "Mickey McFinnegan", "Stewie Griffin", "Jake Tucker", "Glenn Quagmire", "Carter Pewterschmidt", "Stan Smith", "Tom Tucker", "Kevin Swanson", "Brian Griffin"]} 156 | {"id": "98", "question": "where is 
jefferson davis buried?", "answer": ["Hollywood Cemetery"]} 157 | {"id": "1985", "question": "what kind of drugs does charlie sheen do?", "answer": ["Cocaine"]} 158 | {"id": "2866", "question": "what countries speak german as a first language?", "answer": ["Canada", "German Democratic Republic", "Luxembourg", "Switzerland", "Liechtenstein", "Germany", "West Germany", "Belgium", "Vatican City", "Second Polish Republic"]} 159 | {"id": "2615", "question": "what instrument does fela kuti play?", "answer": ["guitar", "trumpet", "Saxophone", "Drums", "keyboard"]} 160 | {"id": "1805", "question": "what religion is mary queen of scots?", "answer": ["Catholicism"]} 161 | {"id": "1240", "question": "where is mallorca?", "answer": ["Mediterranean Sea"]} 162 | {"id": "3777", "question": "what kind government does the us have?", "answer": ["Presidential system", "Federal republic", "Representative democracy", "Two-party system", "Constitutional republic", "Republic"]} 163 | {"id": "1011", "question": "what type of cancer did huell howser die of?", "answer": ["Prostate cancer"]} 164 | {"id": "2315", "question": "who are the senators of kansas 2013?", "answer": ["Jerry Moran", "Sam Brownback", "Pat Roberts"]} 165 | {"id": "1611", "question": "what state did henry clay represent?", "answer": ["Kentucky"]} 166 | {"id": "2914", "question": "how much indiana jones movies are there?", "answer": ["Indiana Jones and the Raiders of the Lost Ark", "Disaster Movie", "Indiana Jones and the Last Crusade", "Indiana Jones and the Temple of Doom", "Indiana Jones and the Kingdom of the Crystal Skull"]} 167 | {"id": "768", "question": "what system of government is used in south korea?", "answer": ["Constitutional republic"]} 168 | {"id": "2824", "question": "who did alicia keys have a baby with?", "answer": ["Swizz Beatz"]} 169 | {"id": "286", "question": "who owns aston martin 2012?", "answer": ["David Richards"]} 170 | {"id": "224", "question": "what happened to daddy yankee?", "answer": 
["Gunshot"]} 171 | {"id": "652", "question": "what is the capital of spain in 2010?", "answer": ["Madrid"]} 172 | {"id": "1104", "question": "where is jack daniels tennessee whiskey made?", "answer": ["Tennessee"]} 173 | {"id": "3398", "question": "where is spain located what continent?", "answer": ["Europe"]} 174 | {"id": "1023", "question": "what is the money of argentina called?", "answer": ["Argentine peso"]} 175 | {"id": "883", "question": "what teams has lionel messi played for?", "answer": ["Argentina national football team", "FC Barcelona", "FC Barcelona C", "FC Barcelona B"]} 176 | {"id": "2356", "question": "what year did the mets win their first world series?", "answer": ["1969 World Series"]} 177 | {"id": "558", "question": "who does marion barber play 4?", "answer": ["Chicago Bears", "Minnesota Golden Gophers football", "Dallas Cowboys"]} 178 | {"id": "1141", "question": "what business does raj kundra do?", "answer": ["Entrepreneur", "Businessperson", "Film Producer"]} 179 | {"id": "873", "question": "what airport is in kauai hawaii?", "answer": ["Lihue Airport"]} 180 | {"id": "2062", "question": "who plays young john winchester in supernatural?", "answer": ["Jeffrey Dean Morgan"]} 181 | {"id": "1710", "question": "what is happening in germany right now?", "answer": ["Golf Beach Landing", "Second Battle of the Aisne"]} 182 | {"id": "3524", "question": "what movies gerard butler in?", "answer": ["Attila", "Beowulf & Grendel", "300", "Dear Frankie", "Butterfly on a Wheel", "Gamer", "Fast Food", "Dracula 2000", "Coriolanus"]} 183 | {"id": "506", "question": "who created the character of romeo?", "answer": ["William Shakespeare"]} 184 | {"id": "356", "question": "what continent is south africa part of?", "answer": ["Africa"]} 185 | {"id": "1259", "question": "where did brad paisley graduate from college?", "answer": ["Belmont University"]} 186 | {"id": "74", "question": "when did the colorado rockies go to the world series?", "answer": ["2007 National 
League Championship Series"]} 187 | {"id": "2922", "question": "where did marco rubio go to college?", "answer": ["Santa Fe College", "University of Miami", "University of Florida", "Tarkio College, Missouri", "South Miami High School", "University of Miami School of Law"]} 188 | {"id": "2153", "question": "what books did robert burns wrote?", "answer": ["To a Louse", "A Red, Red Rose", "The letters of Robert Burns", "Address to the Deil", "Tam o' Shanter", "Epitaph for James Smith", "To a Mouse", "Holy Willie's Prayer", "The wit of Robert Burns", "The Glenriddell manuscripts of Robert Burns"]} 189 | {"id": "3337", "question": "who plays edward elric?", "answer": ["Romi Park"]} 190 | {"id": "1602", "question": "who plays lorne on angel?", "answer": ["Andy Hallett"]} 191 | {"id": "3552", "question": "where to visit in orlando florida?", "answer": ["SeaWorld Orlando", "Orlando Science Center", "Cornell Fine Arts Museum", "Charles Hosmer Morse Museum of American Art", "Universal Studios Florida", "Orlando Museum of Art", "Harry P. 
Leu Gardens", "Arboretum of the University of Central Florida", "Holy Land Experience", "Universal Orlando"]} 192 | {"id": "332", "question": "who plays lois lane in superman returns?", "answer": ["Kate Bosworth"]} 193 | {"id": "510", "question": "what animal is on the western australian flag?", "answer": ["Black Swan"]} 194 | {"id": "838", "question": "what type of guitar does john mayer play?", "answer": ["Fender Stratocaster"]} 195 | {"id": "2357", "question": "who was leonardo da vinci teacher?", "answer": ["lorenzo de' medici patron of leonardo da vinci"]} 196 | {"id": "2383", "question": "what team did deion sanders play for in baseball?", "answer": ["Baltimore Orioles", "San Francisco 49ers", "Cincinnati Reds", "New York Yankees", "Atlanta Braves", "San Francisco Giants", "Atlanta Falcons", "Florida State Seminoles baseball"]} 197 | {"id": "1058", "question": "what color is miley cyrus red hair?", "answer": ["therichest.org"]} 198 | {"id": "3555", "question": "what was alexander graham bell known for?", "answer": ["Telephone"]} 199 | {"id": "402", "question": "what money do japanese use?", "answer": ["Japanese yen"]} 200 | {"id": "1382", "question": "what are the four official languages of nigeria?", "answer": ["English Language"]} 201 | {"id": "3244", "question": "who was the president of the us in 1971?", "answer": ["Richard Nixon"]} 202 | {"id": "2712", "question": "what language does romanian people speak?", "answer": ["Hungarian language", "Torlakian", "Romanian Language", "Romani language", "Ukrainian Language"]} 203 | {"id": "346", "question": "who played the voice of aladdin?", "answer": ["Scott Weinger"]} 204 | {"id": "3073", "question": "what did sir john frederick william herschel do?", "answer": ["Cyanotype", "Photographic fixer", "Actinometer"]} 205 | {"id": "443", "question": "who is the arizona cardinals football coach?", "answer": ["Bruce Arians"]} 206 | {"id": "327", "question": "when did mayans predict end of the world?", "answer": 
["2012"]} 207 | {"id": "3417", "question": "who played marty mcfly's dad?", "answer": ["George McFly"]} 208 | {"id": "1471", "question": "what does russian people speak?", "answer": ["Yiddish Language", "Chuvash Language", "Russian Language", "Tatar Language", "Ukrainian Language"]} 209 | {"id": "70", "question": "what timezone is denver co?", "answer": ["Mountain Time Zone", "UTC\u221207:00"]} 210 | {"id": "442", "question": "who was the first leader of the afl?", "answer": ["Bud Adams", "Lamar Hunt"]} 211 | {"id": "1700", "question": "who plays stella in coronation street?", "answer": ["Lager"]} 212 | {"id": "1975", "question": "what do you want from me jerrod niemann lyrics meaning?", "answer": ["Universal Music Group Nashville", "Arista Nashville", "Category 5 Records"]} 213 | {"id": "2261", "question": "when did the chicago bulls win their first championship?", "answer": ["1991 NBA Finals"]} 214 | {"id": "1612", "question": "what country was vasco nunez de balboa born in?", "answer": ["Spain"]} 215 | {"id": "1546", "question": "what are the names of all the countries in africa?", "answer": ["Angola", "C\u00f4te d\u2019Ivoire", "Central African Republic", "Burkina Faso", "Djibouti", "Congo", "Burundi", "Botswana", "Cape Verde", "Benin"]} 216 | {"id": "3368", "question": "who plays bella on twilight?", "answer": ["Kristen Stewart"]} 217 | {"id": "92", "question": "who miley cyrus engaged to?", "answer": ["Liam Hemsworth"]} 218 | {"id": "2371", "question": "what else is there in orlando besides disney?", "answer": ["Baldwin Park"]} 219 | {"id": "733", "question": "who did heinrich himmler marry?", "answer": ["Margarete Boden"]} 220 | {"id": "1171", "question": "what hardships did teddy roosevelt overcome?", "answer": ["Cardiovascular disease"]} 221 | {"id": "511", "question": "where did drew stanton play in college?", "answer": ["Michigan State University"]} 222 | {"id": "2342", "question": "what political party was adolf hitler from?", "answer": ["German 
Workers' Party", "Nazi Party"]} 223 | {"id": "3688", "question": "where did apostle paul grow up?", "answer": ["Tarsus, Mersin"]} 224 | {"id": "1070", "question": "where marie curie come from?", "answer": ["Warsaw"]} 225 | {"id": "1482", "question": "where is the capital of canada on the map?", "answer": ["Ottawa"]} 226 | {"id": "1639", "question": "what was the name of the book hitler wrote while in prison?", "answer": ["Mein Kampf"]} 227 | {"id": "1874", "question": "what language is spoken in basque?", "answer": ["Basque"]} 228 | {"id": "3588", "question": "where did the allied invasion of france take place?", "answer": ["Normandy"]} 229 | {"id": "928", "question": "who won 2012 presidential election in france?", "answer": ["Nicolas Sarkozy"]} 230 | {"id": "2281", "question": "what did charles lindbergh became famous for in the 1920s?", "answer": ["Pilot"]} 231 | {"id": "2830", "question": "where is madeira?", "answer": ["Atlantic Ocean", "Azores Canaries Madeira", "Madeira Islands archipelago", "Portugal"]} 232 | {"id": "315", "question": "what did johnny crawford sing?", "answer": ["Judy Loves Me", "Your Nose Is Gonna Grow", "Rumors", "Rumours", "Daydreams", "Maybe It's You", "Cindy's Gonna Cry", "Cindy's Birthday", "Patti Ann"]} 233 | {"id": "665", "question": "where did elvis presley started his career?", "answer": ["Memphis"]} 234 | {"id": "3196", "question": "where did st louis tornado hit?", "answer": ["St. 
Louis"]} 235 | {"id": "1740", "question": "what did neil say on the moon?", "answer": ["One Small Step"]} 236 | {"id": "2471", "question": "what countries does canada export wheat to?", "answer": ["Japan"]} 237 | {"id": "861", "question": "who did george lucas get engaged to?", "answer": ["Mellody Hobson"]} 238 | {"id": "688", "question": "what religion was mary todd lincoln?", "answer": ["Presbyterianism", "Baptists", "Catholicism"]} 239 | {"id": "3644", "question": "where the murray river located?", "answer": ["Australia"]} 240 | {"id": "1383", "question": "where is bergen belsen concentration camp located?", "answer": ["Germany"]} 241 | {"id": "509", "question": "where is puntland somalia?", "answer": ["Garoowe", "Bari Region", "Dalweyn", "Eyl"]} 242 | {"id": "1541", "question": "what kind of government system does canada have?", "answer": ["Parliamentary system", "Federation", "Constitutional monarchy", "Multi-party system"]} 243 | {"id": "2800", "question": "what movies did matt bomer play in?", "answer": ["Magic Mike", "Flightplan", "Winter's Tale", "In Time", "8", "The Normal Heart", "The Texas Chainsaw Massacre: The Beginning"]} 244 | {"id": "2850", "question": "on what continent is canada found?", "answer": ["North America"]} 245 | {"id": "2284", "question": "which countries border lake victoria?", "answer": ["Kenya", "Tanzania", "Uganda"]} 246 | {"id": "1140", "question": "who was the leader of germany in wwii?", "answer": ["Hitler"]} 247 | {"id": "66", "question": "who does maggie grace play in taken?", "answer": ["Kim"]} 248 | {"id": "2026", "question": "what currency does the dominican republic?", "answer": ["Dominican peso"]} 249 | {"id": "1181", "question": "who played princess leia from star wars?", "answer": ["Carrie Fisher"]} 250 | {"id": "2227", "question": "what instruments does katy perry play?", "answer": ["Vocals"]} 251 | {"id": "2848", "question": "who is steven seagal sister?", "answer": ["Brenda Seagal"]} 252 | {"id": "2428", "question": 
"who is john garcia?", "answer": ["Singer", "Musician"]} 253 | {"id": "3200", "question": "who is the 2011 heisman trophy winner?", "answer": ["Robert Griffin III"]} 254 | {"id": "2050", "question": "what is the language used in indonesia?", "answer": ["Javanese Language", "Malay Language", "Sunda Language", "English Language", "Bali Language", "Indonesian Language", "Batak Language", "Tobelo Language", "Dutch Language", "Madura Language"]} 255 | {"id": "1217", "question": "who will michael schumacher drive for in 2013?", "answer": ["Omega SA"]} 256 | {"id": "2543", "question": "what is the predominant language in south africa?", "answer": ["South African English"]} 257 | {"id": "2686", "question": "what type of currency is used in puerto rico?", "answer": ["United States dollar"]} 258 | {"id": "3277", "question": "who is the current coach of the new york knicks?", "answer": ["Mike Woodson"]} 259 | {"id": "1973", "question": "who was audrey hepburn's husbands?", "answer": ["Robert Wolders", "Andrea Dotti", "Mel Ferrer"]} 260 | {"id": "2446", "question": "where does manny pacquiao live?", "answer": ["South Cotabato", "Kiamba"]} 261 | {"id": "426", "question": "who was louis riel?", "answer": ["Politician"]} 262 | {"id": "934", "question": "who was the general for the british in the revolutionary war?", "answer": ["Black British", "White British", "British American"]} 263 | {"id": "513", "question": "what countries fall in eastern europe?", "answer": ["Southern Carpathians", "Caucasus", "Belarusian Central Rada", "Kresy", "Kievan Rus'", "Grand Duchy of Lithuania", "Kingdom of Romania", "Moldova", "Belarusian People's Republic", "Second Polish Republic"]} 264 | {"id": "3626", "question": "where is kate middleton spending christmas?", "answer": ["Bucklebury"]} 265 | {"id": "3580", "question": "where did fred west work?", "answer": ["Laborer"]} 266 | {"id": "1073", "question": "what language do people speak in costa rica?", "answer": ["Spanish Language"]} 267 | {"id": 
"2728", "question": "what is the dollar called in brazil?", "answer": ["Brazilian real"]} 268 | {"id": "42", "question": "what party was andrew jackson?", "answer": ["Democratic-Republican Party", "Democratic Party", "Jacksonian Democratic Party"]} 269 | {"id": "850", "question": "what county is tampa located in?", "answer": ["Hillsborough County"]} 270 | {"id": "1662", "question": "what years did jackie robinson play baseball?", "answer": ["1956 Major League Baseball Season", "1948 Major League Baseball Season", "1951 Major League Baseball Season", "1949 Major League Baseball Season", "1950 Major League Baseball Season", "1955 Major League Baseball Season", "1954 Major League Baseball season", "1953 Major League Baseball Season", "1947 Major League Baseball Season", "1952 Major League Baseball Season"]} 271 | {"id": "2467", "question": "what has sara paxton been in?", "answer": ["Durango Kids", "Enter Nowhere", "Liar Liar", "Geppetto", "Liars All", "Mother Goose Parade", "Hollywood Takes a Stand Against Planking", "Aquamarine", "Haunted Lighthouse", "Hounded"]} 272 | {"id": "3373", "question": "what to do in daytona?", "answer": ["Daytona 500 Experience", "Jackie Robinson Ballpark", "Daytona International Speedway", "Mary McLeod Bethune Home", "Daytona Beach Bike Week", "Museum of Arts and Sciences", "Halifax Historical Museum", "Daytona Lagoon", "Ocean Center", "South Beach Street Historic District"]} 273 | {"id": "3490", "question": "what is the money currency in guatemala?", "answer": ["Guatemalan quetzal"]} 274 | {"id": "1389", "question": "who did lamar odom sign with?", "answer": ["Los Angeles Clippers"]} 275 | {"id": "2817", "question": "what did henry kissinger do?", "answer": ["United States Secretary of State", "National Security Advisor"]} 276 | {"id": "3025", "question": "who is the governor of indiana 2012?", "answer": ["Mitch Daniels"]} 277 | {"id": "309", "question": "what is the name of the first prophet of islam?", "answer": ["Muhammad"]} 278 | 
{"id": "1144", "question": "what team does drogba play for 2013?", "answer": ["Galatasaray S.K."]} 279 | {"id": "939", "question": "who founded collegehumor?", "answer": ["Josh Abramson", "Ricky Van Veen"]} 280 | {"id": "272", "question": "where was the battle of antietam creek?", "answer": ["Pennsylvania", "Maryland"]} 281 | {"id": "744", "question": "where does selena gomez live map?", "answer": ["New York City"]} 282 | {"id": "55", "question": "who was queen victoria's parents called?", "answer": ["Prince Edward, Duke of Kent and Strathearn", "Princess Victoria of Saxe-Coburg-Saalfeld"]} 283 | {"id": "3520", "question": "where was the first ford motor company located?", "answer": ["Dearborn"]} 284 | {"id": "1244", "question": "what did god say to abraham about circumcision?", "answer": ["Cave of the Patriarchs"]} 285 | {"id": "464", "question": "what time is right now in texas?", "answer": ["Texas Revolution"]} 286 | {"id": "1210", "question": "where is south carolina located?", "answer": ["United States of America"]} 287 | {"id": "874", "question": "what is kevin durant play style?", "answer": ["Small forward"]} 288 | {"id": "1724", "question": "what state did obama win?", "answer": ["Illinois"]} 289 | {"id": "3596", "question": "what are the primary languages of france?", "answer": ["French Language"]} 290 | {"id": "483", "question": "what type of music did richard wagner play?", "answer": ["Opera", "Classical music", "Romantic music"]} 291 | {"id": "1618", "question": "what type of music does ella fitzgerald sing?", "answer": ["Swing music", "Jazz", "Vocal jazz", "Traditional pop music", "Ballad"]} 292 | {"id": "302", "question": "who are the green bay packers owned by?", "answer": ["Green Bay Packers Board of Directors"]} 293 | {"id": "1837", "question": "what timezone is indianapolis indiana in?", "answer": ["North American Eastern Time Zone"]} 294 | {"id": "753", "question": "who does john beck play for?", "answer": ["Washington Redskins", "Miami 
Dolphins", "Baltimore Ravens", "Houston Texans"]} 295 | {"id": "2311", "question": "what killed sammy davis jr?", "answer": ["Complication", "Throat cancer"]} 296 | {"id": "2355", "question": "what money to take to sri lanka?", "answer": ["Sri Lankan rupee"]} 297 | {"id": "2972", "question": "who is the owner of the philadelphia eagles?", "answer": ["Jeffrey Lurie"]} 298 | {"id": "241", "question": "what countries share borders with france?", "answer": ["Italy", "Monaco", "Luxembourg", "Andorra", "Switzerland", "Germany", "Bay of Biscay", "Belgium", "Piedmont", "Spain"]} 299 | {"id": "627", "question": "what was richard wright known for?", "answer": ["Keyboard player"]} 300 | {"id": "1767", "question": "who plays jay adams in lords of dogtown?", "answer": ["Him/Herself"]} 301 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | import json 4 | import _thread 5 | import requests 6 | 7 | import threading 8 | 9 | from tqdm import tqdm 10 | from retry import retry 11 | 12 | from contextlib import contextmanager 13 | from collections import defaultdict 14 | 15 | import re 16 | link_regex = re.compile('((https?):((//)|(\\\\))+([\w\d:#@%/;$()~_?\+-=\\\.&](#!)?)*)', re.DOTALL) 17 | 18 | class TimeoutException(Exception): 19 | def __init__(self, msg=''): 20 | self.msg = msg 21 | 22 | 23 | @contextmanager 24 | def time_limit(seconds, msg=''): 25 | 26 | timer = threading.Timer(seconds, lambda: _thread.interrupt_main()) 27 | timer.start() 28 | try: 29 | yield 30 | except KeyboardInterrupt: 31 | raise TimeoutException("Timed out for operation {}".format(msg)) 32 | finally: 33 | # if the action ends in specified time, timer is canceled 34 | timer.cancel() 35 | 36 | 37 | def extract_urls(string): 38 | def clean(url): 39 | if url[-1] == '.': 40 | return url[:-1] 41 | return url 42 | r = re.findall(link_regex, string) 43 | 
return [clean(l[0]) for l in r] 44 | 45 | def extract_topic(url): 46 | url = url.split('#')[0] 47 | return url.split('/')[-1] 48 | 49 | @retry(Exception, tries=3, delay=10) 50 | def fetch_pages(titles, debug=False): 51 | if titles == ['']: 52 | return {'pages': [], 'titles': []} 53 | if debug: 54 | print(titles) 55 | api_url = 'https://en.wikipedia.org/w/api.php' 56 | params = { 57 | 'action': 'query', 58 | 'titles': '|'.join(titles), 59 | 'format': 'json', 60 | 'prop': 'revisions', 61 | 'rvslots': '*', 62 | 'rvprop': 'content', 63 | 'redirects': '' 64 | } 65 | response = requests.get(api_url, params=params, timeout=20) 66 | data = response.json() 67 | 68 | ret_titles = [] 69 | ret_pages = [] 70 | 71 | if 'warnings' in data or 'query' not in data: 72 | print("data warning") 73 | print(data) 74 | return {'pages': ret_pages, 'titles': ret_titles} 75 | 76 | for k in data['query']['pages'].keys(): 77 | page_data = data['query']['pages'][k] 78 | if 'missing' not in page_data and 'revisions' in page_data: 79 | title = page_data['title'] 80 | page = page_data['revisions'][0]['slots']['main']['*'] 81 | page = page.replace("\n", " ").replace("\t", " ") 82 | page = ' '.join(page.split())[:10_000] 83 | ret_pages.append(page) 84 | ret_titles.append(title) 85 | return {'pages': ret_pages, 'titles': ret_titles} 86 | 87 | 88 | def add_prompt(item, prompt): 89 | 90 | def rmreturn(s): 91 | s = s.replace('\n\n', ' ') 92 | s = s.replace('\n', ' ') 93 | return s.strip() 94 | 95 | query = item['question'] 96 | prompt = prompt.replace('{query}', query) 97 | 98 | if '{top_passages_concat}' in prompt: 99 | if item.get('top_passages_concat'): # background info 100 | passages = rmreturn(item['top_passages_concat'][0]) 101 | prompt = prompt.replace('{top_passages_concat}', passages) 102 | elif '{background}' in prompt: 103 | if item.get('output'): # background info 104 | backinfo = rmreturn(item['output'][0]) 105 | prompt = prompt.replace('{background}', backinfo) 106 | 107 | return ' 
'.join(prompt.split(' ')) # max 1000 words 108 | 109 | 110 | def run_embeddings(input_text, engine='text-similarity-davinci-001'): 111 | 112 | texts = [t.replace('\n', '') for t in input_text] 113 | outputs = openai.Embedding.create(input=texts, model=engine)['data'] 114 | embeddings = [o['embedding'] for o in outputs] 115 | 116 | return embeddings 117 | 118 | class OAIOverloadedException(Exception): 119 | "Raised when the OpenAI servers are overloaded." 120 | pass 121 | 122 | @retry((OAIOverloadedException, Exception), tries=50, delay=10) 123 | def openai_request(inputs_with_prompts, engine, max_tokens, num_sequence=1, temp=0): 124 | headers = { 125 | 'Content-Type': "application/json", 126 | 'Authorization': f"Bearer {os.getenv('OPENAI_API_KEY')}" # Dont share api keys 🫠 127 | } 128 | params = { 129 | "model": engine, 130 | "prompt": inputs_with_prompts, 131 | "max_tokens": max_tokens, 132 | "temperature": temp, 133 | "n": num_sequence, 134 | } 135 | response = requests.post("https://api.openai.com/v1/completions", json=params, headers=headers, timeout=20*3) 136 | data = response.json() 137 | if 'choices' not in data: 138 | print("Error: ", data['error']) 139 | raise OAIOverloadedException 140 | if 'error' in data: 141 | print("Error: ", data['error']) 142 | raise OAIOverloadedException 143 | return [c["text"] for c in data["choices"]] 144 | 145 | def run_main(inlines, outfile, engine, prompt, max_tokens, n=1, temp=0, parse_url=True, filter_docs=False): 146 | 147 | if os.path.exists(outfile): 148 | outs = open(outfile, 'a', encoding='utf8') 149 | num_lines = len(open(outfile, 'r').readlines()) 150 | inlines = inlines[num_lines - 1: ] 151 | else: # not os.path.exists(outfile) 152 | outs = open(outfile, 'a', encoding='utf8') 153 | outs.write(json.dumps({"prompt": prompt}) + '\n') 154 | 155 | pbar = tqdm(total = len(inlines)) 156 | index = 0 157 | pbar.update(index) 158 | while index < len(inlines): 159 | inputs, answers = [], [] 160 | inputs_with_prompts = [] 161 | 
for _ in range(5): # default 20 162 | if index >= len(inlines): break 163 | input_with_prompt = add_prompt(inlines[index], prompt) 164 | inputs.append(inlines[index]['question']) ## a string 165 | answers.append(inlines[index]['answer']) ## a list of strings 166 | inputs_with_prompts.append(input_with_prompt) 167 | index += 1 168 | if parse_url: 169 | samples = defaultdict(list) 170 | url_responses = defaultdict(list) 171 | extracted_topics = defaultdict(list) 172 | concat_pages = defaultdict(list) 173 | fetched_page_titles = defaultdict(list) 174 | fetched_page_texts = defaultdict(list) 175 | 176 | outputs = openai_request(inputs_with_prompts, 177 | engine, max_tokens, n, temp) 178 | for j, output in enumerate(outputs): 179 | samples[j//n].append(output) 180 | 181 | extracted_url_set = extract_urls(output) 182 | extracted_topic_set = [extract_topic(url) for url in extracted_url_set] 183 | 184 | fetched_page_set = fetch_pages(extracted_topic_set) 185 | _fetched_page_texts = fetched_page_set['pages'] 186 | _fetched_page_titles = fetched_page_set['titles'] 187 | 188 | 189 | url_responses[j//n].append(extracted_url_set) 190 | extracted_topics[j//n].append(extracted_topic_set) 191 | concat_pages[j//n].append(' '.join(_fetched_page_texts)) 192 | fetched_page_titles[j//n].append(_fetched_page_titles) 193 | fetched_page_texts[j//n].append(_fetched_page_texts) 194 | 195 | for i in range(len(inputs_with_prompts)): 196 | outs.write(json.dumps({ 197 | 'question': inputs[i], 198 | 'answer': answers[i], 199 | 'gpt3_response': output, 200 | 'url_response': url_responses[i], 201 | 'extracted_topic': extracted_topics[i][0], 202 | 'output': concat_pages[i], 203 | 'fetched_page_titles': fetched_page_titles[i], 204 | 'fetched_page_texts': fetched_page_texts[i] 205 | }) 206 | +'\n') 207 | else: 208 | samples = defaultdict(list) 209 | outputs = openai_request(inputs_with_prompts, 210 | engine, max_tokens, n, temp) 211 | for j, output in enumerate(outputs): 212 | 
samples[j//n].append(output) 213 | 214 | for i in range(len(inputs_with_prompts)): 215 | outs.write(json.dumps({ 216 | 'question': inputs[i], 217 | 'answer': answers[i], 218 | 'output': samples[i]}) 219 | +'\n') 220 | 221 | pbar.update(len(inputs_with_prompts)) 222 | 223 | pbar.close() 224 | outs.close() 225 | -------------------------------------------------------------------------------- /inprompts/mult_doc.jsonl: -------------------------------------------------------------------------------- 1 | {"type": "question answering", "task": "step1", "pid": 1, "prompt": "what does jamaican people speak? which 10 Wikipedia URLs have the answer? \n\n Jamaican people speak Jamaican Creole, also known as Patois or Patwa. \n\n 1. https://en.wikipedia.org/wiki/Jamaican_Creole \n 2. https://en.wikipedia.org/wiki/Jamaican_language \n 3. https://en.wikipedia.org/wiki/Jamaican_English \n 4. https://en.wikipedia.org/wiki/Jamaican_Patois \n 5. https://en.wikipedia.org/wiki/Jamaican_Creole_grammar \n 6. https://en.wikipedia.org/wiki/Jamaican_Creole_phonology \n 7. https://en.wikipedia.org/wiki/Jamaican_Creole_lexicon \n 8. https://en.wikipedia.org/wiki/Jamaican_Creole_syntax \n 9. https://en.wikipedia.org/wiki/Jamaican_Creole_vocabulary \n 10. https://en.wikipedia.org/wiki/Jamaican_Creole_language \n\n\n\n what did james k polk do before he was president? which 10 Wikipedia URLs have the answer? \n\n Before becoming President, James K. Polk served as the Speaker of the House of Representatives (1835–1839), Governor of Tennessee (1839–1841), and U.S. Representative from Tennessee (1825–1839). \n\n 1. https://en.wikipedia.org/wiki/James_K._Polk \n 2. https://en.wikipedia.org/wiki/Speaker_of_the_United_States_House_of_Representatives \n 3. https://en.wikipedia.org/wiki/Governor_of_Tennessee \n 4. https://en.wikipedia.org/wiki/U.S._House_of_Representatives \n 5. https://en.wikipedia.org/wiki/Tennessee \n 6. https://en.wikipedia.org/wiki/United_States_House_of_Representatives \n 7. 
https://en.wikipedia.org/wiki/List_of_governors_of_Tennessee \n 8. https://en.wikipedia.org/wiki/List_of_Speakers_of_the_United_States_House_of_Representatives \n \n\n\n\n what is the oregon ducks 2012 football schedule? which 10 Wikipedia URLs have the answer? \n\n 1. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team \n 2. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_schedule \n 3. https://en.wikipedia.org/wiki/2012_Pac-12_Conference_football_season \n 4. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_statistics \n 5. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_roster \n 6. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_rankings \n 7. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_results \n 8. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_game_logs \n 9. https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team_awards \n\n\n\n who plays ken barlow in coronation street? which 10 Wikipedia URLs have the answer? \n\n The actor who plays Ken Barlow in Coronation Street is William Roache. \n\n The following 10 Wikipedia URLs have the answer: \n\n 1. https://en.wikipedia.org/wiki/William_Roache \n 2. https://en.wikipedia.org/wiki/Coronation_Street\n3. https://en.wikipedia.org/wiki/Ken_Barlow \n 4. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1960) \n 5. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1961) \n 6. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1962) \n 7. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1963) \n 8. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1964) \n 9. https://en.wikipedia.org/wiki/List_of_Coronation_Street_characters_(1965) \n\n\n\n what happened after mr. sugihara died? which 10 Wikipedia URLs have the answer? \n\n After Mr. 
Sugihara died, his legacy of saving thousands of lives during World War II was remembered and honored. He was posthumously awarded the title of Righteous Among the Nations by the State of Israel in 1985. \n\n 1. https://en.wikipedia.org/wiki/Chiune_Sugihara \n 2. https://en.wikipedia.org/wiki/Righteous_Among_the_Nations \n 3. https://en.wikipedia.org/wiki/Japanese_diplomats_who_rescued_Jews_during_the_Holocaust \n 4. https://en.wikipedia.org/wiki/Visas_for_Life \n 5. https://en.wikipedia.org/wiki/Kaunas_Ghetto \n 6. https://en.wikipedia.org/wiki/Lithuanian_Jews \n 7. https://en.wikipedia.org/wiki/Vilna_Ghetto \n 8. https://en.wikipedia.org/wiki/Kovno_Ghetto \n 9. https://en.wikipedia.org/wiki/Kaunas_Conference\n10. https://en.wikipedia.org/wiki/Sugihara_Surviv \n\n\n\n {query} Which 10 Wikipedia URLs have the answer? \n\n"} 2 | {"type": "question answering", "task": "step2", "pid": 1, "prompt": "Refer to the passages below and answer the following question with just one entity. \n\n Passages: {top_passages_concat} \n\n Question: {query} \n\n The answer is"} 3 | -------------------------------------------------------------------------------- /inprompts/single_doc.jsonl: -------------------------------------------------------------------------------- 1 | {"type": "question answering", "task": "step1", "pid": 1, "prompt": "what does jamaican people speak? which Wikipedia URL has the answer? \n\n Jamaican people speak Jamaican Creole, also known as Patois. The Wikipedia URL for this answer is https://en.wikipedia.org/wiki/Jamaican_Creole \n\n\n\n what did james k polk do before he was president? which Wikipedia URL has the answer? \n\n Before becoming president, James K. Polk served as the Speaker of the House of Representatives (1835–1839), Governor of Tennessee (1839–1841), and U.S. 
Representative from Tennessee (1825–1839).\n\n The Wikipedia URL with the answer is: https://en.wikipedia.org/wiki/James_K._Polk#Early_life_and_career \n\n\n\n what is the oregon ducks 2012 football schedule? which Wikipedia URL has the answer? \n\n The Oregon Ducks 2012 football schedule can be found at the following Wikipedia URL: https://en.wikipedia.org/wiki/2012_Oregon_Ducks_football_team \n\n\n\n who plays ken barlow in coronation street? which Wikipedia URL has the answer? \n\n The actor who plays Ken Barlow in Coronation Street is William Roache. The Wikipedia URL for this answer is https://en.wikipedia.org/wiki/William_Roache. \n\n\n\n what happened after mr. sugihara died? which Wikipedia URL has the answer? \n\n After Mr. Sugihara died in 1986, his family continued to promote his legacy of humanitarianism. His son, Chiune Sugihara Jr., established the Chiune Sugihara Memorial Foundation in Tokyo in 1992. \n\n The answer can be found on the Wikipedia page for Chiune Sugihara: https://en.wikipedia.org/wiki/Chiune_Sugihara \n\n\n\n {query} Which Wikipedia URL has the answer? \n\n"} 2 | {"type": "question answering", "task": "step2", "pid": 1, "prompt": "Refer to the passage below and answer the following question with just one entity. 
\n\n Passage: {background} \n\n Question: {query} \n\n The answer is"} 3 | -------------------------------------------------------------------------------- /mainfunc.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv, find_dotenv 2 | load_dotenv(find_dotenv()) 3 | 4 | import argparse 5 | import os 6 | import json 7 | 8 | from inference import run_main 9 | from evaluation import ( 10 | eval_recall, 11 | eval_question_answering, 12 | eval_fact_checking, 13 | eval_dialogue_system 14 | ) 15 | 16 | 17 | def readfiles(infile): 18 | 19 | if infile.endswith('json'): 20 | lines = json.load(open(infile, 'r', encoding='utf8')) 21 | elif infile.endswith('jsonl'): 22 | lines = open(infile, 'r', encoding='utf8').readlines() 23 | lines = [json.loads(l) for l in lines] 24 | else: 25 | raise NotImplementedError 26 | 27 | if len(lines[0]) == 1 and lines[0].get('prompt'): 28 | lines = lines[1:] ## skip prompt line 29 | 30 | return lines 31 | 32 | 33 | def step1(dataset, datatype, split, max_tokens, engine, prompt, pid, n, temp, prompt_type): 34 | 35 | inputfile = f'indatasets/{dataset}/{dataset}-{split}.jsonl' 36 | inlines = readfiles(inputfile) 37 | 38 | if (temp is None) or (temp == 0): 39 | outputfolder = f'logs/backgrounds-greedy-{engine}-{prompt_type}/{dataset}' 40 | else: # tempature > 0 41 | outputfolder = f'logs/backgrounds-sample(n={n},temp={temp})-{engine}/{dataset}' 42 | os.makedirs(outputfolder, exist_ok=True) 43 | outputfile = f'{outputfolder}/{dataset}-{split}-p{pid}.jsonl' 44 | 45 | run_main(inlines, outputfile, engine, prompt, max_tokens, n, temp, parse_url=True) 46 | 47 | if datatype == 'question answering': ## Eval Recall@K score 48 | recallfile = f'{outputfolder}/{dataset}-recall-{prompt_type}.jsonl' 49 | with open(recallfile, 'a') as recallout: 50 | recall, length = eval_recall(outputfile) 51 | outmetrics = { 52 | 'outputfile': outputfile, 53 | 'prompt': prompt, 54 | 'recall': recall, 55 | 
'length': length, 56 | } 57 | print(f'Recall: {recall}; Avg.Length: {length}') 58 | recallout.write(json.dumps(outmetrics) + '\n') 59 | 60 | 61 | def step2(dataset, datatype, split, max_tokens, engine, prompt, pid, prompt_type): 62 | # processed only used for passage 63 | inputfile = f'logs/backgrounds-greedy-{engine}-{prompt_type}/{dataset}/{dataset}-{split}-p{pid}.jsonl' 64 | inlines = readfiles(inputfile) 65 | 66 | outputfolder = f'logs/finaloutput-greedy-{engine}-{prompt_type}/{dataset}' 67 | os.makedirs(outputfolder, exist_ok=True) 68 | outputfile = f'{outputfolder}/{dataset}-{split}-p{pid}.jsonl' 69 | 70 | run_main(inlines, outputfile, engine, prompt, max_tokens, parse_url=False, filter_docs=True) 71 | 72 | if datatype == 'question answering': ## Eval Exact Match 73 | evalfile = f'{outputfolder}/{dataset}-metrics.jsonl' 74 | with open(evalfile, 'a') as evalout: 75 | emscore, length = eval_question_answering(outputfile) 76 | outmetrics = { 77 | 'outputfile': outputfile, 78 | 'prompt': prompt, 79 | 'exact match': emscore, 80 | 'length': length, 81 | } 82 | print(f'Exact Match: {emscore}; Avg.Length: {length}') 83 | evalout.write(json.dumps(outmetrics) + '\n') 84 | 85 | elif datatype == 'fact checking': ## Eval Accuracy 86 | evalfile = f'{outputfolder}/{dataset}-metrics.jsonl' 87 | with open(evalfile, 'a') as evalout: 88 | accuracy, length = eval_fact_checking(outputfile) 89 | outmetrics = { 90 | 'outputfile': outputfile, 91 | 'prompt': prompt, 92 | 'accuracy': accuracy, 93 | 'length': length, 94 | } 95 | print(f'Accuracy: {accuracy}; Avg.Length: {length}') 96 | evalout.write(json.dumps(outmetrics) + '\n') 97 | 98 | elif datatype == 'dialogue system': ## Eval F1 and Rouge 99 | evalfile = f'{outputfolder}/{dataset}-metrics.jsonl' 100 | with open(evalfile, 'a') as evalout: 101 | f1score, rougel, length = eval_dialogue_system(outputfile) 102 | outmetrics = { 103 | 'outputfile': outputfile, 104 | 'prompt': prompt, 105 | 'f1-score': f1score, 106 | 'rouge-l': rougel, 
107 | 'length': length, 108 | } 109 | print(f'F1-score: {f1score}; Rouge-L: {rougel}; Avg.Length: {length}') 110 | evalout.write(json.dumps(outmetrics) + '\n') 111 | 112 | 113 | if __name__ == "__main__": 114 | 115 | parser = argparse.ArgumentParser() 116 | 117 | # Required parameters 118 | parser.add_argument("--dataset", default=None, type=str, required=True, 119 | help="dataset name: [nq, tqa, webq, wizard, fever, fm2, sqa]", 120 | ) 121 | parser.add_argument("--task", default=None, type=str, required=True, 122 | help="task name: [step1, step2], should be either 1 or 2", 123 | ) 124 | parser.add_argument("--prompt_type", default="single_doc", type=str, required=False, 125 | help="prompt type: [single_doc, multi_doc]" 126 | ) 127 | parser.add_argument("--split", default='test', type=str, required=False, 128 | help="dataset split: [train, dev, test]", 129 | ) 130 | parser.add_argument("--engine", default='text-davinci-003', type=str, required=False, 131 | help="text-davinci-003 (used in our experiments), text-davinci-002", 132 | ) 133 | parser.add_argument("--num_sequence", default=1, type=int, required=False) 134 | parser.add_argument("--temperature", default=0, type=float, required=False) 135 | 136 | args = parser.parse_args() 137 | 138 | if args.dataset in ['nq', 'webq', 'tqa', 'twiki', 'sqa']: 139 | datatype = 'question answering' 140 | elif args.dataset in ['fever', 'fm2']: 141 | datatype = 'fact checking' 142 | elif args.dataset in ['wizard']: 143 | datatype = 'dialogue system' 144 | else: # other task type? 145 | raise NotImplementedError 146 | 147 | if args.task == 'step1': 148 | max_tokens = 300 149 | # max_tokens = 500 150 | elif args.task == 'step2': 151 | if datatype == 'dialogue system': 152 | max_tokens = 50 153 | else: # QA and Fact ... 
154 | max_tokens = 10 155 | 156 | promptpath = f'inprompts/{args.prompt_type}.jsonl' 157 | if not os.path.exists(promptpath): 158 | raise FileNotFoundError(f'Prompt file {promptpath} not found.') 159 | promptlines = open(promptpath, 'r').readlines() 160 | 161 | for line in promptlines: 162 | line = json.loads(line) 163 | 164 | if line['type'] == datatype and line['task'] == args.task: 165 | prompt = line['prompt'] 166 | pid = line['pid'] 167 | 168 | if args.task == 'step1': 169 | outputs = step1(args.dataset, datatype, args.split, max_tokens, args.engine, 170 | prompt, pid, args.num_sequence, args.temperature, args.prompt_type) 171 | 172 | elif args.task == 'step2': 173 | outputs = step2(args.dataset, datatype, args.split, 174 | max_tokens, args.engine, prompt, pid, args.prompt_type) 175 | 176 | else: ## should be either 1 or 2 177 | raise NotImplementedError(f'Invalid task given: {args.task} (should be either 1 or 2)') 178 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | retry 3 | tqdm 4 | python-dotenv --------------------------------------------------------------------------------