├── requirements.txt ├── figs ├── logo.webp ├── logo_1.webp └── main-fig.png ├── run_longgenbench_MMLU.sh ├── data ├── MMLU │ └── data │ │ ├── dev │ │ ├── world_religions_dev.csv │ │ ├── miscellaneous_dev.csv │ │ ├── abstract_algebra_dev.csv │ │ ├── management_dev.csv │ │ ├── conceptual_physics_dev.csv │ │ ├── anatomy_dev.csv │ │ ├── electrical_engineering_dev.csv │ │ ├── philosophy_dev.csv │ │ ├── human_aging_dev.csv │ │ ├── human_sexuality_dev.csv │ │ ├── virology_dev.csv │ │ ├── medical_genetics_dev.csv │ │ ├── computer_security_dev.csv │ │ ├── clinical_knowledge_dev.csv │ │ ├── high_school_chemistry_dev.csv │ │ ├── global_facts_dev.csv │ │ ├── jurisprudence_dev.csv │ │ ├── high_school_mathematics_dev.csv │ │ ├── high_school_microeconomics_dev.csv │ │ ├── college_chemistry_dev.csv │ │ ├── high_school_macroeconomics_dev.csv │ │ ├── high_school_geography_dev.csv │ │ ├── college_physics_dev.csv │ │ ├── elementary_mathematics_dev.csv │ │ ├── marketing_dev.csv │ │ ├── high_school_physics_dev.csv │ │ ├── public_relations_dev.csv │ │ ├── college_mathematics_dev.csv │ │ ├── college_biology_dev.csv │ │ ├── logical_fallacies_dev.csv │ │ ├── sociology_dev.csv │ │ ├── us_foreign_policy_dev.csv │ │ ├── high_school_biology_dev.csv │ │ ├── college_medicine_dev.csv │ │ ├── econometrics_dev.csv │ │ ├── formal_logic_dev.csv │ │ ├── moral_disputes_dev.csv │ │ ├── high_school_government_and_politics_dev.csv │ │ ├── prehistory_dev.csv │ │ ├── high_school_psychology_dev.csv │ │ ├── astronomy_dev.csv │ │ ├── moral_scenarios_dev.csv │ │ ├── nutrition_dev.csv │ │ ├── professional_accounting_dev.csv │ │ ├── business_ethics_dev.csv │ │ ├── professional_psychology_dev.csv │ │ ├── machine_learning_dev.csv │ │ ├── international_law_dev.csv │ │ ├── high_school_statistics_dev.csv │ │ ├── college_computer_science_dev.csv │ │ ├── high_school_computer_science_dev.csv │ │ ├── professional_medicine_dev.csv │ │ ├── high_school_world_history_dev.csv │ │ ├── security_studies_dev.csv │ │ └── 
professional_law_dev.csv │ │ ├── README.txt │ │ └── val │ │ ├── management_val.csv │ │ ├── global_facts_val.csv │ │ ├── abstract_algebra_val.csv │ │ ├── college_chemistry_val.csv │ │ ├── human_sexuality_val.csv │ │ ├── world_religions_val.csv │ │ ├── college_mathematics_val.csv │ │ ├── electrical_engineering_val.csv │ │ ├── medical_genetics_val.csv │ │ ├── anatomy_val.csv │ │ ├── business_ethics_val.csv │ │ ├── machine_learning_val.csv │ │ ├── us_foreign_policy_val.csv │ │ ├── high_school_computer_science_val.csv │ │ ├── college_physics_val.csv │ │ ├── jurisprudence_val.csv │ │ ├── high_school_geography_val.csv │ │ ├── conceptual_physics_val.csv │ │ ├── human_aging_val.csv │ │ ├── computer_security_val.csv │ │ ├── public_relations_val.csv │ │ ├── college_biology_val.csv │ │ ├── college_computer_science_val.csv │ │ ├── astronomy_val.csv │ │ ├── logical_fallacies_val.csv │ │ ├── econometrics_val.csv │ │ ├── virology_val.csv │ │ ├── high_school_mathematics_val.csv │ │ ├── formal_logic_val.csv │ │ ├── clinical_knowledge_val.csv │ │ ├── international_law_val.csv │ │ └── high_school_physics_val.csv └── LongGenBench_GSM8K_prompt │ └── LongGenBench_prompt.txt ├── run_longgenbench_GSM8K.sh └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | datasets -------------------------------------------------------------------------------- /figs/logo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dominic789654/LongGenBench/HEAD/figs/logo.webp -------------------------------------------------------------------------------- /figs/logo_1.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dominic789654/LongGenBench/HEAD/figs/logo_1.webp -------------------------------------------------------------------------------- /figs/main-fig.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dominic789654/LongGenBench/HEAD/figs/main-fig.png -------------------------------------------------------------------------------- /run_longgenbench_MMLU.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set default values 4 | K=20 5 | OUTPUT_PATH="./outputs/LongGenBench_MMLU/LongGenBench_MMLU_demo" 6 | API_KEY="" 7 | MODEL="gpt-4o-mini" 8 | API_ENDPOINT="" 9 | 10 | # Run the Python script with the specified arguments 11 | python longgenbench_MMLU_openai.py \ 12 | --k $K \ 13 | --output_path "$OUTPUT_PATH" \ 14 | --api_key "$API_KEY" \ 15 | --model "$MODEL" \ 16 | --api_endpoint "$API_ENDPOINT" -------------------------------------------------------------------------------- /data/MMLU/data/dev/world_religions_dev.csv: -------------------------------------------------------------------------------- 1 | What is the sign of the covenant for Jewish males?,The rainbow,Circumcision,A son,Bar mitzvah,B 2 | What is the Second Gem in Buddhism?,The Dharma,The Sangha,The Buddha,The Bodhisattva,A 3 | " In which dynasty was the ""Mandate of Heaven"" developed to legitimatize the new rulers?",Shang,Zhou,Han,Xia,B 4 | Which Japanese government promoted a kind of national cult based on the emperor and his associations with kami?,Honen,Tanaka,Tokugawa,Meiji,D 5 | How can the Upanishads be characterized?,Ritual texts,Philosophical texts,Hymns,Origin stories,B -------------------------------------------------------------------------------- /data/MMLU/data/dev/miscellaneous_dev.csv: -------------------------------------------------------------------------------- 1 | How many axles does a standard automobile have?,one,two,four,eight,B 2 | What place is named in the title of the 1979 live album by rock legends Cheap Trick?,Budapest,Budokan,Bhutan,Britain,B 3 | Who is the shortest man to ever win an NBA slam dunk 
competition?,Anthony 'Spud' Webb,Michael 'Air' Jordan,Tyrone 'Muggsy' Bogues,Julius 'Dr J' Erving,A 4 | What is produced during photosynthesis?,hydrogen,nylon,oxygen,light,C 5 | Which of these songs was a Top 10 hit for the rock band The Police?,'Radio Ga-Ga','Ob-la-di Ob-la-da','De Do Do Do De Da Da Da','In-a-Gadda-Da-Vida',C 6 | -------------------------------------------------------------------------------- /run_longgenbench_GSM8K.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # # Set default values 4 | K=35 5 | PROMPT_PATH="./data/LongGenBench_GSM8K_prompt/LongGenBench_prompt_json.txt" 6 | OUTPUT_PATH="./outputs/LongGenBench_GSM8K/LongGenBench_GSM8K_demo.txt" 7 | QUESTION_LIMIT=700 8 | API_KEY="" 9 | MODEL="gpt-4o-mini" 10 | API_ENDPOINT="" 11 | 12 | # Run the Python script with the specified arguments 13 | python longgenbench_GSM8K_openai.py \ 14 | --k $K \ 15 | --prompt_path "$PROMPT_PATH" \ 16 | --output_path "$OUTPUT_PATH" \ 17 | --question_limit $QUESTION_LIMIT \ 18 | --api_key "$API_KEY" \ 19 | --model "$MODEL" \ 20 | --api_endpoint "$API_ENDPOINT" 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/abstract_algebra_dev.csv: -------------------------------------------------------------------------------- 1 | Find all c in Z_3 such that Z_3[x]/(x^2 + c) is a field.,0,1,2,3,B 2 | "Statement 1 | If aH is an element of a factor group, then |aH| divides |a|. Statement 2 | If H and K are subgroups of G then HK is a subgroup of G.","True, True","False, False","True, False","False, True",B 3 | Statement 1 | Every element of a group generates a cyclic subgroup of the group. Statement 2 | The symmetric group S_10 has 10 elements.,"True, True","False, False","True, False","False, True",C 4 | Statement 1| Every function from a finite set onto itself must be one to one. 
Statement 2 | Every subgroup of an abelian group is abelian.,"True, True","False, False","True, False","False, True",A 5 | Find the characteristic of the ring 2Z.,0,3,12,30,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/management_dev.csv: -------------------------------------------------------------------------------- 1 | What are the two main dimensions of the Ohio Studies into leadership?,Starting position and end position,Initial environment and changed environment,Organisational structure and conditioning,Initiating structure and considerations,D 2 | Hygiene factors are associated with which writer?,Frederick Hertzberg,D.C. McClelland,Abraham Maslow,Douglas McGregor,A 3 | Which element of the cultural web forms regalia?,Symbols,Rituals and routines,Power structures,Control systems,A 4 | What characteristic is not a key feature of the 'open systems' model of management?,Morale,Innovation,Growth resource,Adaptation,A 5 | How can organisational structures that are characterised by democratic and inclusive styles of management be described?,Hierarchical,Bureaucratic,Flat,Functional,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/conceptual_physics_dev.csv: -------------------------------------------------------------------------------- 1 | "Compared with the mass of a uranium atom undergoing fission, the combined masses of the products after fission are",less,more,the same,zero,A 2 | Things that are equivalent according to the equivalence principle are,space and time.,a traveling twin and a stay-at-home twin.,gravity and acceleration.,mass and energy.,C 3 | Colors in a soap bubble result from light,converted to a different frequency,deflection,interference,polarization,C 4 | A model airplane flies slower when flying into the wind and faster with wind at its back. 
When launched at right angles to the wind a cross wind its groundspeed compared with flying in still air is,the same,greater,less,either greater or less depending on wind speed,B 5 | Which of these three elements has the most mass per nucleon?,Hydrogen,Iron,Uranium,Same in each,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/anatomy_dev.csv: -------------------------------------------------------------------------------- 1 | What is the embryological origin of the hyoid bone?,The first pharyngeal arch,The first and second pharyngeal arches,The second pharyngeal arch,The second and third pharyngeal arches,D 2 | Which of these branches of the trigeminal nerve contain somatic motor processes?,The supraorbital nerve,The infraorbital nerve,The mental nerve,None of the above,D 3 | The pleura,have no sensory innervation.,are separated by a 2 mm space.,extend into the neck.,are composed of respiratory epithelium.,C 4 | In Angle's Class II Div 2 occlusion there is,excess overbite of the upper lateral incisors.,negative overjet of the upper central incisors.,excess overjet of the upper lateral incisors.,excess overjet of the upper central incisors.,C 5 | Which of the following is the body cavity that contains the pituitary gland?,Abdominal,Cranial,Pleural,Spinal,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/electrical_engineering_dev.csv: -------------------------------------------------------------------------------- 1 | "In an SR latch built from NOR gates, which condition is not allowed","S=0, R=0","S=0, R=1","S=1, R=0","S=1, R=1",D 2 | "In a 2 pole lap winding dc machine , the resistance of one conductor is 2Ω and total number of conductors is 100. Find the total resistance",200Ω,100Ω,50Ω,10Ω,C 3 | "The coil of a moving coil meter has 100 turns, is 40 mm long and 30 mm wide. The control torque is 240*10-6 N-m on full scale. 
If magnetic flux density is 1Wb/m2 range of meter is",1 mA.,2 mA.,3 mA.,4 mA.,B 4 | "Two long parallel conductors carry 100 A. If the conductors are separated by 20 mm, the force per meter of length of each conductor will be",100 N.,0.1 N.,1 N.,0.01 N.,B 5 | A point pole has a strength of 4π * 10^-4 weber. The force in newtons on a point pole of 4π * 1.5 * 10^-4 weber placed at a distance of 10 cm from it will be,15 N.,20 N.,7.5 N.,3.75 N.,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/philosophy_dev.csv: -------------------------------------------------------------------------------- 1 | Psychological egoism is:,an ethical theory about how we ought to behave.,a generalization concerning the way people tend to behave.,a claim about human nature and the ways people are capable of behaving.,none of the above.,C 2 | "According to Moore’s “ideal utilitarianism,” the right action is the one that brings about the greatest amount of:",pleasure.,happiness.,good.,virtue.,C 3 | "According to d'Holbach, people always act according to _____.",free choices,dictates of the soul,necessary natural laws,undetermined will,C 4 | "Before Tolstoy's Christian conversion, what was his perspective on the meaning of life?",optimist,satisfied,nominally religious,pessimist,D 5 | "The study of reality in the broadest sense, an inquiry into the elemental nature of the universe and the things in it, is known as _____.",metaphysics,epistemology,quantum physics,axiology,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/human_aging_dev.csv: -------------------------------------------------------------------------------- 1 | "Which of the following persons is more likely to remain at home alone, as of 2019?",An Asian man or woman,A Hispanic man,An African American woman,A white man or woman,C 2 | The finding that adults tend to remember events from their adolescence better than from 
other periods in their lives is referred to as the,Adolescence advantage,Reminiscence bump,Memorial memorial,Quadratic retrieval spike,B 3 | "When older adults move to a new state after retirement, which of the following is the more likely destination?",Texas,California,Hawaii,Vermont,A 4 | Which element in tobacco smoke is responsible for cancers?,Nicotine,Tar,Carbon monoxide,Smoke particles,B 5 | "All other things being equal, which of the following persons is more likely to show osteoporosis?",An older Hispanic American woman,An older African American woman,An older Asian American woman,An older Native American woman,C -------------------------------------------------------------------------------- /data/MMLU/data/dev/human_sexuality_dev.csv: -------------------------------------------------------------------------------- 1 | Morning sickness is typically a problem:,during the first trimester,during the second trimester,during the third trimester,all through the pregnancy,A 2 | "A woman who knows she has active herpes and untreated syphilis but continues to have sex without informing her partners of her condition has, in psychoanalytic terms:",a strong ego,a weak superego,a weak id,a strong superego,B 3 | Women's ability to have multiple orgasms is primarily due to:,the fact that they do not have a refractory period.,the response of the inner layers of the vagina.,having alternating orgasms in different locations.,the G-Spot.,A 4 | The nature of homosexual activities that occur during preadolescence include all but which of the following?,sexual intercourse,circle jerks,exhibitionism,touching each other's genitals,A 5 | The most common disorder among men who seek sexual therapy is:,premature ejaculation,inhibited ejaculation,erectile disorder,ejaculatory disorder,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/virology_dev.csv: 
-------------------------------------------------------------------------------- 1 | Why are parvoviruses a highly impactful parasite?,Because they have no nucleic acid,They require a helper virus,Only replicate in dividing cells,Can integrate into host chromosomes,A 2 | Which of the following is a morphological characteristic of the paramyxoviruses.,Fragile viruses often visualised with RNA spewing from the inside,Elongate viruses,Icosahedral viruses with envelope,Very large viruses,A 3 | A key factor facilitating the application of nested case-control studies from the MACS was:,Data collection,Establishment of a repository of biologic specimens,Participant interest,Administration of the questionnaire by staff,B 4 | The most important goal of a behavioral intervention is:,Change in behavior,Comprehensive coverage,Effective use of behavioral theory,Sustained behavior change,D 5 | The median survival time to AIDS and death was established by following:,Seroprevalent HIV-infected individuals,Seronegatives,Seroconverters,High-risk seronegatives,C -------------------------------------------------------------------------------- /data/MMLU/data/dev/medical_genetics_dev.csv: -------------------------------------------------------------------------------- 1 | Large triplet repeat expansions can be detected by:,polymerase chain reaction.,single strand conformational polymorphism analysis.,Southern blotting.,Western blotting.,C 2 | DNA ligase is,an enzyme that joins fragments in normal DNA replication,an enzyme of bacterial origin which cuts DNA at defined base sequences,an enzyme that facilitates transcription of specific genes,an enzyme which limits the level to which a particular nutrient reaches,A 3 | A gene showing codominance,has both alleles independently expressed in the heterozygote,has one allele dominant to the other,has alleles tightly linked on the same chromosome,has alleles expressed at the same time in development,A 4 | Which of the following conditions does 
not show multifactorial inheritance?,Pyloric stenosis,Schizophrenia,Spina bifida (neural tube defects),Marfan syndrome,D 5 | The stage of meiosis in which chromosomes pair and cross over is:,prophase I,metaphase I,prophase II,metaphase II,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/computer_security_dev.csv: -------------------------------------------------------------------------------- 1 | SHA-1 has a message digest of,160 bits,512 bits,628 bits,820 bits,A 2 | "_____________ can modify data on your system – so that your system doesn’t run correctly or you can no longer access specific data, or it may even ask for ransom in order to give your access.",IM – Trojans,Backdoor Trojans,Trojan-Downloader,Ransom Trojan,D 3 | What is ethical hacking?,"""Hacking"" ethics so they justify unintended selfish behavior","Hacking systems (e.g., during penetration testing) to expose vulnerabilities so they can be fixed, rather than exploited",Hacking into systems run by those whose ethics you disagree with,"A slang term for rapid software development, e.g., as part of hackathons",B 4 | Exploitation of the Heartbleed bug permits,overwriting cryptographic keys in memory,a kind of code injection,a read outside bounds of a buffer,a format string attack,C 5 | The ____________ is anything which your search engine cannot search.,Haunted web,World Wide Web,Surface web,Deep Web,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/clinical_knowledge_dev.csv: -------------------------------------------------------------------------------- 1 | The energy for all forms of muscle contraction is provided by:,ATP.,ADP.,phosphocreatine.,oxidative phosphorylation.,A 2 | What is the difference between a male and a female catheter?,Male and female catheters are different colours.,Male catheters are longer than female catheters.,Male catheters are bigger than female catheters.,Female 
catheters are longer than male catheters.,B 3 | In the assessment of the hand function which of the following is true?,Abduction of the thumb is supplied by spinal root T2,Opposition of the thumb by opponens policis is supplied by spinal root T1,Finger adduction is supplied by the median nerve,Finger abduction is mediated by the palmar interossei,B 4 | "How many attempts should you make to cannulate a patient before passing the job on to a senior colleague, according to the medical knowledge of 2020?",4,3,2,1,C 5 | Glycolysis is the name given to the pathway involving the conversion of:,glycogen to glucose-1-phosphate.,glycogen or glucose to fructose.,glycogen or glucose to pyruvate or lactate.,glycogen or glucose to pyruvate or acetyl CoA.,C -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_chemistry_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following is considered an acid anhydride?,HCl,H2SO3,SO2,Al(NO3)3,C 2 | Which of the following is expected to be a polar molecule?,PCl4F,BF3,CO2,Si(CH3)4,A 3 | "From the solubility rules, which of the following is true?","All chlorides, bromides, and iodides are soluble",All sulfates are soluble,All hydroxides are soluble,All ammonium-containing compounds are soluble,D 4 | "A new compound is synthesized and found to be a monoprotic acid with a molar mass of 248 g/mol. When 0.0050 mol of this acid are dissolved in 0.500 L of water, the pH is measured as 3.89. What is the pKa of this acid?",3.89,7.78,5.78,2.33,C 5 | "A solution contains 2.00 mole of acetic acid, CH3COOH, and 1.00 mole of calcium acetate, Ca(CH3COO)2. The solution is able to resist the addition of a small amount of strong acid or strong base with only minor changes in the pH of the solution. Larger quantities of strong acid or strong base can cause a significant change in pH. 
How many moles of nitric acid, HNO3, may be added before the pH begins to change significantly?",0.500 mole,1.00 mole,2.00 mole,3.00 mole,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/global_facts_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following pairs of statements are both true (as of 2019)?,People tend to be optimistic about their own future and the future of their nation or the world.,People tend to be optimistic about their own future but pessimistic about the future of their nation or the world.,People tend to be pessimistic about their own future but optimistic about the future of their nation or the world.,People tend to be pessimistic about their own future and the future of their nation or the world.,B 2 | "As of 2019, about what percentage of Americans agree that the state is run for the benefit of all the people?",31%,46%,61%,76%,B 3 | "As of 2015, since 1990 forests have ____ in Europe and have ____ in Africa and the Americas.","increased, increased","increased, decreased","decreased, increased","decreased, decreased",B 4 | "As of 2019, about what percentage of Russians say it is very important to have free media in our country without government/state censorship?",38%,53%,68%,83%,A 5 | "As of 2017, how many of the world’s 1-year-old children today have been vaccinated against some disease? 
*",80%,60%,40%,20%,A -------------------------------------------------------------------------------- /data/MMLU/data/dev/jurisprudence_dev.csv: -------------------------------------------------------------------------------- 1 | Which position does Rawls claim is the least likely to be adopted by the POP (people in the original position)?,The POP would choose equality above liberty.,The POP would opt for the 'maximin' strategy.,The POP would opt for the 'difference principle'.,The POP would reject the 'system of natural liberty.',A 2 | Functions of the law include all but which of the following?,maximizing individual freedom,providing a basis for compromise,keeping the peace,promoting the principles of the free enterprise system,D 3 | Which word best summarizes Weber's explanation of the development of formally rational law?,Authority.,Charisma.,Co-operation.,Capitalism.,D 4 | "The ________ School of jurisprudence postulates that the law is based on what is ""correct.""",Natural Law,Analytical,Historical,Sociological,A 5 | "Iverson Jewelers wrote a letter to Miller, 'We have received an exceptionally fine self winding Rolox watch which we will sell to you at a very favorable price.'",The letter is an offer to sell,A valid offer cannot be made by letter.,The letter contains a valid offer which will terminate within a reasonable time.,The letter lacks one of the essential elements of an offer.,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_mathematics_dev.csv: -------------------------------------------------------------------------------- 1 | "Joe was in charge of lights for a dance. The red light blinks every two seconds, the yellow light every three seconds, and the blue light every five seconds. If we include the very beginning and very end of the dance, how many times during a seven minute dance will all the lights come on at the same time? 
(Assume that all three lights blink simultaneously at the very beginning of the dance.)",3,15,6,5,B 2 | "Five thousand dollars compounded annually at an $x\%$ interest rate takes six years to double. At the same interest rate, how many years will it take $\$300$ to grow to $\$9600$?",12,1,30,5,C 3 | "The variable $x$ varies directly as the square of $y$, and $y$ varies directly as the cube of $z$. If $x$ equals $-16$ when $z$ equals 2, what is the value of $x$ when $z$ equals $\frac{1}{2}$?",-1,16,-\frac{1}{256},\frac{1}{16},C 4 | Simplify and write the result with a rational denominator: $$\sqrt{\sqrt[3]{\sqrt{\frac{1}{729}}}}$$,\frac{3\sqrt{3}}{3},\frac{1}{3},\sqrt{3},\frac{\sqrt{3}}{3},D 5 | "Ten students take a biology test and receive the following scores: 45, 55, 50, 70, 65, 80, 40, 90, 70, 85. What is the mean of the students’ test scores?",55,60,62,65,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_microeconomics_dev.csv: -------------------------------------------------------------------------------- 1 | "In a competitive labor market for housepainters, which of the following would increase the demand for housepainters?",An effective minimum wage imposed on this labor market.,An increase in the price of gallons of paint.,An increase in the construction of new houses.,An increase in the price of mechanical painters so long as the output effect exceeds the substitution effect.,C 2 | "If the government subsidizes producers in a perfectly competitive market, then",the demand for the product will increase,the demand for the product will decrease,the consumer surplus will increase,the consumer surplus will decrease,C 3 | The concentration ratio for a monopoly is,0,5,10,100,D 4 | Which of the following is true of a price floor?,The price floor shifts the demand curve to the left.,An effective floor creates a shortage of the good.,The price floor shifts the supply curve of the good to the 
right.,"To be an effective floor, it must be set above the equilibrium price.",D 5 | Which of the following is necessarily a characteristic of oligopoly?,Free entry into and exit from the market,A few large producers,One producer of a good with no close substitutes,A homogenous product,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_chemistry_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following statements about the lanthanide elements is NOT true?,The most common oxidation state for the lanthanide elements is +3.,Lanthanide complexes often have high coordination numbers (> 6).,All of the lanthanide elements react with aqueous acid to liberate hydrogen.,The atomic radii of the lanthanide elements increase across the period from La to Lu.,D 2 | A 0.217 g sample of HgO (molar mass = 217 g) reacts with excess iodide ions according to the reaction shown above. Titration of the resulting solution requires how many mL of 0.10 M HCl to reach equivalence point?,1.0 mL,10 mL,20 mL,50 mL,C 3 | "Predict the number of lines in the EPR spectrum of a solution of 13C-labelled methyl radical (13CH3•), assuming the lines do not overlap.",4,3,6,24,A 4 | "3 Cl−(aq) + 4 CrO_4^2−(aq) + 23 H+(aq) → 3 HClO2(aq) + 4 Cr3+(aq) + 10 H2O(l). In the reaction shown above, Cl−(aq) behaves as",an acid,a base,a catalyst,a reducing agent,D 5 | "Which of the following lists the hydrides of group-14 elements in order of thermal stability, from lowest to highest?",PbH4 < SnH4 < GeH4 < SiH4 < CH4,PbH4 < SnH4 < CH4 < GeH4 < SiH4,CH4 < SiH4 < GeH4 < SnH4 < PbH4,CH4 < PbH4 < GeH4 < SnH4 < SiH4,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_macroeconomics_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following is not included in the U.S. 
GDP?,The U.S. military opens a new base in a foreign country with 1000 U.S. personnel.,Japanese consumers buy thousands of CDs produced in the United States.,An American pop singer performs a sold-out concert in Paris.,A French theatrical production tours dozens of American cities.,C 2 | The short-run Phillips curve indicates a,direct relation between unemployment and inflation,direct relation between price and quantity demanded,inverse relation between price and quantity demanded,inverse relation between unemployment and inflation,D 3 | A federal deficit occurs when,exports exceed imports.,imports exceed exports.,federal tax collections exceed spending.,federal spending exceeds federal tax revenues.,D 4 | Holding all else equal which of the following monetary policies would be used to boost U.S. exports?,Increasing the discount rate,Increasing the reserve ratio,Buying government securities,Lowering tariffs,C 5 | Which of the following policies best describes supply-side fiscal policy?,An increase in the money supply,Increased government spending,Lower taxes on research and development of new technology,Higher taxes on household income,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_geography_dev.csv: -------------------------------------------------------------------------------- 1 | The rate of natural increase of a population is found by subtracting the,crude death rate from the crude birth date.,crude birth rate from the crude death rate.,doubling time from the crude birth rate.,fertility rate from the crude death rate.,A 2 | "During the third stage of the demographic transition model, which of the following is true?",Birth rates increase and population growth rate is less rapid.,Birth rates decline and population growth rate is less rapid.,Birth rates increase and population growth rate increases.,Birth rates decrease and population growth rate increases.,B 3 | Which of the following 
statements is NOT accurate regarding the services provided by local governments in the United States?,Duplication of efforts occurs often.,Social problems of the central city spill over into the surrounding residential suburbs.,Inefficiency in providing services occurs often.,One neighborhood's efforts to reduce pollution are always supported by neighboring communities.,D 4 | The practice of hiring a foreign third-party service provider to run an operation is called,outsourcing.,offshoring.,maquiladoras.,locational interdependence.,B 5 | Which one of the following items is an example of nonmaterial culture?,Dove soap,Dove candy bar,Dove symbol,A dove (bird),C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_physics_dev.csv: -------------------------------------------------------------------------------- 1 | A refracting telescope consists of two converging lenses separated by 100 cm. The eye-piece lens has a focal length of 20 cm. The angular magnification of the telescope is,4,5,6,20,A 2 | For which of the following thermodynamic processes is the increase in the internal energy of an ideal gas equal to the heat added to the gas?,Constant temperature,Constant volume,Constant pressure,Adiabatic,B 3 | "One end of a Nichrome wire of length 2L and cross-sectional area A is attached to an end of another Nichrome wire of length L and cross- sectional area 2A. If the free end of the longer wire is at an electric potential of 8.0 volts, and the free end of the shorter wire is at an electric potential of 1.0 volt, the potential at the junction of the two wires is most nearly equal to",2.4 V,3.3 V,4.5 V,5.7 V,A 4 | A refracting telescope consists of two converging lenses separated by 100 cm. The eye-piece lens has a focal length of 20 cm. 
The angular magnification of the telescope is,4,5,6,20,A 5 | "The muon decays with a characteristic lifetime of about 10^-6 second into an electron, a muon neutrino, and an electron antineutrino. The muon is forbidden from decaying into an electron and just a single neutrino by the law of conservation of",charge,mass,energy and momentum,lepton number,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/elementary_mathematics_dev.csv: -------------------------------------------------------------------------------- 1 | "The population of the city where Michelle was born is 145,826. What is the value of the 5 in the number 145,826?",5 thousands,5 hundreds,5 tens,5 ones,A 2 | "Olivia used the rule ""Add 11"" to create the number pattern shown below. 10, 21, 32, 43, 54 Which statement about the number pattern is true?",The 10th number in the pattern will be an even number.,The number pattern will never have two even numbers next to each other.,The next two numbers in the pattern will be an even number then an odd number.,If the number pattern started with an odd number then the pattern would have only odd numbers in it.,B 3 | A total of 30 players will play basketball at a park. There will be exactly 5 players on each team. Which statement correctly explains how to find the number of teams needed?,Add 5 to 30 to find 35 teams.,Divide 30 by 5 to find 6 teams.,Multiply 30 and 5 to find 150 teams.,Subtract 5 from 30 to find 25 teams.,B 4 | A store sells 107 different colors of paint. They have 25 cans of each color in storage. The number of cans of paint the store has in storage can be found using the expression below. 107 × 25. 
How many cans of paint does the store have in storage?,749,"2,675","2,945","4,250",B 5 | Which expression is equivalent to 5 x 9?,(5 x 4) x (6 x 5),(5 x 5) + (5 x 4),(5 x 5) + (5 x 9),(5 x 9) x (6 x 9),B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/marketing_dev.csv: -------------------------------------------------------------------------------- 1 | _____________ is a natural outcome when combining demographic and geographic variables.,Geodemographics,Product differentiation.,ANSOFF matrix.,Brand management.,A 2 | "In an organization, the group of people tasked with buying decisions is referred to as the _______________.",Outsourcing unit.,Procurement centre.,Chief executive unit.,Decision-making unit.,D 3 | Which of the following is an assumption in Maslow's hierarchy of needs?,Needs are dependent on culture and also on social class.,Lower-level needs must be at least partially satisfied before higher needs can affect behaviour.,Needs are not prioritized or arranged in any particular order.,"Satisfied needs are motivators, and new needs emerge when current needs remain unmet.",B 4 | The single group within society that is most vulnerable to reference group influence is:,The older consumer who feels somewhat left out of things.,"The married women, many of whom feel a need for stability in their lives.",New immigrants who really want to assimilate into their new culture.,"Children, who base most of their buying decisions on outside influences.",D 5 | "Although the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. 
This media format is known as:",Care lines.,Direct mail.,Inserts.,Door to door.,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_physics_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following conditions will ensure that angular momentum is conserved? I. Conservation of linear momentum II. Zero net external force III. Zero net external torque,I and II only,I and III only,II and III only,III only,D 2 | "A pipe full of air is closed at one end. A standing wave is produced in the pipe, causing the pipe to sound a note. Which of the following is a correct statement about the wave’s properties at the closed end of the pipe?","The pressure is at a node, but the particle displacement is at an antinode.","The pressure is at an antinode, but the particle displacement is at a node.",The pressure and the particle displacement are both at nodes.,The pressure and the particle displacement are both at antinodes.,B 3 | "A photocell of work function ϕ = 2eV is connected to a resistor in series. Light of frequency f = 1 × 10^15 Hz hits a metal plate of the photocell. If the power of the light is P = 100 W, what is the current through the resistor?",2 A,6 A,12 A,24 A,D 4 | "A microwave oven is connected to an outlet, 120 V, and draws a current of 2 amps. At what rate is energy being used by the microwave oven?",10 W,30 W,60 W,240 W,D 5 | "A point charge, Q = +1 mC, is fixed at the origin. 
How much work is required to move a charge, Q = +8 µC, from the point (0, 4 meters) to the point (3 meters, 0)?",3.5 J,6.0 J,22.5 J,40 J,B -------------------------------------------------------------------------------- /data/MMLU/data/dev/public_relations_dev.csv: -------------------------------------------------------------------------------- 1 | What should a public relations media practitioner do if she does not know the answer to a reporter's question?,Give the reporter other information she is certain is correct.,Say that the information is 'off the record' and will be disseminated later.,Say 'I don't know' and promise to provide the information later.,"Say 'no comment,' rather than appear uninformed.",C 2 | "In issues management, what is the most proactive approach to addressing negative or misleading information posted online about your organization?",Buy domain names that could be used by opposition groups.,Post anonymous comments on blogs to combat this information.,Prepare a news release that discredits the inaccurate information.,Make policy changes to address complaints highlighted on these sites.,D 3 | Which of these statements is true of the Vatican in 2010 at the time of the accusations of child abuse cover-ups?,There was a coordinated media response.,Consistent messages were communicated.,Criticisms were taken as attacks on the Catholic Church.,The credibility of the Vatican was upheld.,C 4 | At which stage in the planning process would a situation analysis be carried out?,Defining the program,Planning the program,Taking action and implementing ideas,Evaluation of the program,A 5 | Earth Hour was a campaign launched by which organization?,Greenpeace,The UN,Oxfam,World Wildlife Fund,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_mathematics_dev.csv: -------------------------------------------------------------------------------- 1 | "Let V be the set of all real polynomials p(x). 
Let transformations T, S be defined on V by T:p(x) -> xp(x) and S:p(x) -> p'(x) = d/dx p(x), and interpret (ST)(p(x)) as S(T(p(x))). Which of the following is true?",ST = 0,ST = T,ST = TS,ST - TS is the identity map of V onto itself.,D 2 | "A tank initially contains a salt solution of 3 grams of salt dissolved in 100 liters of water. A salt solution containing 0.02 grams of salt per liter of water is sprayed into the tank at a rate of 4 liters per minute. The sprayed solution is continually mixed with the salt solution in the tank, and the mixture flows out of the tank at a rate of 4 liters per minute. If the mixing is instantaneous, how many grams of salt are in the tank after 100 minutes have elapsed?",2,2 - e^-2,2 + e^-2,2 + e^-4,D 3 | "Let A be a real 2x2 matrix. Which of the following statements must be true? 4 | I. All of the entries of A^2 are nonnegative. 5 | II. The determinant of A^2 is nonnegative. 6 | III. If A has two distinct eigenvalues, then A^2 has two distinct eigenvalues.",I only,II only,III only,II and III only,B 7 | "Suppose that f(1 + x) = f(x) for all real x. If f is a polynomial and f(5) = 11, then f(15/2)",-11,0,11,33/2,C 8 | "Let A be the set of all ordered pairs of integers (m, n) such that 7m + 12n = 22. 
What is the greatest negative number in the set B = {m + n : (m, n) \in A}?",-5,-4,-3,-2,B 9 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_biology_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following represents an accurate statement concerning arthropods?,They possess an exoskeleton composed primarily of peptidoglycan.,They possess an open circulatory system with a dorsal heart.,They are members of a biologically unsuccessful phylum incapable of exploiting diverse habitats and nutrition sources.,"They lack paired, jointed appendages.",B 2 | "In a given population, 1 out of every 400 people has a cancer caused by a completely recessive allele, b. Assuming the population is in Hardy-Weinberg equilibrium, which of the following is the expected proportion of individuals who carry the b allele but are not expected to develop the cancer?",1/400,19/400,20/400,38/400,D 3 | "The presence of homologous structures in two different organisms, such as the humerus in the front limb of a human and a bird, indicates that",the human and bird are polyphyletic species,a human's and bird's evolution is convergent,the human and bird belong to a clade,the human and bird developed by analogy,C 4 | "According to the pressure-flow model of movement of phloem contents, photosynthate movement from source to sink is driven by",an ATP-dependent pressure-flow pump,a water-pressure potential gradient,transpiration,apoplastic diffusion,B 5 | Which of the following contain DNA sequences required for the segregation of chromosomes in mitosis and meiosis?,Telomeres,Centromeres,Nucleosomes,Spliceosomes,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/logical_fallacies_dev.csv: -------------------------------------------------------------------------------- 1 | "If someone attacks the character of an opposing arguer, 
instead of responding to that opponent's arguments, the first person has probably committed which of the following fallacies?",tu quoque,horse laugh,argument against the person,ignoratio elenchi,C 2 | The complex question fallacy consists of,arguing something is inferior just because it doesn't do something it was never intended to do.,including more than one claim in the proposition and treating proof for one claim as proof for all the claims.,"drawing a conclusion before examining the evidence, and only considering evidence that supports that conclusion.","asking a question that includes either an unproven assumption or more than one question, thus making a straightforward yes or no answer meaningless.",D 3 | Which of the following is true of a valid categorical syllogism?,The minor premise must deny the antecedent,The major premise must affirm the consequent,The middle term must be used in at least one premise in a universal or unqualified sense,All of the above,C 4 | Arguing that what is true of the parts must be true of the whole is the fallacy of...,Division,Composition,Appeal to the person,Appeal to ignorance,B 5 | "When an arguer causes confusion during refutation because of real or feigned lack of an ability to engage in refutation, that arguer may have committed the fallacy of",poor sportsmanship,appeal to compassion,argument against the person,ignorance of refutation,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/sociology_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following did the post-war welfare state of 1948 not aim to provide:,free health care and education for all,a minimum wage,full employment,universal welfare,B 2 | What does Berger (1963) describe as a metaphor for social reality?,a fairground ride,a circus,a puppet theatre,a ballet,C 3 | The shift from 'civil religion' to 'common religion' means that:,the increasing 
bureaucracy of the state has made religion only a marginal part of our lives,"despite the weakening of traditional authority, our everyday lives and 'common sense' remain shaped by religious beliefs and values","religious participation in collective worship may have declined, but people still practise their faiths in private","people are much more likely to discuss their religious beliefs in public, informal settings",B 4 | The term 'hegemony' refers to:,the tendency for the working class not to realize their own interests,"a dominant ideology that legitimates economic, political and cultural power",a form of dual consciousness based on ideology and everyday experiences,a mode of payment given for outstanding topiary,B 5 | Which of the following is not a problem associated with official statistics on strike action?,most strikes go unnoticed by employers and the mass media,not all industrial disputes will be reported by the employer,the definition of strikes excludes those that involve fewer than ten workers or last less than one day,it is hard to compare strikes that were measured in different ways,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/us_foreign_policy_dev.csv: -------------------------------------------------------------------------------- 1 | How did the 2008 financial crisis affect America's international reputation?,It damaged support for the US model of political economy and capitalism,It created anger at the United States for exaggerating the crisis,It increased support for American global leadership under President Obama,It reduced global use of the US dollar,A 2 | How did NSC-68 change U.S. 
strategy?,It globalized containment.,It militarized containment.,It called for the development of the hydrogen bomb.,All of the above,D 3 | The realm of policy decisions concerned primarily with relations between the United States and the rest of the world is known as,terrorism policy.,economic policy.,foreign policy.,international policy.,C 4 | How do Defensive Realism and Offensive Realism differ in their explanation of state behaviour?,Defensive realists place greater emphasis on the role of international institutions,Defensive realists place less emphasis on geographical factors,Offensive realists give more priority to the national interest than Defensive realists.,"Defensive realists believe states are security maximizers, while Offensive realists believe states to be power maximizers",D 5 | How did Donald Trump attack globalization in the 2016 campaign?,Globalization had made men like him too rich,"Globalization only benefited certain American states, such as New York","Liberal elites had encouraged globalization, while 'ordinary Americans' lost jobs because of it",Globalization encouraged damaging trade wars,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_biology_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following is not a way to form recombinant DNA?,Translation,Conjugation,Specialized transduction,Transformation,A 2 | A mutation in a bacterial enzyme changed a previously polar amino acid into a nonpolar amino acid. This amino acid was located at a site distant from the enzyme’s active site. 
How might this mutation alter the enzyme’s substrate specificity?,By changing the enzyme’s pH optimum,By changing the enzyme’s location in the cell,By changing the shape of the protein,An amino acid change away from the active site cannot alter the enzyme’s substrate specificity.,C 3 | "In animal cells, which of the following represents the most likely pathway that a secretory protein takes as it is synthesized in a cell?",Plasma membrane–Golgi apparatus–ribosome–secretory vesicle–rough ER,Ribosome–Golgi apparatus–rough ER–secretory vesicle–plasma membrane,Plasma membrane–Golgi apparatus–ribosome–secretory vesicle–rough ER,Ribosome–rough ER–Golgi apparatus–secretory vesicle–plasma membrane,D 4 | Which of the following is not known to be involved in the control of cell division?,Cyclins,Protein kinases,Checkpoints,Fibroblast cells,D 5 | Homologous structures are often cited as evidence for the process of natural selection. All of the following are examples of homologous structures EXCEPT,the wings of a bird and the wings of a bat,the flippers of a whale and the arms of a man,the pectoral fins of a porpoise and the flippers of a seal,the forelegs of an insect and the forelimbs of a dog,D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_medicine_dev.csv: -------------------------------------------------------------------------------- 1 | Glucose is transported into the muscle cell:,via protein transporters called GLUT4.,only in the presence of insulin.,via hexokinase.,via monocarboxylic acid transporters.,A 2 | Which of the following is not a true statement?,Muscle glycogen is broken down enzymatically to glucose-1-phosphate,Elite endurance runners have a high proportion of Type I fibres in their leg muscles,Liver glycogen is important in the maintenance of the blood glucose concentration,Insulin promotes glucose uptake by all tissues in the body,D 3 | "In a genetic test of a newborn, a rare genetic 
disorder is found that has X-linked recessive transmission. Which of the following statements is likely true regarding the pedigree of this disorder?",All descendants on the maternal side will have the disorder.,Females will be approximately twice as affected as males in this family.,All daughters of an affected male will be affected.,There will be equal distribution of males and females affected.,C 4 | "A high school science teacher fills a 1 liter bottle with pure nitrogen and seals the lid. The pressure is 1.70 atm, and the room temperature is 25°C. Which two variables will both increase the pressure of the system, if all other variables are held constant?","Increasing temperature, increasing moles of gas","Increasing temperature, increasing volume","Decreasing volume, decreasing temperature","Decreasing moles of gas, increasing volume",A 5 | An expected side effect of creatine supplementation is:,muscle weakness.,gain in body mass.,muscle cramps.,loss of electrolytes.,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/README.txt: -------------------------------------------------------------------------------- 1 | This file contains the dev, val, and test data for our multitask test. 2 | The dev dataset is for few-shot learning to prime the model, and the test set the source of evaluation questions. 3 | The auxiliary_training data could be used for fine-tuning, something important for models without few-shot capabilities. This auxiliary training data comes from other NLP multiple choice datasets such as MCTest (Richardson et al., 2013), RACE (Lai et al., 2017), ARC (Clark et al., 2018, 2016), and OBQA (Mihaylov et al., 2018). 4 | Unless otherwise specified, the questions are in reference to human knowledge as of January 1st, 2020. In the far future, it may be useful to add to the prompt that the question is written for 2020 audiences. 
5 | 6 | -- 7 | 8 | If you find this useful in your research, please consider citing the test and also the ETHICS dataset it draws from: 9 | 10 | @article{hendryckstest2021, 11 | title={Measuring Massive Multitask Language Understanding}, 12 | author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt}, 13 | journal={Proceedings of the International Conference on Learning Representations (ICLR)}, 14 | year={2021} 15 | } 16 | 17 | @article{hendrycks2021ethics, 18 | title={Aligning AI With Shared Human Values}, 19 | author={Dan Hendrycks and Collin Burns and Steven Basart and Andrew Critch and Jerry Li and Dawn Song and Jacob Steinhardt}, 20 | journal={Proceedings of the International Conference on Learning Representations (ICLR)}, 21 | year={2021} 22 | } 23 | -------------------------------------------------------------------------------- /data/MMLU/data/val/management_val.csv: -------------------------------------------------------------------------------- 1 | Simon (1960) is associated with what type of decision making model?,Rational,Classical,Programmed,Administrative,D 2 | At what level of an organisation does a corporate manager operate?,Functional,Operational,Middle level,Top level,D 3 | Which one is not a recognised key skill of management?,Conceptual skills,Human skills,Technical skills,Writing skills,D 4 | Which one of the following is not a characteristic of a bureaucratic organisation?,Authority,Regulations,Command structure,Change,D 5 | To what does authority refer?,The ability to organise people,The power to command and direct,The need for order,The right to change jobs,B 6 | Power distance in national cultures is a characteristic of a model by which writer?,Geert Hofstede,David Yip,Tony Purdie,Michael Porter,A 7 | Of what is an investment decision an example?,Programmed decision,Routine decision,Management decision,Non-programmed decision,D 8 | How do you call a system that allows workers 
to work part of the week from home?,Piece time,Part-time,Job sharing,Telecommuting,D 9 | What are the three interpersonal roles of managers?,"Figurehead, leader and liaison","Spokesperson, leader, coordinator","Director, coordinator, disseminator","Communicator, organiser, spokesperson",A 10 | "According to Lewin, Lippet and White's 1939 experiment, which form of leadership produced the most work from participants?",Laissez-faire,Democratic,Authoritarian,A mix of laissez-faire and democratic,C 11 | Which of the following motivators is the most basic need in Maslow's hierarchy?,Safety,Belonging,Physiological,Esteem,C 12 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/econometrics_dev.csv: -------------------------------------------------------------------------------- 1 | "For a stationary autoregressive process, shocks will",Eventually die away,Persist indefinitely,Grow exponentially,Never occur,A 2 | "Consider the following AR(1) model with the disturbances having zero mean and unit variance 3 | 4 | yt = 0.2 + 0.4 yt-1 + ut 5 | 6 | The (unconditional) mean of y will be given by",0.2,0.4,0.5,0.33,D 7 | "Suppose that a test statistic has associated with it a p-value of 0.08. Which one of the following statements is true? 
8 | 9 | (i) If the size of the test were exactly 8%, we would be indifferent between rejecting and not rejecting the null hypothesis 10 | 11 | (ii) The null would be rejected if a 10% size of test were used 12 | 13 | (iii) The null would not be rejected if a 1% size of test were used 14 | 15 | (iv) The null would be rejected if a 5% size of test were used.",(ii) and (iv) only,(i) and (iii) only,"(i), (ii), and (iii) only","(i), (ii), (iii), and (iv)",C 16 | What would be the consequences for the OLS estimator if heteroscedasticity is present in a regression model but ignored?,It will be biased,It will be inconsistent,It will be inefficient,"All of (a), (b) and (c) will be true.",C 17 | "Suppose now that a researcher wishes to use information criteria to determine the optimal lag length for a VAR. 500 observations are available for the bi-variate VAR, and the values of the determinant of the variance-covariance matrix of residuals are 0.0336, 0.0169, 0.0084, and 0.0062 for 1, 2, 3, and 4 lags respectively. What is the optimal model order according to Akaike's information criterion?",1 lag,2 lags,3 lags,4 lags,C 18 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/formal_logic_dev.csv: -------------------------------------------------------------------------------- 1 | Select the best translation into predicate logic: No people drive on Mars.,~Pd,(∀x)(Px ∨ ~Dx),(∀x)(Px ⊃ ~Dx),~Dp,C 2 | Select the best translation into predicate logic.George borrows Hector's lawnmower. (g: George; h: Hector; l: Hector's lawnmower; Bxyz: x borrows y from z),Blgh,Bhlg,Bglh,Bghl,C 3 | " Select the best English interpretation of the given arguments in predicate logic. 4 | Dm 5 | (∀x)(Wx ⊃ ~Dx) 6 | (∀x)Wx ∨ Ag / (∃x)Ax",Marina is a dancer. Some weaklings are not dancers. Either everything is a weakling or Georgia plays volleyball. So something plays volleyball.,Marina is a dancer. No weakling is a dancer. 
Everything is either a weakling or plays volleyball. So something plays volleyball.,Marina is a dancer. Some weaklings are not dancers. Everything is either a weakling or plays volleyball. So something plays volleyball.,Marina is a dancer. No weakling is a dancer. Either everything is a weakling or Georgia plays volleyball. So something plays volleyball.,D 7 | " Construct a complete truth table for the following pairs of propositions. Then, using the truth tables, determine whether the statements are logically equivalent or contradictory. If neither, determine whether they are consistent or inconsistent. Justify your answers. 8 | E ⊃ (F · E) and ~E · F",Logically equivalent,Contradictory,"Neither logically equivalent nor contradictory, but consistent",Inconsistent,C 9 | " Which of the given formulas of PL is the best symbolization of the following sentence? 10 | Turtles live long lives and are happy creatures, unless they are injured.",(L • H) ≡ I,(L • H) ∨ I,L • (H ∨ I),L • (H ⊃ R),B 11 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/moral_disputes_dev.csv: -------------------------------------------------------------------------------- 1 | "According to Metz, what is wrong with consequentialist arguments against capital punishment based on African values?",It is unclear as of yet whether or not capital punishment deters harm to the community.,It is unclear as of yet whether or not capital punishment deters harm to any individuals.,Consequentialism is not supported by African values.,"Even though consequentialism is supported by African values, no consequentialist arguments framed in terms of African values have been offered.",A 2 | A fertilized ovum is also known as,a zygote.,an embryo.,a viability.,a blastocyst.,A 3 | "Which of the following is an intrinsic good, according to Singer?",being in the upper class,the ability to learn and solve problems,being more beautiful than most of the population,being 
shorter than average,B 4 | "Pence compares six different cases of reproduction, from natural twinning to SCNT. What conclusion does he draw from this comparison?",SCNT is not a different kind of reproduction because there are no morally relevant differences between it and other permissible means of reproduction.,"Because there is a low risk of harm for natural twinning, there will be a low risk of harm for SCNT.",both A and B,neither A nor B,A 5 | "Baron admits that the versions of the ticking bomb hypothetical she discusses are ""stunningly stupid,"" but she claims this is actually evidence of",the stupidity of most traditional philosophical examples.,a general lack of intelligence among people with advanced degrees.,the wrongness of torture.,the readiness on the part of many intelligent people to see torture as the best solution to deal with terrorism.,D -------------------------------------------------------------------------------- /data/MMLU/data/val/global_facts_val.csv: -------------------------------------------------------------------------------- 1 | "As of 2014, about what percentage of primary aged children in Sub-Saharan Africa were out-of-school?",10%,20%,35%,50%,B 2 | "There are two billion children in the world today (2020), aged 0 to 15 years old. 
How many children will there be in the year 2100 according to the United Nations?",4 billion,3 billion,2 billion,1 billion,C 3 | "Controlling for inflation and PPP-adjustment, about how much did GDP per capita increase from 1950 to 2016 in India?",by 4 fold,by 8 fold,by 16 fold,by 32 fold,A 4 | "As of 2020, about what percentage of the population in Sub-Saharan Africa, is living on less than $1.90?",10%,20%,40%,80%,C 5 | In which of the following country did the largest number of children die under the age of five years in 2017?,China,United States,Indonesia,Pakistan,D 6 | Which of the following is true about the global growth rate?,The global growth rate was four times as high 50 years ago as it is in 2020.,The global growth rate was two times as high 50 years ago as it is in 2020.,The global growth rate is two times as high as it is in 2020.,The global growth rate is four times as high today as it is in 2020.,B 7 | Most Latin American countries achieved independence,shortly after World War II,in the 1960s,around the time of World War I,in the early nineteenth century,D 8 | After the Atom bomb how many Americans support giving aid to Japan even at the cost of food shortages?,33%,50%,66%,85%,C 9 | In 1948 how many Americans believed the Kinsey Report on Sexuality in the Male to be a bad thing?,92%,64%,27%,11%,D 10 | Total global healthcare expenditure is about what percentage of global GDP as of 2017?,1%,2%,5%,10%,D -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_government_and_politics_dev.csv: -------------------------------------------------------------------------------- 1 | Uncertainty over the limits to presidential power is caused primarily by the fact that,the constitutional definition of those powers is broad and unspecific,most people agree that the Constitution places too many limits on presidential power,the Supreme Court consistently refuses to rule on cases concerning presidential 
powers,constitutional amendments have greatly increased presidential powers,A 2 | "The term ""budget deficit"" refers to the",annual increase in federal spending on the military,amount of interest on the national debt,difference between the initial budget proposals made by the president and Congress,amount the government spends in excess of its revenues,D 3 | "Which of the following cases established the precedent that a defendant must be informed of the right to remain silent, the right to a lawyer, and protection from self-incrimination?",Weeks v. United States,Betts v. Brady,Mapp v. Ohio,Miranda v. Arizona,D 4 | Which of the following statements about cabinet departments is FALSE?,They are established by the legislative branch.,Their members often don't have much influence over presidential decisions.,They cannot all be run by leaders who belong to the same political party the president does.,Not every federal agency is a cabinet department.,C 5 | Which of the following best states an argument made by James Madison in The Federalist number 10?,Honest politicians can prevent factions from developing.,Factions are more likely to occur in large republics than in small ones.,The negative effects of factionalism can be reduced by a republican government.,Free elections are the people's best defense against factionalism.,C 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/prehistory_dev.csv: -------------------------------------------------------------------------------- 1 | The great Mayan king Pacal built temples in the city of Palenque in order to:,satisfy the powerful Mayan astronomer priests.,"display his generosity to the common people, since they were allowed to live in the temples.","frighten away enemies, in particular the Spaniards.","legitimize his kingship, since his father was not royal.",D 2 | "According to Timothy Pauketat, the evidence for social stratification and political power at Cahokia 
suggests:",a center of Mississippian civilization with conditions similar to the rise of early states.,the limitations of authority in a Native American society of egalitarian foragers.,a simple chiefdom or perhaps a complex chiefdom had evolved by A.D. 1500.,a center of Mississippian civilization with conditions similar to societies on the Northwest Coast of North America.,A 3 | Researchers now believe that the decline of the Maya was caused chiefly by:,"a cataclysm of some kind, such as an earthquake, volcano, or tsunami.",ecological degradation resulting from slash-and-burn farming techniques.,endless wars between neighboring Mayan city-states.,practices of interbreeding that led to a steep rise in congenital disorders.,B 4 | Recent research on hominid species dating from the Middle Pliocene indicates there was (as of 2020):,"a great amount of species diversity, or a single species that exhibited a lot of diversity.",very little species diversity during this period and very few hominids.,decreased species diversity due to a prolonged ice age followed by a severe drought.,"decreased species diversity but increased numbers of hammerstones and flakes, indicating stone tool manufacture.",A 5 | What is the approximate mean cranial capacity of Homo erectus?,under 650 cc,about 800 cc,just under 1000 cc,1200 cc,C -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_psychology_dev.csv: -------------------------------------------------------------------------------- 1 | Ani believes that her attitudes and behavior play a central role in what happens to her. 
Such a belief is likely to be associated with,a strong superego.,low self-esteem.,low self-efficacy.,an internal locus of control.,D 2 | "According to Caplan's model of consultee-centered case consultation, the consultant is primarily interested in",identifying the causes and solutions of the client's presenting problems,identifying and eliminating the causes of the consultee's difficulties in handling a problem,establishing a hierarchy of authority to enable effective decision making,"presenting a single, well-defined and unambiguous course of action for the consultant to overcome skills deficits",B 3 | "While swimming in the ocean, Ivan is frightened by a dark shadow in the water even before he has the chance to identify what the shadow is. The synaptic connections taking place during this incident of fright are best described by which of the following?",Messages are sent from the thalamus directly to the amygdala.,"Messages are sent from the thalamus to the ""what"" and ""where"" pathways.",Messages are sent from the parasympathetic nervous system to the cerebral cortex.,Messages are sent from the frontal lobes to the pituitary gland.,A 4 | "According to the Individuals with Disabilities Education Improvement Act, which of the following must an educational agency do before it changes the educational placement of a student with a disability?",Give the child a trial period in the new environment,Notify the parents in writing,Obtain school board approval,Obtain parental consent,B 5 | Pascale is interested in the processing strategies children use to learn new information. 
Pascale would best be classified as what type of psychologist?,sociocultural,clinical,cognitive,behaviorist,C -------------------------------------------------------------------------------- /data/MMLU/data/val/abstract_algebra_val.csv: -------------------------------------------------------------------------------- 1 | The cyclic subgroup of Z_24 generated by 18 has order,4,8,12,6,A 2 | Find the order of the factor group Z_6/<3>.,2,3,6,12,B 3 | Statement 1 | A permutation that is a product of m even permutations and n odd permutations is an even permutation if and only if n is even. Statement 2 | Every group is isomorphic to a group of permutations.,"True, True","False, False","True, False","False, True",A 4 | Find the order of the factor group (Z_4 x Z_12)/(<2> x <2>),2,3,4,12,C 5 | Find the maximum possible order for some element of Z_4 x Z_6.,4,6,12,24,C 6 | Statement 1 | The symmetric group S_3 is cyclic. Statement 2 | Every group is isomorphic to some group of permutations.,"True, True","False, False","True, False","False, True",D 7 | "Statement 1 | If a and b are elements of finite order in an Abelian group, then |ab| is the lcm (|a|,|b|). Statement 2 | If g is a group element and g^n = e, then |g| = n.","True, True","False, False","True, False","False, True",B 8 | "Statement 1 | If f is a homomorphism from G to K and H is normal in G then f(H) is normal in K. Statement 2 | If f is a homomorphism from G to a group and H is finite subgroup of G, then |f(H)| divides |H|.","True, True","False, False","True, False","False, True",D 9 | Find the maximum possible order for an element of S_n for n = 7.,6,12,30,105,B 10 | Statement 1 | Every integral domain has a field of quotients. Statement 2 | A polynomial of degree n over a ring can have at most n zeros counting multiplicity.,"True, True","False, False","True, False","False, True",C 11 | "Statement 1 | If a group has an element of order 10, then the number of elements of order 10 is divisible by 4. 
Statement 2 | If m and n are positive integers and phi is the Euler phi function, then phi(mn) = phi(m)phi(n).","True, True","False, False","True, False","False, True",B 12 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/astronomy_dev.csv: -------------------------------------------------------------------------------- 1 | You are pushing a truck along a road. Would it be easier to accelerate this truck on Mars? Why? (Assume there is no friction),It would be harder since the truck is heavier on Mars.,It would be easier since the truck is lighter on Mars.,It would be harder since the truck is lighter on Mars.,It would be the same no matter where you are.,D 2 | Where do most short-period comets come from and how do we know?,The Kuiper belt; short period comets tend to be in the plane of the solar system just like the Kuiper belt.,The Kuiper belt; short period comets tend to come from random directions indicating a spherical distribution of comets called the Kuiper belt.,The asteroid belt; short period comets have orbital periods similar to asteroids like Vesta and are found in the plane of the solar system just like the asteroid belt.,The Oort cloud; short period comets tend to be in the plane of the solar system just like the Oort cloud.,A 3 | Say the pupil of your eye has a diameter of 5 mm and you have a telescope with an aperture of 50 cm. 
How much more light can the telescope gather than your eye?,10000 times more,100 times more,1000 times more,10 times more,A 4 | Why isn't there a planet where the asteroid belt is located?,A planet once formed here but it was broken apart by a catastrophic collision.,There was not enough material in this part of the solar nebula to form a planet.,There was too much rocky material to form a terrestrial planet but not enough gaseous material to form a jovian planet.,Resonance with Jupiter prevented material from collecting together to form a planet.,D 5 | Why is Mars red?,"Because the surface is covered with heavily oxidized (""rusted"") minerals.",Because the atmosphere scatters more light at bluer wavelengths transmitting mostly red light.,Because Mars is covered with ancient lava flows which are red in color.,Because flowing water on Mars's surface altered the surface minerals several billion years ago.,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/moral_scenarios_dev.csv: -------------------------------------------------------------------------------- 1 | "For which of these two scenarios does the main character (who uses I/me/my) do something clearly morally wrong, according to ordinary moral standards in the US as of 2020? Scenario 1 | I really loved my newborn baby boy. Scenario 2 | The new girl is embarrassed to live in a trailer park, so I invite the class to her house.","Wrong, Wrong","Wrong, Not wrong","Not wrong, Wrong","Not wrong, Not wrong",C 2 | "For which of these two scenarios does the main character (who uses I/me/my) do something clearly morally wrong, according to ordinary moral standards in the US as of 2020? Scenario 1 | I asked for a cigarette from the guy outside the store. 
Scenario 2 | When the security guard left his post, I pocketed a rare coin.","Wrong, Wrong","Wrong, Not wrong","Not wrong, Wrong","Not wrong, Not wrong",C 3 | "For which of these two scenarios does the main character (who uses I/me/my) do something clearly morally wrong, according to ordinary moral standards in the US as of 2020? Scenario 1 | I bought an old pick-up truck for work. Scenario 2 | I looked up at the gorgeous waterfall and threw a rock at it.","Wrong, Wrong","Wrong, Not wrong","Not wrong, Wrong","Not wrong, Not wrong",D 4 | "For which of these two scenarios does the main character (who uses I/me/my) do something clearly morally wrong, according to ordinary moral standards in the US as of 2020? Scenario 1 | During the house's demolition, I pushed him into a wall. Scenario 2 | When I choose a babysitter for my twin nieces, I don't care about her racial background.","Wrong, Wrong","Wrong, Not wrong","Not wrong, Wrong","Not wrong, Not wrong",B 5 | "For which of these two scenarios does the main character (who uses I/me/my) do something clearly morally wrong, according to ordinary moral standards in the US as of 2020? Scenario 1 | I took his wallet after he harassed me at the bar. Scenario 2 | I made love to my newborn baby boy.","Wrong, Wrong","Wrong, Not wrong","Not wrong, Wrong","Not wrong, Not wrong",A -------------------------------------------------------------------------------- /data/MMLU/data/dev/nutrition_dev.csv: -------------------------------------------------------------------------------- 1 | "In a cohort study, the risk ratio of developing diabetes was 0.86 when comparing consumers of tea (the exposed) to those who did not drink tea (the unexposed). Which one statement is correct (according to knowledge in 2020)? 
2 | ",The tea drinkers have lower risk of developing diabetes.,The tea drinkers have higher risk of developing diabetes.,Based on the information given we cannot tell if the observed difference in disease risk is the result of chance.,"The risk ratio is close to the value one, so there is no difference in disease risk between the two groups.",C 3 | "Which of the following statements is correct (according to knowledge in 2020)? 4 | ",Consumers with phenylketonuria must avoid the consumption of the sweetener aspartame,Consumers with phenylketonuria must avoid the consumption of the sweetener saccharin,Consumers with phenylketonuria must avoid the consumption of the sweetener sucralose,Consumers with phenylketonuria must avoid the consumption of the sweetener acesulfame K,A 5 | "Which of the following is the most plausible explanation for the protective effect of dietary fibre against cancer of the colon, as of 2020? 6 | ","Propionic acid, formed during colonic fibre fermentation inhibits liver fatty acid synthesis","Butyric acid, formed during colonic fibre fermentation stimulates ""silencing"" of the SLC5A8 tumour suppressor gene",None of these options are correct,"Butyric acid, formed during colonic fibre fermentation stimulates anti-oxidant defences in the colon",D 7 | "Which of the following statements about iodine is correct, as of 2020? 8 | ",50% of adults consume iodine at levels below the RNI,Dairy products are a poor source of iodine,The iodine content of organic milk is generally lower that the level in non-organic milk,UK dietary reference values recommend an increase in iodine intake in pregnancy,C 9 | "What is the first-line drug for patients with type 2 diabetes and obesity, as of 2020? 
10 | ",Acarbose,Metformin,Sulphonylureas,Insulin,B -------------------------------------------------------------------------------- /data/MMLU/data/dev/professional_accounting_dev.csv: -------------------------------------------------------------------------------- 1 | Box a nongovernmental not-for-profit organization had the following transactions during the year: Proceeds from sale of investments $80000 Purchase of property plant and equipment $10000 Proceeds from long-term debt $100000 Loss on sale of investment $5000 What amount should be reported as net cash provided by financing activities in Box's statement of cash flows?,"$70,000","$75,000","$80,000",100000,D 2 | "One hundred years ago, your great-great-grandmother invested $100 at 5% yearly interest. What is the investment worth today?","$13,000",$600,"$15,000","$28,000",A 3 | "Krete is an unmarried taxpayer with income exclusively from wages. By December 31, year 1, Krete's employer has withheld $16,000 in federal income taxes and Krete has made no estimated tax payments. On April 15, year 2, Krete timely filed for an extension request to file her individual tax return, and paid $300 of additional taxes. Krete's year 1 tax liability was $16,500 when she timely filed her return on April 30, year 2, and paid the remaining tax liability balance. What amount would be subject to the penalty for underpayment of estimated taxes?",$0,$500,"$1,650","$16,500",A 4 | "On January 1, year 1, Alpha Co. signed an annual maintenance agreement with a software provider for $15,000 and the maintenance period begins on March 1, year 2. Alpha also incurred $5,000 of costs on January 1, year 1, related to software modification requests that will increase the functionality of the software. Alpha depreciates and amortizes its computer and software assets over five years using the straight-line method. 
What amount is the total expense that Alpha should recognize related to the maintenance agreement and the software modifications for the year ended December 31, year 1?","$5,000","$13,500","$16,000","$20,000",B 5 | An auditor traces the serial numbers on equipment to a nonissuer’s subledger. Which of the following management assertions is supported by this test?,Valuation and allocation,Completeness,Rights and obligations,Presentation and disclosure,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/business_ethics_dev.csv: -------------------------------------------------------------------------------- 1 | "Beyond the business case for engaging in CSR there are a number of moral arguments relating to: negative _______, the _______that corporations possess and the ________ of business and society.","Externalities, Power, Independence","Publicity, Insubstantial resources, Mutual dependence","Publicity, Power, Independence","Externalities, Power, Mutual dependence",D 2 | "_______ is the direct attempt to formally or informally manage ethical issues or problems, through specific policies, practices and programmes.",Corporate social responsibility,Business ethics management,Sustainability,Environmental management,B 3 | "To ensure the independence of the non-executive board members, they are a number of steps which can be taken, which include non-executives being drawn from _______ the company, being appointed for a _________ time period as well as being appointed _________.","Outside, Limited, Independently","Inside, Limited, Intermittently","Outside, Unlimited, Intermittently","Inside, Unlimited, Independently",A 4 | "Three contrasting tactics that CSO's can engage in to meet their aims are ________ which typically involves research and communication, ________, which may involve physically attacking a company's operations or ________, often involving some form of _______.","Non-violent direct action, Violent 
direct action, Indirect action, Boycott","Indirect action, Instrumental action, Non-violent direct action, Information campaign","Indirect action, Violent direct action, Non-violent direct-action Boycott","Non-violent direct action, Instrumental action, Indirect action, Information campaign",C 5 | "In contrast to _______, _______ aim to reward favourable behaviour by companies. The success of such campaigns have been heightened through the use of ___________, which allow campaigns to facilitate the company in achieving _________ .","Buycotts, Boycotts, Blockchain technology, Charitable donations","Buycotts, Boycotts, Digital technology, Increased Sales","Boycotts, Buyalls, Blockchain technology, Charitable donations","Boycotts, Buycotts, Digital technology, Increased Sales",D 6 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/professional_psychology_dev.csv: -------------------------------------------------------------------------------- 1 | "One of your therapy clients asks your advice about a good weight- reduction program. You have investigated the programs in the community and are enrolled in the one you consider the best. This program offers a $50 bonus to its patrons for each new person they bring into the program. Under these circumstances, your most appropriate response would be to",tell your client the pros and cons of each program you know about except for the one in which you are enrolled,recommend to your client the program in which you are enrolled and explain the $50 bonus you will receive,recommend to your client the program in which you are enrolled and offer to have the $50 bonus credited to your client's account in the program,"tell your client the pros and cons of each program you know about, but do not claim the $50 bonus if your client enrolls in your program",D 2 | "There are three ways to measure the Central Tendency: the Mean, the Median and the Mode. 
From your knowledge about them, what is the mode?",less sensitive to extreme scores than the mean,more useful for skewed distributions,sensitive to extreme values and highly skewed distributions,the most frequently occurring number,D 3 | "In terms of Hofstede’s (1980) five cultural dimensions, the United States scores at the top of the scale on:",individualism.,individualism and power distance.,power distance and masculinity.,uncertainty avoidance.,A 4 | Carl Jung believed that a client's transference:,is a fantasy that distracts the client from reality.,represents “mixed feelings” toward the therapist. ,"""is a form of """"acting out.""""""",reflects the client’s personal and collective unconscious.,D 5 | "In the construction of a multiple regression equation for purposes of prediction, the optimal combination of measures is one in which the predictors",are uncorrelated with each other but are moderately correlated with the criterion,have low correlations with each other and low correlations with the criterion,are highly intercorrelated with each other and moderately correlated with the criterion,have low correlations with the criterion bur are moderately correlated with each other,A 6 | -------------------------------------------------------------------------------- /data/MMLU/data/val/college_chemistry_val.csv: -------------------------------------------------------------------------------- 1 | Which of the following is true for Br2 at standard temperature and pressure?,It is a colorless gas.,It is a red-brown volatile liquid.,It is a colorless volatile liquid.,It is a yellow metallic solid.,B 2 | The 13C spectrum of which isomer of C6H14 has lines with three distinct chemical shifts?,hexane,2-methylpentane,3-methylpentane,"2,3-dimethylbutane",A 3 | The Rydberg equation v = R_H(1/n_1^2 - 1/n_2^2) accurately predicts the UV-visible emission spectrum of the hydrogen atom. 
A form of the Rydberg equation may also be used to predict the UV-visible emission for all of the following EXCEPT,"hydride ion, H−","deuterium atom, D","tritium atom, T","helium cation, He+",A 4 | Reduction of D-xylose with NaBH4 yields a product that is a,racemic mixture,single pure enantiomer,mixture of two diastereomers in equal amounts,meso compound,D 5 | Calculate the polarization of a proton in a magnetic field of 335 mT and 10.5 T at 298 K.,6.345 x 10^-4 at 0.335 T; 9.871 x 10^-5 at 10.5 T,0.793 x 10^-4 at 0.335 T; 6.931 x 10^-7 at 10.5 T,1.148 x 10^-6 at 0.335 T; 3.598 x 10^-5 at 10.5 T,4.126 x 10^-3 at 0.335 T; 2.142 x 10^-6 at 10.5 T,D 6 | The anhydride of Ba(OH)2 is,BaH2,BaOH,Ba,BaO,D 7 | A 1 M solution of TEMPO in chloroform at room temperature shows broader EPR spectrum than that of a 1 mM solution. Which sentence describes this phenomenon most accurately?,"in the more concentrated solution, the lines are broadened due to the dipole-dipole interaction between the TEMPO molecules","in the more concentrated solution, the lines are broadened by the slow tumbling of TEMPO molecules","in the more concentrated solution, the lines are broadened due to the exchange interaction between the TEMPO molecules","in the more concentrated solution, the lines are broadened by the hyperfine interaction between the TEMPO molecules",C 8 | A set of hybrid sp^3 orbitals for a carbon atom is given above. Which of the following is NOT true about the orbitals?,The orbitals are degenerate.,The set of orbitals has a tetrahedral geometry.,These orbitals are constructed from a linear combination of atomic orbitals.,Each hybrid orbital may hold four electrons.,D -------------------------------------------------------------------------------- /data/MMLU/data/val/human_sexuality_val.csv: -------------------------------------------------------------------------------- 1 | Axl Tulip has a thin clear discharge coming from his penis and mild discomfort when he urinates. 
He is most likely suffering from _______ and should be treated with ________.,gonorrhea penicillin,gonorrhea tetracycline,chlamydia penicillin,chlamydia tetracycline,D 2 | "In adulthood, exhibitionists",are typically psychiatrically disturbed,are unassertive,have excellent social skills,have fulfilling sex lives,B 3 | "Before contact with Europeans, many First Nations populations viewed gay men, lesbians, and people who assumed cross-gender roles with",disgust and revulsion.,pity and indulgence.,fear and awe.,respect and admiration.,D 4 | Vaginal lubrication begins __________ after the onset of arousing stimuli,10 to 30 seconds,30 to 60 seconds,1 to 2 minutes,2 to 4 minutes,A 5 | "__________, an odorous steroid that is well documented as a pheremone in pigs, has been isolated in the underarm sweat of humans.",Ephderine,Human Chorionic Gonadotropin (hCG),Hyalunronidase,Androstenol,D 6 | Which of the following drugs inhibits sexual performance by activating the sympathetic system?,alcohol,cocaine,marijuana,opium,B 7 | Which of the following changes is NOT associated with menopause?,the walls of the vagina become thick,the vagina shrinks in length,the labia minora shrink,production of estrogen declines,A 8 | "What is the last stage in the ""coming out"" process?",Integration,Relationship Development,Segregation,Exploration,A 9 | The people of Inis Beag believe that:,menopause can produce insanity,masturbation produces facial deformities,incest is acceptable with daughters but not with sons,masturbation produces genital deformities,A 10 | The bulk of the research evidence suggests that:,all women have a G-spot,all women with a G-spot ejaculate,all women with a G-spot also have a prostate gland,none of the above,D 11 | The feedback loop in women which regulates the production of sex hormones includes all but which of the following?,introitus,progesterone,follicle-stimulating hormone,pituitary gland,A 12 | IUD use has been linked with which of the following,pelvic 
inflammatory disease,tubal infections,uterine infections,all of the above,D -------------------------------------------------------------------------------- /data/MMLU/data/dev/machine_learning_dev.csv: -------------------------------------------------------------------------------- 1 | "A 6-sided die is rolled 15 times and the results are: side 1 comes up 0 times; side 2: 1 time; side 3: 2 times; side 4: 3 times; side 5: 4 times; side 6: 5 times. Based on these results, what is the probability of side 3 coming up when using Add-1 Smoothing?",2.0/15,1.0/7,3.0/16,1.0/5,B 2 | Which image data augmentation is most common for natural images?,random crop and horizontal flip,random crop and vertical flip,posterization,dithering,A 3 | "You are reviewing papers for the World’s Fanciest Machine Learning Conference, and you see submissions with the following claims. Which ones would you consider accepting? ",My method achieves a training error lower than all previous methods!,My method achieves a test error lower than all previous methods! (Footnote: When regularisation parameter λ is chosen so as to minimise test error.),My method achieves a test error lower than all previous methods! (Footnote: When regularisation parameter λ is chosen so as to minimise cross-validaton error.),My method achieves a cross-validation error lower than all previous methods! (Footnote: When regularisation parameter λ is chosen so as to minimise cross-validaton error.),C 4 | "To achieve an 0/1 loss estimate that is less than 1 percent of the true 0/1 loss (with probability 95%), according to Hoeffding's inequality the IID test set must have how many examples?",around 10 examples,around 100 examples,between 100 and 500 examples,more than 1000 examples,D 5 | "Traditionally, when we have a real-valued input attribute during decision-tree learning we consider a binary split according to whether the attribute is above or below some threshold. 
Pat suggests that instead we should just have a multiway split with one branch for each of the distinct values of the attribute. From the list below choose the single biggest problem with Pat’s suggestion:",It is too computationally expensive.,It would probably result in a decision tree that scores badly on the training set and a testset.,It would probably result in a decision tree that scores well on the training set but badly on a testset.,It would probably result in a decision tree that scores well on a testset but badly on a training set.,C -------------------------------------------------------------------------------- /data/MMLU/data/dev/international_law_dev.csv: -------------------------------------------------------------------------------- 1 | What types of force does Article 2(4) of the UN Charter prohibit?,Article 2(4) encompasses only armed force,"Article 2(4) encompasses all types of force, including sanctions",Article 2(4) encompasses all interference in the domestic affairs of States,Article 2(4) encompasses force directed only against a State's territorial integrity,A 2 | What is the judge ad hoc?,"If a party to a contentious case before the ICJ does not have a national sitting as judge, it is entitled to nominate someone as a judge solely for that case, with the title of judge ad hoc",Judge ad hoc is the member of the bench of the ICJ with a casting vote,"Judge ad hoc is a surrogate judge, in case a judge is disqualified or passes away",Judge ad hoc is the judge that each party will always nominate in every contentious case,A 3 | Would a reservation to the definition of torture in the ICCPR be acceptable in contemporary practice?,This is an acceptable reservation if the reserving country's legislation employs a different definition,This is an unacceptable reservation because it contravenes the object and purpose of the ICCPR,This is an unacceptable reservation because the definition of torture in the ICCPR is consistent with customary 
international law,This is an acceptable reservation because under general international law States have the right to enter reservations to treaties,B 4 | When 'consent' can serve as a circumstance precluding the wrongfulness of a State conduct?,Consent can serve as a circumstance precluding the wrongfulness whenever it is given,Consent can never serve as a circumstance precluding wrongfulness,"Consent can serve as a circumstance precluding wrongfulness, provided the consent is valid and to the extent that the conduct remains within the limits of the consent given","Consent can always serve as a circumstance precluding wrongfulness, no matter which organ of the State gives it",C 5 | How the consent to be bound of a State may be expressed?,The consent of a State to be bound is expressed only by ratification,"The consent of a state to be bound by a treaty may be expressed by signature, ratification, acceptance, approval or accession",The consent of a State to be bound is expressed by signature,The consent of a State to be bound is expressed by whatever means they choose,B 6 | -------------------------------------------------------------------------------- /data/MMLU/data/val/world_religions_val.csv: -------------------------------------------------------------------------------- 1 | Which of the following is NOT a category of Shari'ah law?,Criminal,Neutral,Unlawful,Commendable,A 2 | " Which Guru ordered the building of the Darbar Sahib, also known as the Divine Court?",Guru Nanak,Guru Ram Das,Guru Arjan,Guru Amar Das,C 3 | " Yamago Soko created the ""way of the warrior,"" known as which of the following?",Bushido,Samurai,Nembutsu,Gomadaki,A 4 | Which of the following was part of the role of a deaconess?,Ministering to the sick,Preparing women for baptism,Praying for the suffering,All of the above,D 5 | Which term refers to the five main areas into which early Christianity was divided?,Presbyteries,Sees,Churches,Synods,B 6 | What is the name of the theologian who 
argued against Pelagius?,John,Anselm,Arius,Augustine,D 7 | Jean Calvin is known for emphasizing which of the following doctrines?,Transubstantiation,Miaphystism,Glossolalia,Predestination,D 8 | Which form of Buddhism was the first to make significant inroads in North America?,Zen,Pure Land,Theravada,Tibetan,A 9 | The Great Cloud Sutra prophesied the imminent arrival of which person?,Maitreya (Milo),The Buddha,Zhou Dunyi,Wang Yangming,A 10 | What is the Hebrew word is used to refer to the Nazi's catastrophic program of genocide?,Shoah,Seder,Passover,Diaspora,A 11 | Which modern cult believed that Japanese society needed to be cleansed through an apocalypse?,Shinnyo-en,Aum Shinrikyo,Rissho Koseikai,Honji Suijaku,B 12 | For how many minutes per day do Jaina devotees practise meditation or reflection?,48,60,12,36,A 13 | What is the English name for Hanukkah?,The Festival of Booths,The Festival of the Giving of Torah,The Festival of Lights,The Festival of Weeks,C 14 | "Which movement rejected Confucianism and Buddhism in favor of ""true"" Japanese spiritual traditions?",Kami,Kokugaku,Samurai,Gomadaki,B 15 | The Daoist concept of wuwei means which of the following?,Non-action,Contemplation,Meditation,Intentionality,A 16 | Who was the immediate successor to Guru Nanak?,Guru Angad,His son,Guru Gobind Singh,Guru Hargobind,A 17 | " What does Ghandi's title ""Mahatma"" mean?","""Enlightened leader""","""Great soul""","""Enlightened soul""","""Great leader""",B 18 | What is the Decalogue?,The Exodus,The Covenant,The Ten Commandments,The creation story,C 19 | Which Jewish group demanded a narrow and literal interpretation of the Torah?,Sadducees,Pharisees,Essenes,Zealots,A 20 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/high_school_statistics_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following is a correct statement about correlation?,"If the slope of the 
regression line is exactly 1, then the correlation is exactly 1.","If the correlation is 0, then the slope of the regression line is undefined.",Switching which variable is called x and which is called y changes the sign of the correlation.,The correlation r is equal to the slope of the regression line when z-scores for the y-variable are plotted against z-scores for the x-variable.,D 2 | "Suppose X and Y are random variables with E(X) = 37, var(X) = 5, E(Y) = 62, and var(Y) = 12. What are the expected value and variance of the random variable X + Y?","E(X + Y) = 99, var(X + Y) = 8.5","E(X + Y) = 99, var(X + Y) = 13","E(X + Y) = 99, var(X + Y) = 17",There is insufficient information to answer this question.,D 3 | "After a frost warning was issued, the owner of a large orange grove asked his workers to spray all his trees with water. The water was supposed to freeze and form a protective covering of ice around the orange blossom. Nevertheless, the owner suspected that some trees suffered considerable damage due to the frost. To estimate the proportion of trees that suffered more than 50 percent damage due to the frost, he took a random sample of 100 trees from his grove. What is the response variable in this experiment?",The proportion of trees that suffered more than 50 percent damage due to frost.,The number of trees affected by the frost.,The number of trees sampled from the grove.,"For each sampled tree, whether it suffered more than 50 percent damage or at most 50 percent damage.",D 4 | "A new smartwatch is manufactured in one part of a factory, then secured for shipping in another, independent part of the factory. The weight of the smartwatch has a mean of 62 grams and a standard deviation of 1.0 grams. The weight of the packaging (box, user's guide, bubble wrap, etc.) has a mean of 456 grams and a standard deviation of 6 grams. 
Together, the distribution of the weight of the smartwatch and its packaging would have the following mean and standard deviation:",Mean 518 grams; standard deviation 7.0 grams,Mean 518 grams; standard deviation 3.5 grams,Mean 518 grams; standard deviation 6.1 grams,Mean 394 grams; standard deviation 6.1 grams,C 5 | "Which of the following sets has the smallest standard deviation? Which has the largest? 6 | I: {1,2,3} 7 | II: {-10,10} 8 | III: {100} 9 | ","I, II","II, III","III, I","III, II",D -------------------------------------------------------------------------------- /data/MMLU/data/val/college_mathematics_val.csv: -------------------------------------------------------------------------------- 1 | "If a polynomial f(x) over the real numbers has the complex numbers 2 + i and 1 - i as roots, then f(x) could be",x^3 + 5x^2 + 4x + 1,x^4 - 6x^3 + 15x^2 - 18x + 10,x^3 - x^2 + 4x + 1,x^4 + 7x^2 + 10,B 2 | "What is the volume of the solid in xyz-space bounded by the surfaces y = x^2, y = 2 - x^2, z = 0, and z = y + 3?",8/3,16/3,32/3,104/105,C 3 | "Suppose A, B, and C are statements such that C is true if exactly one of A and B is true. If C is false, which of the following statements must be true?","If A is true, then B is false.","If A is false, then B is false.","If A is false, then B is true.",Both A and B are true.,B 4 | "In a survey of 100 undergraduate math majors at a certain college, the following information is 5 | obtained about the courses they are taking during the Spring semester: 6 | 41 are enrolled in real analysis, 7 | 44 are enrolled in differential equations, 48 are enrolled in linear algebra, 8 | 11 are enrolled in both real analysis and linear algebra, 9 | 14 are enrolled in both real analysis and differential equations, 10 | 19 are enrolled in both differential equations and linear algebra, and 10 are not enrolled in any of these three courses. 
11 | How many of the students surveyed are enrolled in all three of these courses?",1,2,3,4,A 12 | "k digits are to be chosen at random (with repetitions allowed) from {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}. What is the probability that 0 will not be chosen?",(9/10)^k,(1/10)^k,(k-1)/k,1/10,A 13 | "In the complex z-plane, the set of points satisfying the equation z^2 = |z|^2 is a",pair of points,circle,half-line,line,D 14 | "Let S, T, and U be nonempty sets, and let f: S -> T and g: T -> U be functions such that the function g ∘ f : S -> U is one-to-one (injective). Which of the following must be true?",f is one-to-one.,f is onto.,g is one-to-one.,g is onto.,A 15 | A fair die is tossed 360 times. The probability that a six comes up on 70 or more of the tosses is,greater than 0.50,between 0.16 and 0.50,between 0.02 and 0.16,between 0.01 and 0.02,C 16 | For what value of b is the line y = 10x tangent to the curve y = e^(bx) at some point in the xy-plane?,10/e,10,10e,e^10,A 17 | "If the finite group G contains a subgroup of order five but no element of G other than the identity is its own inverse, then the order of G could be",8,20,30,35,D 18 | "If one arch of the curve y = sin x is revolved around the x-axis, what's the volume of the generated solid?",π^2 / 4,π^2 / 2,2π,π^2,B 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](figs/logo.webp) 2 | 3 |

4 | 🚀 EMNLP 2024 Findings 📃 5 | Paper 6 |

7 | 8 | # 📖 LongGenBench: Long-context Generation Benchmark 9 | 10 | 11 | ## Overview 12 | 13 | **LongGenBench** is a newly introduced benchmark specifically designed to evaluate the long-context generation capabilities of large language models (LLMs). Unlike traditional retrieval-based benchmarks, LongGenBench focuses on the ability of models to generate coherent and contextually accurate text over extended passages. It allows for customizable generation context lengths and requires LLMs to respond with a single, cohesive long-context answer. Key findings from LongGenBench evaluations include: 14 | 15 | 1. Both API-accessed and open-source models experience performance degradation in long-context generation, ranging from 1.2% to 47.1%. 16 | 2. Different LLM series show varying trends of performance degradation, with the Gemini-1.5-Flash model showing the least degradation among API models, and the Qwen2 series performing best among open-source models. 17 | 18 | ## Method 19 | ![Main](figs/main-fig.png) 20 | 21 | ## ⚙️ How to evaluate on LongGenBench 22 | ``` 23 | conda create -yn LongGenBench python=3.9 24 | conda activate LongGenBench 25 | pip install -r requirements.txt 26 | ``` 27 | 28 | Replace the API key with your own API key in the bash file. 
29 | 30 | ### 📚 GSM8K 31 | 32 | ``` 33 | bash run_longgenbench_GSM8K.sh 34 | ``` 35 | 36 | 37 | ### 📚 MMLU 38 | 39 | ``` 40 | bash run_longgenbench_MMLU.sh 41 | ``` 42 | Each MMLU subtask runs on its first K questions and then writes the result to `./outputs/LongGenBench_MMLU/LongGenBench_MMLU_{subtask_name}.txt`. 43 | 44 | 45 | ## 📚 Bibtex 46 | If you use or extend our work, please cite the following [paper](https://arxiv.org/abs/2410.04199): 47 | ``` 48 | @misc{liu2024longgenbench, 49 | title={LongGenBench: Long-context Generation Benchmark}, 50 | author={Xiang Liu and Peijie Dong and Xuming Hu and Xiaowen Chu}, 51 | year={2024}, 52 | eprint={2410.04199}, 53 | archivePrefix={arXiv}, 54 | primaryClass={cs.CL}, 55 | url={https://arxiv.org/abs/2410.04199}, 56 | } 57 | 58 | 59 | ``` 60 | 61 | ## 📚 Acknowledgement 62 | 63 | We would like to thank the authors of [Active-Prompt](https://github.com/shizhediao/active-prompt) and [Chain-of-Thought-Hub](https://github.com/FranxYao/chain-of-thought-hub/) for providing the codebase. 
64 | 65 | -------------------------------------------------------------------------------- /data/MMLU/data/val/electrical_engineering_val.csv: -------------------------------------------------------------------------------- 1 | "A 4 pole lap wound dc generator has 4 brushes, if one of the brush is damaged, what will be the change in V, I and P ratings","V, I and P","V/2, I/2 and P/4","V/2, I and P/2","V, I/2 and P/2",D 2 | No load saturation characteristics are plotted between,no load voltage and field current,no load voltage and armature current,short circuit current and field current,short circuit current and armature current,A 3 | When checked with an ohm meter an open resistor reads,high but within tolerance.,low but not zero.,zero.,infinite.,D 4 | Which one of the following is not a vectored interrupt?,TRAP.,RST 7.5.,RST 6.5.,INTR.,D 5 | "In AC circuits, the connection of measuring instruments cause loading effect errors which may effect",only the magnitude of the quantity being measured.,only phase of the quantity being measured.,both of above.,"magnitude, phase and waveform of the quantity being measured.",D 6 | "If a circuit consists of two mass elements, two spring elements and four friction elements. Find the order of transfer function?",2,4,6,12,B 7 | A capacitor carries a charge of 0.1 C at 5 V. 
Its capacitance is,0.02 F,0.5 F,0.05 F,0.2 F,A 8 | Inside a hollow conducting sphere,electric field is zero.,electric field is a non zero constant.,electric field changes with magnitude of the charge given to the conductor.,electric field changes with distance from the center of the sphere.,A 9 | What will be the total flux emitted by a source of 60 candle power?,754.2 lumens.,0.001326 lumens.,60 lumens.,None of these.,A 10 | "For audio frequency applications, the popular oscillator used is",Wien bridge oscillator,Hartley oscillator,Crystal oscillator,Phase shift oscillator,A 11 | Which number system has a base of 16,Decimal,Octal,Hexadecimal,None,C 12 | "A motor has a thermal heating time constant of 50 mm. When the motor runs continuous of full scale, its final temperature rise is 80° C, what would be the temperature rise after 1 hour, if the motor runs continuously on full load?",55.9° C.,58° C.,56° C.,60° C.,A 13 | Which of the following is/are the advantages of a closed loop control system?,The closed loop systems are accurate.,The closed loop systems are less affected by noise.,both A and B.,nether A nor B.,C 14 | A digital circuit that can store on bit is a,XOR gate,flip-flop,gate,registor,B 15 | A combinational PLD with a programmable AND array and a fixed OR array is called a,PLD,PROM,PAL,PLA,C 16 | For dielectric heating the range of frequency normally employed is,10 KHz to 100 KHz.,100 KHz to 10 MHz.,1 MHz to 10 MHz.,10 MHz to 40 MHz.,D 17 | -------------------------------------------------------------------------------- /data/LongGenBench_GSM8K_prompt/LongGenBench_prompt.txt: -------------------------------------------------------------------------------- 1 | Question_1: 2 | There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? 
3 | 4 | Question_2: 5 | If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? 6 | 7 | Question_3: 8 | Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? 9 | 10 | Question_4: 11 | Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? 12 | 13 | Question_5: 14 | Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now? 15 | 16 | Question_6: 17 | There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? 18 | 19 | Question_7: 20 | Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? 21 | 22 | Question_8: 23 | Olivia has $23. She bought five bagels for $3 each. How much money does she have left? 24 | 25 | Answer_1: 26 | There are 15 trees originally. 27 | Then there were 21 trees after some more were planted. 28 | So there must have been 21 - 15 = 6. 29 | The answer is 6. 30 | 31 | Answer_2: 32 | There are originally 3 cars. 33 | 2 more cars arrive. 34 | 3 + 2 = 5. 35 | The answer is 5. 36 | 37 | Answer_3: 38 | Originally, Leah had 32 chocolates. 39 | Her sister had 42. 40 | So in total they had 32 + 42 = 74. 41 | After eating 35, they had 74 - 35 = 39. 42 | The answer is 39. 43 | 44 | Answer_4: 45 | Jason started with 20 lollipops. 46 | Then he had 12 after giving some to Denny. 47 | So he gave Denny 20 - 12 = 8. 48 | The answer is 8. 49 | 50 | Answer_5: 51 | Shawn started with 5 toys. 52 | If he got 2 toys each from his mom and dad, then that is 4 more toys. 53 | 5 + 4 = 9. 54 | The answer is 9. 55 | 56 | Answer_6: 57 | There were originally 9 computers. 58 | For each of 4 days, 5 more computers were added. 59 | So 5 * 4 = 20 computers were added. 
60 | 9 + 20 is 29. 61 | The answer is 29. 62 | 63 | Answer_7: 64 | Michael started with 58 golf balls. 65 | After losing 23 on tues- day, he had 58 - 23 = 35. 66 | After losing 2 more, he had 35 - 2 = 33 golf balls. 67 | The answer is 33. 68 | 69 | Answer_8: 70 | Olivia had 23 dollars. 71 | 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. 72 | So she has 23 - 15 dollars left. 73 | 23 - 15 is 8. 74 | The answer is 8. -------------------------------------------------------------------------------- /data/MMLU/data/dev/college_computer_science_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following regular expressions is equivalent to (describes the same set of strings as) (a* + b)*(c + d)?,a*(c + d)+ b(c + d),a*(c + d)* + b(c + d)*,a*(c + d)+ b*(c + d),(a + b)*c +(a + b)*d,D 2 | "A certain pipelined RISC machine has 8 general-purpose registers R0, R1, . . . , R7 and supports the following operations. 3 | ADD Rs1, Rs2, Rd Add Rs1 to Rs2 and put the sum in Rd 4 | MUL Rs1, Rs2, Rd Multiply Rs1 by Rs2 and put the product in Rd 5 | An operation normally takes one cycle; however, an operation takes two cycles if it produces a result required by the immediately following operation in an operation sequence. Consider the expression AB + ABC + BC, where variables A, B, C are located in registers R0, R1, R2. If the contents of these three registers must not be modified, what is the minimum number of clock cycles required for an operation sequence that computes the value of AB + ABC + BC?",5,6,7,8,B 6 | "The Singleton design pattern is used to guarantee that only a single instance of a class may be instantiated. Which of the following is (are) true of this design pattern? 7 | I. The Singleton class has a static factory method to provide its instance. 8 | II. The Singleton class can be a subclass of another class. 9 | III. 
The Singleton class has a private constructor.",I only,II only,III only,"I, II, and III",D 10 | "A compiler generates code for the following assignment statement. 11 | G := (A + B) * C - (D + E) * F 12 | The target machine has a single accumulator and a single-address instruction set consisting of instructions load, store, add, subtract, and multiply. For the arithmetic operations, the left operand is taken from the accumulator and the result appears in the accumulator. The smallest possible number of instructions in the resulting code is",5,6,7,9,D 13 | "Consider a computer design in which multiple processors, each with a private cache memory, share global memory using a single bus. This bus is the critical system resource. Each processor can execute one instruction every 500 nanoseconds as long as memory references are satisfied by its local cache. When a cache miss occurs, the processor is delayed for an additional 2,000 nanoseconds. During half of this additional delay, the bus is dedicated to serving the cache miss. During the other half, the processor cannot continue, but the bus is free to service requests from other processors. On average, each instruction requires 2 memory references. On average, cache misses occur on 1 percent of references. 
What proportion of the capacity of the bus would a single processor consume, ignoring delays due to competition from other processors?",1/50,1/27,1/25,2/27,B 14 | -------------------------------------------------------------------------------- /data/MMLU/data/val/medical_genetics_val.csv: -------------------------------------------------------------------------------- 1 | The allele associated with sickle cell anemia apparently reached a high frequency in some human populations due to:,random mating,superior fitness of heterozygotes in areas where malaria was present,migration of individuals with the allele into other populations,a high mutation rate at that specific gene,B 2 | RFLP analysis is a technique that,uses hybridization to detect specific DNA restriction fragments in genomic DNA,is used to determine whether a gene is transcribed in specific cells,measures the transfer frequency of genes during conjugation,is used to detect genetic variation at the protein level,A 3 | "Repeat core sequences consisting of 2, 3, or 4 base pairs are known as what?",Single nucleotide polymorphisms (SNPs),Microsatellites,Minisatellites,Satellites,B 4 | Marfan syndrome is caused by mutations in the gene which encodes _______.,Collagen,Dystrophin,Elastin,Fibrillin,D 5 | If an autosomal recessive disorder which shows Hardy-Weinberg equilibrium has an incidence of 1 in 6400 then the frequency of carriers is approximately _______.,1 in 20,1 in 40,1 in 80,1 in 160,B 6 | Tracing of a cell lineage during development means that,the cells giving rise to and derived from a specific cell are known,the sequence of the enhancers for developmental genes is known,the regulatory genes for the organism have been genetically mapped,cell components in the membrane involved in signaling have been isolated,A 7 | Mutations which occur in body cells which do not go on to form gametes can be classified as:,auxotrophic mutations,somatic mutations,morphological mutations,oncogenes,B 8 | "Assuming that 
the level of glucose is low, a mutation in the repressor of the lac operon in E. coli, preventing binding of the repressor to the operator, should result in:",constitutive expression of the lac operon genes,lack of expression or reduced expression of the lac operon genes under all circumstances,expression of the genes only when lactose is present,expression of the genes only when lactose is absent,A 9 | The “sticky ends” generated by restriction enzymes allow,selection for plasmids lacking antibiotic resistance,pieces of DNA from different sources to hybridize to each other and to be joined together,replication of transfer RNA within the bacterial cell,easy identification of plasmids which carry an insert,B 10 | The presence of an extra digit is referred to as __________.,arachnodactyly,brachydactyly,clinodactyly,polydactyly,D 11 | Which of the following statements about heritability are true?,is a measure of the level of heterozygotes in a population,is a measure of the proportion of variation that is due to genetic causes,is a measure of proportion of repeated DNA in an organism,is a measure of level of gene linkage,B 12 | -------------------------------------------------------------------------------- /data/MMLU/data/val/anatomy_val.csv: -------------------------------------------------------------------------------- 1 | Which of the following terms describes the body's ability to maintain its normal state?,Anabolism,Catabolism,Tolerance,Homeostasis,D 2 | Which of the following structures travel through the substance of the parotid gland?,The maxillary artery,The maxillary artery and retromandibular vein,"The maxillary artery, retromandibular vein and facial artery","The maxillary artery, retromandibular vein, facial artery and buccal branch of the mandibular nerve",B 3 | A medical practitioner places a stethoscope over the patient's seventh right intercostal space in the mid-axillary line. 
The stethoscope overlies the,upper lobe of the lung.,middle lobe of the lung.,lower lobe of the lung.,costo-diaphragmatic recess.,C 4 | The maxillary sinus,is lined by stratified squamous epithelium.,drains into the superior meatus of the nasal cavities.,is innervated by branches of the maxillary division of the trigeminal nerve.,Receives its blood supply from the first part of the maxillary artery.,C 5 | Which of the following is flexible connective tissue that is attached to bones at the joints?,Adipose,Cartilage,Epithelial,Muscle,B 6 | Tonsillar tissue is commonly found,on the posterior wall of the oropharynx.,under the mucosa of the ventral surface of the tongue.,between the palatoglossal and palatopharyngeal folds.,at all three sites.,C 7 | The best place to listen to the general heart sound with a stethoscope is the,fifth left intercostal space in the midclavicular line.,second left intercostal space one inch from the sternum.,third left rib at its junction with the sternum.,sternum midway between the sternal angle and xiphisternum.,A 8 | Emissary veins connect the intracranial venous sinuses to,veins draining the scalp.,veins draining the eye.,the pterygoid venous plexus.,All of the above areas.,D 9 | Smooth muscle cells contain,actin but not myosin.,myosin but not actin.,actin and myosin arranged irregularly.,actin and myosin arranged in parallel arrays.,C 10 | "Which of the following controls body temperature, sleep, and appetite?",Adrenal glands,Hypothalamus,Pancreas,Thalamus,B 11 | Which one of the following is brought about by activation of sympathetic nerves?,Contraction of bronchial smooth muscle.,Dilation of the pupil of the eye.,Increase in intestinal secretion,Decrease in heart rate,B 12 | The trachea,lies deep to the oesophagus.,lies superficial to the oesophagus.,is totally enclosed by cartilaginous rings.,divides at the thoracic inlet.,B 13 | Pharyngeal mucosa is innervated by the glossopharyngeal nerves because it develops from the,first 
pharyngeal arch.,second pharyngeal arch.,third pharyngeal arch.,fourth pharyngeal arch.,C 14 | A patient is most likely to experience pain due to infection of the ethmoidal air cells sinus,at the base of the skull.,on the forehead.,in the cheeks.,between the eyes.,D 15 | -------------------------------------------------------------------------------- /data/MMLU/data/val/business_ethics_val.csv: -------------------------------------------------------------------------------- 1 | Disqualification of directors may result from breaches under the,Sale of Goods Act 1979,Financial Services Act 1986,Companies Act 2006 and Insolvency Act 1986,Health and Safety at Work Act 1974,C 2 | "In a global context, many companies have significant ______ power due to their ability to threaten governments, in the face of ________ with relocation to other territories, which Beck (1998) describes as 'corporate power of _______.","Economic, Commercial competition, Social sanction","Political, Undesirable regulation, Transnational withdrawal","Social, Commercial competition, Social sanction","Social, Undesirable regulation, Transnational withdrawal",B 3 | Ethics of duty is a principle based ethical theory attributed to Emmanuel Kant.,"1,3","2,3","1,2","1,2,3",A 4 | "While businesses traditionally engaged in civil society through ______, recently more business-________ collaboration have emerged in order to tackle social and environmental problems. 
Likewise, CSO's strategies have evolved with an increasing number setting up _________, which are more commercially orientated then traditional charity models.","Philanthropy, CSO, Social enterprises","Philanthropy, Government, Lobbying groups","Advertising campaigns, CSO, Lobbying groups","Advertising campaigns, Government, Social enterprises",A 5 | "The relationship between shareholders and the company, or so called ________, describes shareholders as _________ and managers of the company as ________.","Agency relation, Agent, Principals","Stakeholder management, Agent, Principals","Agency relation, Principal, Agents","Stakeholder management, Principal, Agents",C 6 | Executive directors are responsible for running the firm.,"1,4","1,3","2,3","1,2,3",A 7 | Subjection to emotional exploitation.,"1,2,4","1,3,4","2,3,4","1,2,3,4,5",B 8 | The recent global sustainability agenda has been dominated by which of the following organisations sustainability goals?,Marks & Spencer's 'Plan A',United Nations 'Sustainable Development Goals',The Alibaba Group's 'Sustainability Initiatives',Unilever's 'Sustainable Living Plan',B 9 | The OECD argues that corporate governance problems arise because:,Ownership and control is separated,Managers always act in their own self interest,Profit maximization is the main objective of organizations,Stakeholders have differing levels of power,A 10 | What is meant by the phrase CSR?,Corporate Social Responsibility,Company Social Responsibility,Corporate Society Responsibility,Company Society Responsibility,A 11 | "______, ________ and _______ are all factors which can impinge upon stakeholder communication and collaboration.","Resource intensity, Culture clash, Accountability","Resource clash, Culture intensity, Accountability","Resource intensity, Culture clash, Coordinative fluidity","Resource clash, Culture clash, Coordinative fluidity",A 12 | -------------------------------------------------------------------------------- 
/data/MMLU/data/dev/high_school_computer_science_dev.csv: -------------------------------------------------------------------------------- 1 | Which of the following is an example of the use of a device on the Internet of Things (IoT) ?,A car alerts a driver that it is about to hit an object.,A hiker uses a G P S watch to keep track of her position.,A refrigerator orders milk from an online delivery service when the milk in the refrigerator is almost gone.,A runner uses a watch with optical sensors to monitor his heart rate.,C 2 | "Many Web browsers allow users to open anonymous windows. During a browsing session in an anonymous window, the browser does not record a browsing history or a list of downloaded files. When the anonymous window is exited, cookies created during the session are deleted. Which of the following statements about browsing sessions in an anonymous window is true?","The activities of a user browsing in an anonymous window will not be visible to people who monitor the user's network, such as the system administrator.",Items placed in a Web store's shopping cart for future purchase during the anonymous browsing session will not be saved on the user's computer.,A user will not be able to log in to e-mail or social media accounts during the anonymous browsing session.,A user browsing in an anonymous window will be protected from viruses launched from any web sites visited or files downloaded.,B 3 | "What is the output of ""abc""[::-1] in Python 3?",Error,abc,cba,c,C 4 | "In the program below, the initial value of x is 5 and the initial value of y is 10. 
5 | IF (X < O) 6 | { 7 | DISPLAY (""Foxtrot"") 8 | } 9 | ELSE 10 | { 11 | IF (X > y) 12 | { 13 | DISPLAY (""Hotel"") 14 | } 15 | ELSE 16 | { 17 | IF (y > O) 18 | { 19 | DISPLAY (""November"") 20 | } 21 | ELSE 22 | { 23 | DISPLAY (""Yankee"") 24 | } 25 | } 26 | } 27 | 28 | What is displayed as a result of running the program?",Foxtrot,Hotel,November,Yankee,C 29 | "A list of numbers has n elements, indexed from 1 to n. The following algorithm is intended to display the number of elements in the list that have a value greater than 100. The algorithm uses the variables count and position. Steps 3 and 4 are missing. 30 | Step 1: Set count to 0 and position to 1. 31 | Step 2: If the value of the element at index position is greater 32 | than 100, increase the value of count by 1. 33 | Step 3: (missing step) 34 | Step 4: (missing step) 35 | Step 5: Display the value of count. 36 | Which of the following could be used to replace steps 3 and 4 so that the algorithm works as intended?","Step 3: Increase the value of position by 1. 37 | Step 4: Repeat steps 2 and 3 until the value of count is greater than 100.","Step 3: Increase the value of position by 1. 38 | Step 4: Repeat steps 2 and 3 until t he value of position is greater than n.","Step 3: Repeat step 2 until the value of count is greater than 100. 39 | Step 4: Increase the value of position by 1.","Step 3: Repeat step 2 until the value of position is greater than n. 
40 | Step 4: Increase the value of count by 1.",D 41 | -------------------------------------------------------------------------------- /data/MMLU/data/val/machine_learning_val.csv: -------------------------------------------------------------------------------- 1 | Which of the following guidelines is applicable to initialization of the weight vector in a fully connected neural network.,Should not set it to zero since otherwise it will cause overfitting,Should not set it to zero since otherwise (stochastic) gradient descent will explore a very small space,Should set it to zero since otherwise it causes a bias,Should set it to zero in order to preserve symmetry across all neurons,B 2 | Which of the following statements about Naive Bayes is incorrect?,Attributes are equally important.,Attributes are statistically dependent of one another given the class value.,Attributes are statistically independent of one another given the class value.,Attributes can be nominal or numeric,B 3 | Statement 1| The L2 penalty in a ridge regression is equivalent to a Laplace prior on the weights. Statement 2| There is at least one set of 4 points in R^3 that can be shattered by the hypothesis set of all 2D planes in R^3.,"True, True","False, False","True, False","False, True",D 4 | "For the one-parameter model, mean-Square error (MSE) is defined as follows: 1/(2N) \sum (y_n − β_0)^2 . We have a half term in the front because,",scaling MSE by half makes gradient descent converge faster.,presence of half makes it easy to do grid search. ,it does not matter whether half is there or not. ,none of the above,C 5 | "In Yann LeCun's cake, the cherry on top is",reinforcement learning,self-supervised learning,unsupervised learning,supervised learning,A 6 | "What is the dimensionality of the null space of the following matrix? 
A = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]",0,1,2,3,C 7 | The number of test examples needed to get statistically significant results should be _,Larger if the error rate is larger.,Larger if the error rate is smaller.,Smaller if the error rate is smaller.,It does not matter.,B 8 | "Compared to the variance of the Maximum Likelihood Estimate (MLE), the variance of the Maximum A Posteriori (MAP) estimate is ________",higher,same,lower,it could be any of the above,C 9 | "Which of the following best describes the joint probability distribution P(X, Y, Z) for the given Bayes net. X <- Y -> Z?","P(X, Y, Z) = P(Y) * P(X|Y) * P(Z|Y)","P(X, Y, Z) = P(X) * P(Y|X) * P(Z|Y)","P(X, Y, Z) = P(Z) * P(X|Z) * P(Y|Z)","P(X, Y, Z) = P(X) * P(Y) * P(Z)",A 10 | "You observe the following while fitting a linear regression to the data: As you increase the amount of training data, the test error decreases and the training error increases. The train error is quite low (almost what you expect it to), while the test error is much higher than the train error. What do you think is the main reason behind this behavior. Choose the most probable option.",High variance,High model bias,High estimation bias,None of the above,A 11 | "Statement 1| If there exists a set of k instances that cannot be shattered by H, then VC(H) < k. 
Statement 2| If two hypothesis classes H1 and H2 satisfy H1 ⊆ H2, then VC(H1) ≤ VC(H2).","True, True","False, False","True, False","False, True",D 12 | -------------------------------------------------------------------------------- /data/MMLU/data/val/us_foreign_policy_val.csv: -------------------------------------------------------------------------------- 1 | What policies did Alexander Hamilton pursue to build the American economy?,Free trade with all European countries on an equal basis and a smaller government role in the economy,Establishment of a national currency and higher government spending,"Tariffs to protect American manufacturing, establishment of a national bank, and the repayment of all debts.",Limits on immigration and imported technology to encourage higher wages,C 2 | How did relations with Russia develop under George H.W. Bush?,Cautious support for Gorbachev and Yeltsin,Unreserved support for Gorbachev and Yeltsin,"Denouncement of Gorbachev, but support for Yeltsin",Denouncement of Gorbachev and Yeltsin,A 3 | What fact impinged upon America's 'neutral' stance towards the belligerent in World War I between 1915 and 1917?,US banks funded the allies much more than Germany and Austria-Hungary,US banks funded Germany and Austria-Hungary much more than the allies,Wilson secretly favoured the German position,None of the above,A 4 | Who were the original five permanent members on the UN Security Council?,"United States, Soviet Union, Germany, France, and Great Britain","United States, Germany, France, Great Britain, and Japan","United States, Great Britain, Republic of China, India, and Brazil","United States, Soviet Union, France, Great Britain, and Republic of China",D 5 | "According to Realists, what accounts for the onset of the Cold War?",Ideological differences,A power vacuum,The expansionist nature of the Soviet Union,Both b and c,D 6 | What is soft power?,"Use of coercive measures short of war, such as economic sanctions",Ways in which the 
values and culture of a country provide it with influence in the international system,Ineffective use of one's power,When states use carrots (incentives) instead of sticks (threats) to influence other countries.,B 7 | Why did Congress oppose Wilson's proposal for the League of Nations?,It feared the League would encourage Soviet influence in the US,It feared the League would be anti-democratic,It feared the League would commit the US to an international alliance,Both a and b,C 8 | What is the core reason why the US is viewed as exceptional?,Because it is a great power but uses that power sparingly,"Because of its intensely liberal character, which is rooted in a sense of its special mission",Because of its exclusive concentration on issues of power and security,"Because of its despotic character, which is rooted in a sense of its special mission",B 9 | What is meant by a grand strategy of 'primacy'?,Assurance of US parity with other powers,Striving for American dominance in the international system,Putting America's interests first,Promotion of America as the world's first democracy,B 10 | Which factor is cited by Revisionists as the primary cause of the Cold War?,The threat posed by the Soviet Union,Domestic concerns of the US,Soviet ideology,None of the above,B 11 | Who is considered the “father” of containment?,George Kennan,John Foster Dulles,Henry Kissinger,Dwight Eisenhower,A 12 | -------------------------------------------------------------------------------- /data/MMLU/data/val/high_school_computer_science_val.csv: -------------------------------------------------------------------------------- 1 | "In Python 3, which of the following function removes all leading and trailing whitespace in string?","replace(old, new [, max])",strip([chars]),swapcase(),title(),B 2 | Which of the following best describes a Web server?,A computer system that delivers Web pages to clients,A computer system that determines the shortest path between two computers over the 
Internet,A computer system running software that provides a user-friendly interface for creating Web pages,A computer system that translates domain names to IP addresses,A 3 | "A user purchased a new smart home device with embedded software and connected the device to a home network. The user then registered the device with the manufacturer, setting up an account using a personal e-mail and password. Which of the following explains how a phishing attack could occur against the user of the smart home device?",A vulnerability in the device’s software is exploited to gain unauthorized access to other devices on the user’s home network.,A vulnerability in the device’s software is exploited to install software that reveals the user’s password to an unauthorized individual.,"The user is sent an e-mail appearing to be from the manufacturer, asking the user to confirm the account password by clicking on a link in the e-mail and entering the password on the resulting page.",The user’s account is sent an overwhelming number of messages in an attempt to disrupt service on the user’s home network.,C 4 | "In Python 3, let r = lambda q: q * 2. What is r(3)?",2,6,3,1,B 5 | Consider the following list. - Assembly language - Block-based programming language - Logic gate - Machine language Which of the following arranges the list in order from highest level of abstraction to lowest level of abstraction?,"Block-based programming language, assembly language, machine language, logic gate","Block-based programming language, machine language, assembly language, logic gate","Block-based programming language, machine language, logic gate, assembly language","Machine language, block-based programming language, assembly language, logic gate",A 6 | "A large hospital maintains a list of patients’ records in no particular order. 
To find the record of a given patient, which represents the most efficient method that will work?",Do a sequential search on the name field of the records.,Do a binary search on the name field of the records.,Use insertion sort to sort the records alphabetically by name; then do a sequential search on the name field of the records.,Use mergesort to sort the records alphabetically by name; then do a sequential search on the name field of the records.,A 7 | Which types of functions grow the slowest?,O(N^(1/2)),O(N^(1/4)),O(N^(1/N)),O(N),C 8 | Which is a valid expression in Python 3.5?,sort('ab'),sorted('ab'),"""ab"".sort()",1/0,B 9 | "Consider the following segment of code. 10 | String word = ""conflagration""; 11 | int x = word.indexOf(""flag""); 12 | String s = word.substring(0, x); 13 | What will be the result of executing the above segment?",String s will be the empty string.,"String s will contain ""flag"".","String s will contain ""conf"".","String s will contain ""con"".",D 14 | -------------------------------------------------------------------------------- /data/MMLU/data/val/college_physics_val.csv: -------------------------------------------------------------------------------- 1 | "Characteristic X rays, appearing as sharp lines on a continuous background, are produced when high-energy electrons bombard a metal target. Which of the following processes results in the characteristic X rays?",Electrons producing Čerenkov radiation,Electrons colliding with phonons in the metal,Electrons filling inner shell vacancies that are created in the metal atoms,Electrons combining with protons to form neutrons,C 2 | "Two students perform an experiment in which they drop a ball from rest from a known height above the ground and measure the speed of the ball just before it strikes the ground. From repeated measurement, the students estimate the uncertainty in the measured speed of the ball to be 10 percent. 
Which of the following gives the uncertainty in the kinetic energy of the ball? (Assume the uncertainty in the ball’s mass is negligibly small.)",5%,10%,15%,20%,D 3 | The rest mass of a particle with total energy 5.0 GeV and momentum 4.9 GeV/c is approximately,0.1 GeV/c^2,0.2 GeV/c^2,0.5 GeV/c^2,1.0 GeV/c^2,D 4 | "A grating spectrometer can just barely resolve two wavelengths of 500 nm and 502 nm, respectively. Which of the following gives the resolving power of the spectrometer?",2,250,"5,000","10,000",B 5 | Electromagnetic radiation emitted from a nucleus is most likely to be in the form of,gamma rays,microwaves,ultraviolet radiation,visible light,A 6 | For which of the following thermodynamic processes is the increase in the internal energy of an ideal gas equal to the heat added to the gas?,Constant temperature,Constant volume,Constant pressure,Adiabatic,B 7 | "An ideal monatomic gas expands quasi-statically to twice its volume. If the process is isothermal, the work done by the gas is Wi. If the process is adiabatic, the work done by the gas is Wa. Which of the following is true?",Wi = Wa,0 = Wi < Wa,0 < Wi < Wa,0 2R(mv^2/R),Wc = 2R(mv^2/R),Wc < 2R(mv^2/R),Cannot be determined,C 18 | -------------------------------------------------------------------------------- /data/MMLU/data/dev/professional_law_dev.csv: -------------------------------------------------------------------------------- 1 | "A state legislature has recently enacted a statute making it a misdemeanor to curse or revile or use obscene or opprobrious language toward or in reference to a police officer performing his duties. A student at a state university organized a demonstration on campus to protest the war. The rally was attended by a group of 50 students who shouted anti-war messages at cars passing by. To show his contempt for the United States, the student sewed the American flag to the rear of his jeans. 
When a police officer saw the flag sewn on the student's jeans, he approached and told him to remove the flag or he would be placed under arrest. The student became angered and shouted at the police officer, ""Listen, you bastard, I'll wear this rag anywhere I please. "" The student was subsequently placed under arrest and charged with violating the state statute. The student subsequently brings suit in state court challenging the constitutionality of the statute. The strongest constitutional argument for the student is that",the statute is void for vagueness under the Fourteenth Amendment's due process clause.,the statute is invalid because it violates the petitioner's freedom of speech under the First Amendment.,the statute is an abridgment of freedom of speech under the First Amendment because less restrictive means are available for achieving the same purpose.,the statute is overbroad and consequently invalid under the First and Fourteenth Amendments.,D 2 | "A state has recently enacted a statute prohibiting the disposal of any nuclear wastes within the state. This law does not contravene or conflict with any federal statutes. A man operates a company in the state that is engaged in the disposal of nuclear wastes. Subsequent to the passage of the state statute, the man, not yet aware of the new law, entered into contracts with many out-of-state firms to dispose of their nuclear wastes in the state. On account of this new law, however, the man will be unable to perform these contracts. Assume that the man has standing to challenge this state law. Which of the following presents his strongest constitutional grounds to challenge the state law prohibiting the disposal of nuclear wastes within the state?",The commerce clause.,The equal protection clause of the Fourteenth Amendment.,"The privileges and immunities clause of Article IV, Section 2. ",The contract clause.,A 3 | Judge took judicial notice of some facts at the beginning of the trial. 
Which of the following is not an appropriate kind of fact for judicial notice?,Indisputable facts.,Facts that have been asserted by individual political organizations.,Facts recognized to be true by common knowledge.,Facts capable of scientific verification.,B 4 | "On October 1, 1980, a developer, owner of several hundred acres in a rural county, drafted a general development plan for the area. The duly recorded plan imposed elaborate limitations and restrictions upon the land in the plan, which was to be developed as a residential district. The restrictions were to extend to all persons acquiring any of the lots and to their heirs, assigns, and lessees. It was further provided that all subsequent owners would be charged with due notice of the restrictions. Among those restrictions in the general plan were the following:(22) A franchise right is created in a strip of land 10 feet in width along the rear of each lot for the use of public utility companies with right of ingress and egress. (23) No house or structure of any kind shall be built on the aforementioned strip of land running through the said blocks. In 2000, a retiree purchased one of the lots, built a house, and erected a fence in the rear of his property within the restricted area. In 2004, a teacher purchased a lot adjacent to the retiree's property and built a new house. Two years later, a librarian purchased the lot that adjoined the teacher's property. The three deeds to those properties each contained references to the deed book where the general plan was recorded. In 2008, the librarian began the construction of a seven-foot post-and-rail fence along the line dividing his lot with the teacher's, and along the center of the area subject to the franchise right. Although the teacher objected to its construction, the fence was completed. 
If the teacher seeks a mandatory injunction to compel removal of the librarian's fence, the court will most likely","grant relief, because the fence was in violation of the easement restriction. ","grant relief, because the encroachment of the fence violated the restriction in the original plan. ","deny relief, because the teacher failed to enforce the restriction against the retiree. ","deny relief, because the fence would not be construed as ""a structure"" within the terms of the restriction. ",B 5 | "A son owed a creditor $5,000. The son's father contacted the creditor and told him that he wanted to pay the son's debt. The father signed a document that stated the father would pay the son's debt at a rate of $500 a month for 10 months. The creditor made no written or oral commitment to forbear to sue the son to collect the $5,000 debt, and the father made no oral or written request for any such forbearance. For the next five months, the father made and the creditor accepted the $500 monthly payments as agreed. During that period, the creditor, in fact, did forbear to take any legal action against the son. However, the father then informed the creditor that he would make no further payments on the debt. Which of the following is the most persuasive argument that the father is liable to the creditor under the terms of their agreement?","The father's promise and the creditor's reliance thereon, if proved, gave rise to a valid claim by the creditor against the father based on the doctrine of promissory estoppel. ","Because it was foreseeable that the father's promise would induce the creditor to forbear taking any action against the son, such forbearance was, as a matter of law, a bargained-for consideration for the father's promise. ","The father's five payments to the creditor totaling $2,500 manifested a serious intent on the father's part to be contractually bound, and such manifestation is generally recognized as an effective substitute for consideration. 
","By assuming the antecedent debt obligation that the son owed to the creditor, the father became a surety whose promise to the creditor was enforceable, since it was in writing and supported by adequate consideration. ",A 6 | --------------------------------------------------------------------------------