├── .gitignore
├── config
├── sample.json
├── greedy.json
└── beam.json
├── example.png
├── prompts
├── __pycache__
│ ├── wrap_prompt.cpython-38.pyc
│ └── wrap_prompt.cpython-39.pyc
├── sr_feedback
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ └── hella.json
├── generate_cot
│ ├── sys_instruction.json
│ ├── strategy.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ ├── gsm8k.json
│ └── hella.json
├── l2m_mid_answer
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ └── hella.json
├── direct_answer
│ ├── sys_instruction.json
│ ├── strategy.json
│ ├── wino.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── gsm8k.json
│ └── hella.json
├── l2m_question
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── wino.json
│ ├── strategy.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── hella.json
│ └── gsm8k.json
├── l2m_final_answer
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ └── hella.json
├── cot_answer
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── strategy.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ ├── gsm8k.json
│ └── hella.json
├── rt_answer
│ ├── sys_instruction.json
│ ├── strategy.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ ├── gsm8k.json
│ └── hella.json
├── cons_answer
│ ├── sys_instruction.json
│ ├── strategy.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ └── hella.json
├── sr_answer
│ ├── sys_instruction.json
│ ├── piqa.json
│ ├── csqa.json
│ ├── siqa.json
│ ├── wino.json
│ └── hella.json
├── generate_knowledge
│ ├── siqa.json
│ └── wino.json
└── wrap_prompt.py
├── demo_result
└── csqa
│ └── Llama-2-13b-chat-hf_cons_answer_dev_2.json
├── README.md
├── cal_token_cost.py
├── config.py
├── requirements.txt
├── metrics.py
├── cal_toxic_rate.py
├── llm_intervention.py
├── result
└── csqa
│ ├── attn-False_cnt-100_rep_std.json
│ └── attn-True_cnt-100_rep_std.json
└── llm_reason.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /old_code
2 | /model
3 | /__pycache__
4 | /config.py
--------------------------------------------------------------------------------
/config/sample.json:
--------------------------------------------------------------------------------
1 | {"max_new_tokens":500, "max_length":2000, "res_decoding":false}
--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BugMakerzzz/toxic_cot/HEAD/example.png
--------------------------------------------------------------------------------
/config/greedy.json:
--------------------------------------------------------------------------------
1 | {"max_new_tokens":500, "do_sample":false, "max_length":2000, "res_decoding":false}
--------------------------------------------------------------------------------
/prompts/__pycache__/wrap_prompt.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BugMakerzzz/toxic_cot/HEAD/prompts/__pycache__/wrap_prompt.cpython-38.pyc
--------------------------------------------------------------------------------
/prompts/__pycache__/wrap_prompt.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BugMakerzzz/toxic_cot/HEAD/prompts/__pycache__/wrap_prompt.cpython-39.pyc
--------------------------------------------------------------------------------
/config/beam.json:
--------------------------------------------------------------------------------
1 | {"max_new_tokens":500, "num_beams":5, "do_sample":true, "top_k":10, "top_p":0.9, "num_return_sequences":5, "max_length":2000, "res_decoding":false}
2 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to give feedback on the given rationale. Your response should be in this form: 'Feedback: {feedback}'."}
--------------------------------------------------------------------------------
/prompts/generate_cot/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to give the reason for the answer to the given questions. Your response should be in this form: 'Reason: {reason}'."}
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to answer the given subquestion in reasoning tasks. You should reply the correct answer to the subquestion. Your response should be in this form: 'Answer: {answer}'."}
--------------------------------------------------------------------------------
/prompts/direct_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your commonsense knowledge and reasoning abilities to answer the given questions in commonsense reasoning tasks. You should reply the correct option. If you don't know the answer to a question, please reply 'Answer: None'."}
--------------------------------------------------------------------------------
/prompts/l2m_question/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to break down the questions into subquestions. You should reply the correct subquestions. Your response should be in this form: 'To solve the question, we need to solve these subquestions:\nQuestion 1:{subquestion}\nQuestion 2:{subquestion}'."}
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities and the given context to answer the given questions in reasoning tasks. You should reply the answer. Your response should be in this form: 'So the answer is: ({option}) {answer}'. If you don't know the answer to a question, please reply 'Answer: None'."}
--------------------------------------------------------------------------------
/prompts/cot_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to answer the given questions in reasoning tasks. You should reply the correct rationales and the answer. Your response should be in this form: '{reason} So the answer is: ({option}) {answer}'. If you don't know the answer to a question, please reply 'Answer: None'."}
--------------------------------------------------------------------------------
/prompts/rt_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to answer the given questions in reasoning tasks. You should reply the correct answer according to the given rationale and question. Your response should be in this form: 'Answer: ({option}) {answer}'. If you don't know the answer to a question, please reply 'Answer: None'."}
--------------------------------------------------------------------------------
/prompts/cons_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities to answer the given questions in reasoning tasks. You should reply the correct rationales and wrong rationales. Your response should be in this form: 'Explanation: {Correct reason} Answer: ({option}) {answer} Wrong Explanation: {Wrong reason} Wrong Answer: ({option}) {answer}'."}
--------------------------------------------------------------------------------
/prompts/sr_answer/sys_instruction.json:
--------------------------------------------------------------------------------
1 | {"instruction":"You are a helpful, respectful and honest assistant. You should use your reasoning abilities, the given rationale and feedback to update your answer to the given questions in reasoning tasks. You should reply the correct rationales and the answer. Your response should be in this form: '{reason} So the answer is: ({option}) {answer}'. If you don't know the answer to a question, please reply 'Answer: None'."}
--------------------------------------------------------------------------------
/prompts/direct_answer/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no",
4 | "answer":"Answer: (1) yes."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no",
8 | "answer":"Answer: (2) no."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no",
12 | "answer":"Answer: (1) yes."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no",
16 | "answer":"Answer: (1) yes."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no",
20 | "answer":"Answer: (2) no."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/demo_result/csqa/Llama-2-13b-chat-hf_cons_answer_dev_2.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question": "Where could you find hundreds of thousands of home?\n(1) field (2) neighborhood (3) star can (4) city or town (5) apartment building ",
4 | "answer": " Explanation: Hundreds of thousands of homes can be found in a city or town.\nAnswer: (4) city or town.\nWrong Explanation: Hundreds of thousands of homes can be found in a field.\nWrong Answer: (1) field.",
5 | "pred": "4",
6 | "label": "4",
7 | "cor_flag": true
8 | },
9 | {
10 | "question": "Most people who are family like to greet each other with a what?\n(1) apartments (2) listen to music (3) have friends (4) know what ophiolites (5) hug ",
11 | "answer": " Explanation: People who are family usually greet each other with a hug.\nAnswer: (5) hug.\nWrong Explanation: People who are family usually greet each other with a listen to music.\nWrong Answer: (2) listen to music.",
12 | "pred": "5",
13 | "label": "5",
14 | "cor_flag": true
15 | },
16 | {
17 | "acc": 1.0
18 | }
19 | ]
20 |
--------------------------------------------------------------------------------
/prompts/direct_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy",
4 | "answer":"Answer: (1) Samuel."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick",
8 | "answer":"Answer: (1) Kyle."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael",
12 | "answer":"Answer: (2) Michael."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel",
16 | "answer":"Answer: (1) Betty."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher",
20 | "answer":"Answer: (2) Christopher."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/direct_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water",
4 | "answer":"Answer: (2) fill it with water"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.",
8 | "answer":"Answer: (2) Douse each stain with a couple cans of soda."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.",
12 | "answer":"Answer: (1) Wait until brewed tea bag is cool, then apply on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.",
16 | "answer":"Answer: (1) Use acetone."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.",
20 | "answer":"Answer: (1) Use a hand saw to cut the handles."
21 | }
22 | ]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Focus on Your Question! Interpreting and Mitigating Toxic CoT Problems in Commonsense Reasoning
2 |
3 | [[Paper]](https://arxiv.org/abs/2402.18344)
4 |
5 |
6 |
7 | ### 0. Instructions
8 |
9 | This repository hosts the code for our paper ***"Focus on Your Question! Interpreting and Mitigating Toxic CoT Problems in Commonsense Reasoning"***, which was accepted to the **ACL 2024 main** conference.
10 |
11 |
12 |
13 | ### 1. Installation
14 |
15 | ```bash
16 | git clone https://github.com/BugMakerzzz/toxic_cot.git
17 | cd toxic_cot
18 | pip install -r requirements.txt
19 | ```
20 |
21 |
22 |
23 | ### 2. Run Attribution Tracing Experiment
24 |
25 | ```bash
26 | python llm_cot_probe.py
27 | ```
28 |
29 |
30 |
31 | ### 3. Run Intervention Tracing Experiment
32 |
33 | ```bash
34 | python llm_intervention.py
35 | ```
36 |
37 |
38 |
39 | ### 4. Run Residual Decoding Method
40 |
41 | ```bash
42 | python res_reason.py
43 | ```
44 |
45 |
46 |
47 | ### 5. Run Serial-Position Swap Method
48 |
49 | ```bash
50 | python rt_reason.py
51 | ```
52 |
53 |
54 |
55 | ### 6. Run Baselines
56 |
57 | ```bash
58 | python llm_reason.py
59 | ```
60 |
61 |
--------------------------------------------------------------------------------
/prompts/direct_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans ",
4 | "answer":"Answer: (1) atlas. "
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest ",
8 | "answer":"Answer: (3) natural habitat. "
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit ",
12 | "answer":"Answer: (4) computer network. "
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops ",
16 | "answer":"Answer: (5) pet shops. "
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache ",
20 | "answer":"Answer: (2) feel safe. "
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/generate_knowledge/siqa.json:
--------------------------------------------------------------------------------
1 | Input: What will Quinn want to do next? \n (A) Eat messy snacks (B) help out a friend (C) Pick up the dirty clothes \n Quinn wanted to help me clean my room up because it was so messy.
2 | Knowledge: A messy room likely contains dirty clothes.
3 |
4 | Input: What will Aubrey want to do next? \n (A) help Aubrey go back home (B) keep on partying without the mom (C) going on with the mom \n Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital.
5 | Knowledge: One should attend to their sick family member.
6 |
7 | Input: How would Jan feel afterwards? \n (A) scared of losing the cat (B) normal (C) relieved for fixing the problem \n Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way.
8 | Knowledge: One usually has positive emotions after solving a problem.
9 |
10 | Input: How would Sydney feel afterwards? \n (A) affected (B) like they released their tension (C) worse \n Sydney had so much pent up emotion, they burst into tears at work.
11 | Knowledge: Crying can be a catharsis.
12 |
13 | Input: What does Sydney need to do before this? \n (A) be bad at her job (B) do a good job (C) be lazy \n Sydney got a raise and a new promotion.
14 | Knowledge: Pay raise and promotion are usually results of good job performance.
--------------------------------------------------------------------------------
/prompts/generate_knowledge/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The GPS and map helped me navigate home. I got lost when the _ got turned off. \n (A) GPS (B) map",
4 | "knowledge":"A GPS device is electronic, while a map is paper-based."
5 | },
6 | {
7 | "question":"I picked up a bag of peanuts and raisins for a snack. I wanted a sweeter snack out so I ate the _ for now. \n (A) raisins (B) peanuts",
8 | "knowledge":"Peanuts contain a lot of fat. Raisins contain a lot of sugar."
9 | },
10 | {
11 | "question":"The geese prefer to nest in the fields rather than the forests because in the _ predators are more hidden. \n (A) fields (B) forests",
12 | "knowledge":"There are more trees in the forests than in the fields."
13 | },
14 | {
15 | "question":"Once in Poland, Dennis enjoyed the trip more than Jason because _ had a shallow understanding of the Polish language. \n (A) Dennis (B) Jason",
16 | "knowledge":"Those who know the native language would enjoy the trip better."
17 | },
18 | {
19 | "question":"Adam put handwash only clothes in the washer but Aaron washed them by hand as _ was lazy. \n (A) Adam (B) Aaron",
20 | "knowledge":"Washing clothes with washer takes less effort than by hand."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/direct_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes ",
4 | "answer":"Answer: (3) Pick up the dirty clothes"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom ",
8 | "answer":"Answer: (3) going on with the mom"
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem ",
12 | "answer":"Answer: (3) relieved for fixing the problem"
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse ",
16 | "answer":"Answer: (2) like they released their tension"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy ",
20 | "answer":"Answer: (2) do a good job"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/direct_answer/gsm8k.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Sam and Jeff had a skipping competition at recess. The competition was split into four rounds. Sam completed 1 more skip than Jeff in the first round. Jeff skipped 3 fewer times than Sam in the second round. Jeff skipped 4 more times than Sam in the third round. Jeff got tired and only completed half the number of skips as Sam in the last round. If Sam skipped 16 times in each round, what is the average number of skips per round completed by Jeff?",
4 | "answer":"Answer: 14"
5 | },
6 | {
7 | "question":"Stella and Twinkle are filling up a truck with a capacity of 6000 stone blocks at the rate of 250 blocks per hour per person. They work for four hours and are then joined by 6 other people who also work at the same rate. How many hours did filling the truck take?",
8 | "answer":"Answer: 6"
9 | },
10 | {
11 | "question":"John has five more roommates than twice as many as Bob. If Bob has 10 roommates, how many roommates does John have?",
12 | "answer":"Answer: 25"
13 | },
14 | {
15 | "question":"Kyle bought 2 glass bottles that can hold 15 origami stars each. He then bought another 3 identical glass bottles. How many stars must Kyle make to fill all the glass bottles he bought?",
16 | "answer":"Answer: 75"
17 | },
18 | {
19 | "question":"Ann is baking cookies. She bakes three dozen oatmeal raisin cookies, two dozen sugar cookies, and four dozen chocolate chip cookies. Ann gives away two dozen oatmeal raisin cookies, 1.5 dozen sugar cookies, and 2.5 dozen chocolate chip cookies. How many total cookies does she keep?",
20 | "answer":"Answer: 36"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water\nQuestion 1: What does flood a room mean?",
4 | "answer":"Answer: Flood means to become covered with water."
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.\nQuestion 1: How to get oil stains out? Answer: Oil stains can be removed by dissolving the oil in a solution.\nQuestion 2: What can oil be soluble in?",
8 | "answer":"Answer: Oil can be soluble in Sodium carbonate solution. The soda is a kind of sodium carbonate solution. So oil can be soluble in soda."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.\nQuestion 1: How to soothe a painful sunburn?",
12 | "answer":"Answer: Sunburn can be alleviated by applying tea bag on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.\nQuestion 1: What properties must the fuel have? Answer: Materials used as fuel must be flammable.\nQuestion 2: What materials are flammable?",
16 | "answer":"Answer: Acetone is flammable, while vinegar is not."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.\nQuestion 1: What direction of force is needed for cutting?",
20 | "answer":"Answer: Cutting requires horizontal force."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water",
4 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What does flood a room mean?\nQuestion 2: How does a person flood a room?"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.",
8 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: How to get oil stains out?\nQuestion 2: What can oil be soluble in?"
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.",
12 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: How to soothe a painful sunburn?\nQuestion 2: Whether hot or cold material can soothe a sunburn?"
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.",
16 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What properties must the fuel have?\nQuestion 2: What materials are flammable?"
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.",
20 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What direction of force is needed for cutting?\nQuestion 2: What tools can provide people with force in that direction?"
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cot_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water",
4 | "answer":"Too much water can cause flooding. Thus, if we want to flood a room, we should use water. So the answer is: (2) fill it with water"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.",
8 | "answer":"Sodium carbonate solution can wash away oil stains. The soda is a kind of sodium carbonate solution. Thus, you can use cans of soda to get oil stains out of your driveway. So the answer is: (2) Douse each stain with a couple cans of soda."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.",
12 | "answer":"Sunburn can be alleviated by applying cold material. Thus, you should apply cool tea rather than hot tea bag to soothe your sunburn. So the answer is: (1) Wait until brewed tea bag is cool, then apply on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.",
16 | "answer":"Acetone is flammable, while vinegar is not. If you want to use something for fuel, the thing you use should be flammable. Thus, you should use acetone for fuel in an alcohol stove. So the answer is: (1) Use acetone."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.",
20 | "answer":"A hand saw is used for making cuts and a hand drill is used for making holes. If you want to cut something, you should use a hand saw rather than hand drill. So the answer is: (1) Use a hand saw to cut the handles."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy",
4 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: Why is the test hard for Samuel?\nQuestion 2: Why is the test a breeze for Randy?\nQuestion 3: Who had failed to study for the test?"
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick",
8 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: Why did Kyle slowly worm their way into the life of Derrick?\nQuestion 2: Who was good and manipulating people?"
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael",
12 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What will Donald do if he was very grounded?\nQuestion 2: What will Michael do if he often got lost in their daydreams?\nQuestion 3: Who was very capricious all the time?"
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel",
16 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: Why was Betty able to help Rachel treat their asthma?\nQuestion 2: Who has asthma too?"
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher",
20 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What will Jeffrey do if he was sick with the stomach flu and not hungry?\nQuestion 2: What will Christopher do if he was starving?\nQuestion 3: Who may have ordered food?"
21 | }
22 | ]
--------------------------------------------------------------------------------
/prompts/generate_cot/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no\nAnswer: (1) yes",
4 | "answer":"Reason: Queen Elizabeth I ruled England and Ireland from 17 November 1558 until 24 March 1603. She died in 1603. Romeo and Juliet, a play by William Shakespeare, was written about 1594-96 and first published in an unauthorized quarto in 1597. Since 1597 is before 1603, the queen could have seen Romeo and Juliet."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no\nAnswer: (2) no",
8 | "answer":"Reason: Jane Goodall is considered the world's foremost expert on chimpanzees. She has no work in oceanography. Thus, she would not be considered an expert in oceanography."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no\nAnswer: (1) yes",
12 | "answer":"Reason: Edward of Westminster was killed aged seventeen at the Battle of Tewkesbury. Besides, Tewkesbury is located in Gloucestershire, England. Therefore, Edward of Westminster spent time in Gloucestershire."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no\nAnswer: (1) yes",
16 | "answer":"Reason: The giant squid live deep underwater. If human goes to their habitat to capture, they need to go to the deep water. Humans need equipment to maintain life in the deep sea. Hence, human with no gear will die when capturing giant squid in natural habitat."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no\nAnswer: (2) no",
20 | "answer":"Reason: Noah Cameron Schnapp is an American actor who was born in 2004. He was not of the minimum legal age to purchase alcoholic beverages. As a cocktail is a kind of alcoholic beverage, it is illegal for a bartender to make a cocktail for Noah Schnapp."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cot_answer/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no",
4 | "answer":"Queen Elizabeth I ruled England and Ireland from 17 November 1558 until 24 March 1603. She died in 1603. Romeo and Juliet, a play by William Shakespeare, was written about 1594-96 and first published in an unauthorized quarto in 1597. Since 1597 is before 1603, the queen could have seen Romeo and Juliet. So the answer is: (1) yes."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no",
8 | "answer":"Jane Goodall is considered the world's foremost expert on chimpanzees. She has no work in oceanography. Thus, she would not be considered an expert in oceanography. So the answer is: (2) no."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no",
12 | "answer":"Edward of Westminster was killed aged seventeen at the Battle of Tewkesbury. Besides, Tewkesbury is located in Gloucestershire, England. Therefore, Edward of Westminster spent time in Gloucestershire. So the answer is: (1) yes."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no",
16 | "answer":"The giant squid live deep underwater. If human goes to their habitat to capture, they need to go to the deep water. Humans need equipment to maintain life in the deep sea. Hence, human with no gear will die when capturing giant squid in natural habitat. So the answer is: (1) yes."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no",
20 | "answer":"Noah Cameron Schnapp is an American actor who was born in 2004. He was not of the minimum legal age to purchase alcoholic beverages. As a cocktail is a kind of alcoholic beverage, it is illegal for a bartender to make a cocktail for Noah Schnapp. So the answer is: (2) no."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/rt_answer/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no",
4 | "answer":"Queen Elizabeth I ruled England and Ireland from 17 November 1558 until 24 March 1603. She died in 1603. Romeo and Juliet, a play by William Shakespeare, was written about 1594-96 and first published in an unauthorized quarto in 1597. Since 1597 is before 1603, the queen could have seen Romeo and Juliet. So the answer is: (1) yes."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no",
8 | "answer":"Jane Goodall is considered the world's foremost expert on chimpanzees. She has no work in oceanography. Thus, she would not be considered an expert in oceanography. So the answer is: (2) no."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no",
12 | "answer":"Edward of Westminster was killed aged seventeen at the Battle of Tewkesbury. Besides, Tewkesbury is located in Gloucestershire, England. Therefore, Edward of Westminster spent time in Gloucestershire. So the answer is: (1) yes."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no",
16 | "answer":"The giant squid live deep underwater. If human goes to their habitat to capture, they need to go to the deep water. Humans need equipment to maintain life in the deep sea. Hence, human with no gear will die when capturing giant squid in natural habitat. So the answer is: (1) yes."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no",
20 | "answer":"Noah Cameron Schnapp is an American actor who was born in 2004. He was not of the minimum legal age to purchase alcoholic beverages. As a cocktail is a kind of alcoholic beverage, it is illegal for a bartender to make a cocktail for Noah Schnapp. So the answer is: (2) no."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cons_answer/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no",
4 | "answer":"Queen Elizabeth I ruled England and Ireland from 17 November 1558 until 24 March 1603. She died in 1603. Romeo and Juliet, a play by William Shakespeare, was written about 1594-96 and first published in an unauthorized quarto in 1597. Since 1597 is before 1603, the queen could have seen Romeo and Juliet. So the answer is: (1) yes."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no",
8 | "answer":"Jane Goodall is considered the world's foremost expert on chimpanzees. She has no work in oceanography. Thus, she would not be considered an expert in oceanography. So the answer is: (2) no."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no",
12 | "answer":"Edward of Westminster was killed aged seventeen at the Battle of Tewkesbury. Besides, Tewkesbury is located in Gloucestershire, England. Therefore, Edward of Westminster spent time in Gloucestershire. So the answer is: (1) yes."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no",
16 | "answer":"The giant squid live deep underwater. If human goes to their habitat to capture, they need to go to the deep water. Humans need equipment to maintain life in the deep sea. Hence, human with no gear will die when capturing giant squid in natural habitat. So the answer is: (1) yes."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no",
20 | "answer":"Noah Cameron Schnapp is an American actor who was born in 2004. He was not of the minimum legal age to purchase alcoholic beverages. As a cocktail is a kind of alcoholic beverage, it is illegal for a bartender to make a cocktail for Noah Schnapp. So the answer is: (2) no."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/strategy.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Could Elizabeth I of England have seen Romeo and Juliet? (1) yes (2) no",
4 | "answer":"Queen Elizabeth I ruled England and Ireland from 17 November 1558 until 24 March 1603. She died in 1603. Romeo and Juliet, a play by William Shakespeare, was written about 1594-96 and first published in an unauthorized quarto in 1597. Since 1597 is before 1603, the queen could have seen Romeo and Juliet. So the answer is: (1) yes."
5 | },
6 | {
7 | "question":"Would Jane Goodall be considered an expert in oceanography? (1) yes (2) no",
8 | "answer":"Jane Goodall is considered the world's foremost expert on chimpanzees. She has no work in oceanography. Thus, she would not be considered an expert in oceanography. So the answer is: (2) no."
9 | },
10 | {
11 | "question":"Did Edward of Westminster ever spend time in Gloucestershire? (1) yes (2) no",
12 | "answer":"Edward of Westminster was killed aged seventeen at the Battle of Tewkesbury. Besides, Tewkesbury is located in Gloucestershire, England. Therefore, Edward of Westminster spent time in Gloucestershire. So the answer is: (1) yes."
13 | },
14 | {
15 | "question":"Does human with no gear die capturing giant squid in natural habitat? (1) yes (2) no",
16 | "answer":"The giant squid live deep underwater. If human goes to their habitat to capture, they need to go to the deep water. Humans need equipment to maintain life in the deep sea. Hence, human with no gear will die when capturing giant squid in natural habitat. So the answer is: (1) yes."
17 | },
18 | {
19 | "question":"Would a bartender make a cocktail for Noah Schnapp? (1) yes (2) no",
20 | "answer":"Noah Cameron Schnapp is an American actor who was born in 2004. He was not of the minimum legal age to purchase alcoholic beverages. As a cocktail is a kind of alcoholic beverage, it is illegal for a bartender to make a cocktail for Noah Schnapp. So the answer is: (2) no."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans ",
4 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What is Google Maps and other highway and street GPS services used for?\nQuestion 2: What thing has the same use as street GPS services and has been replaced?"
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest ",
8 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What is the reason that the fox walked from the city into the forest?\nQuestion 2: What is located in forest and likely attracts fox?"
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit ",
12 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What place can we share files with someone?\nQuestion 2: To share files in this place, we need to have a connection to what?"
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops ",
16 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: Where can people get exotic snakes?\nQuestion 2: What can carry exotic snakes and meets the demand?"
17 |
18 | },
19 | {
20 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache ",
21 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What are the duties of the body guard?\nQuestion 2: If the duty is executed well, the person who hired him feels what?"
22 | }
23 | ]
24 |
25 |
--------------------------------------------------------------------------------
/prompts/generate_cot/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what?\n(1) atlas (2) mexico (3) countryside (4) united states (5) oceans\nAnswer: (1) atlas.",
4 | "answer":"Reason: Electronic maps and GPS services are the modern version of the paper atlas. In that case, the atlas has been replaced by Google Maps and other highway and street GPS services."
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for?\n(1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest\nAnswer: (3) natural habitat.",
8 | "answer":"Reason: Since the fox walked from the city into the forest, it may be looking for something in the forest but not in the city. From all of the options, the natural habitat is usually away from cities."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what?\n(1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit\nAnswer: (4) computer network.",
12 | "answer":"Reason: Files usually can be stored in the computers. In that case, we can share them over the Internet. Thus, if we connect to a computer network, we can share the file with others."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops\nAnswer: (5) pet shops.",
16 | "answer":"Reason: If people want exotic snakes, they may like to raise snakes as pets. If there is a demand for snakes as pets, pet shops will be pushed to carry them, in order to make more money."
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache\nAnswer: (2) feel safe.",
20 | "answer":"Reason: The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cot_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans ",
4 | "answer":"Electronic maps and GPS services are the modern version of the paper atlas. In that case, the atlas has been replaced by Google Maps and other highway and street GPS services. So the answer is: (1) atlas. "
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest ",
8 | "answer":"Since the fox walked from the city into the forest, it may be looking for something in the forest but not in the city. From all of the options, the natural habitat is usually away from cities. So the answer is: (3) natural habitat. "
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit ",
12 | "answer":"Files usually can be stored in the computers. In that case, we can share them over the Internet. Thus, if we connect to a computer network, we can share the file with others. So the answer is: (4) computer network. "
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops ",
16 | "answer":"If people want exotic snakes, they may like to raise snakes as pets. If there is a demand for snakes as pets, pet shops will be pushed to carry them, in order to make more money. So the answer is: (5) pet shops. "
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache ",
20 | "answer":"The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe. So the answer is: (2) feel safe. "
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes ",
4 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What should a person do if she want to help clean the room?\nQuestion 2: What will Quinn want to do next?"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom ",
8 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What does Sasha's mom need if she pass out and go to the hospital?\nQuestion 2: What will Aubrey want to do next?"
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem ",
12 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What will happen if Jan placed an obstacle in the way?\nQuestion 2: How would Jan feel afterwards?"
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse ",
16 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What will happen if someone burst into tears at work?\nQuestion 2: How would Sydney feel afterwards?"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy ",
20 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: Why did Sydney get a raise and a new promotion?\nQuestion 2: What does Sydney need to do before this?"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water\nRationale: Too much water can cause flooding. Thus, if we want to flood a room, we should use water. So the answer is: (2) fill it with water",
4 | "answer":"Feedback: The rational is good and do not need to change."
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.\nRationale: You can use beer to get oil stains out. So the answer is: (1) Douse each stain with a couple cans of beer.",
8 | "answer":"Feedback: The rational is wrong. It is sodium carbonate solution that can wash away oil stains. But beer does not contain it, so beer can not get oil stains out."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.\nRationale: If a person wants to soothe a painful sunburn, he can use a tea bag. So the answer is: (2) Wait until brewed tea bag is hot, then apply on burn.",
12 | "answer":"Feedback: The rational is wrong. Sunburn can be alleviated only by applying cold material but not hot material. Thus, you should not apply hot tea bag."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.\nRationale: Vinegar can be used for fuel. So the answer is: (2) Use vinegar.",
16 | "answer":"Feedback: The rational is wrong. Vinegar is not flammable, so it can not be used for fuel."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.\nRationale: A hand saw is used for making cuts and a hand drill is used for making holes. If you want to cut something, you should use a hand saw rather than hand drill. So the answer is: (1) Use a hand saw to cut the handles.",
20 | "answer":"Feedback: The rational is good and do not need to change."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans \nQuestion 1: What is Google Maps and other highway and street GPS services used for?",
4 | "answer":"Answer: They are used for finding places and navigation."
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest \nQuestion 1: What is the reason that the fox walked from the city into the forest? Answer: Since the forest is more natural than the city, it may want to find some place to rest.\nQuestion 2: What is located in forest and likely attracts fox?",
8 | "answer":"Answer: Since the forest has natural habitat for the fox to rest, it may look for the natural habitat in the forest."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit \nQuestion 1: What place can we share files with someone?",
12 | "answer":"Answer: We can share files with someone on computers or the Internet."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops \nQuestion 1: Where can people get exotic snakes? Answer: Since snakes can sometimes be pets, people can get them in a pet shop.\nQuestion 2: What can carry exotic snakes and meets the demand? ",
16 | "answer":"Answer: If many people want snakes and the pet shop can sell it, pet shop may carry them driven by the demand."
17 |
18 | },
19 | {
20 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache \nQuestion 1: What are the duties of the body guard?",
21 | "answer":"Answer: The duty of the body guard is to make sure that the person who hires him is safe."
22 | }
23 | ]
24 |
25 |
--------------------------------------------------------------------------------
/prompts/rt_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Rationale: Electronic maps and GPS services are the modern version of the paper atlas. In that case, the atlas has been replaced by Google Maps and other highway and street GPS services. \nQuestion: Google Maps and other highway and street GPS services have replaced what? (1) atlas (2) mexico (3) countryside (4) united states (5) oceans ",
4 | "answer":"Answer: (1) atlas. "
5 | },
6 | {
7 | "question":"Rationale: Since the fox walked from the city into the forest, it may be looking for something in the forest but not in the city. From all of the options, the natural habitat is usually away from cities. \nQuestion: The fox walked from the city into the forest, what was it looking for? (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest ",
8 | "answer":"Answer: (3) natural habitat. "
9 | },
10 | {
11 | "question":"Rationale: Files usually can be stored in the computers. In that case, we can share them over the Internet. Thus, if we connect to a computer network, we can share the file with others. \nQuestion: You can share files with someone if you have a connection to a what? (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit",
12 | "answer":"Answer: (4) computer network. "
13 | },
14 | {
15 | "question":"Rationale: If people want exotic snakes, they may like to raise snakes as pets. If there is a demand for snakes as pets, pet shops will be pushed to carry them, in order to make more money. \nQuestion: Too many people want exotic snakes. The demand is driving what to carry them? (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops ",
16 | "answer":"Answer: (5) pet shops."
17 | },
18 | {
19 | "question":"Rationale: The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe. \nQuestion: The body guard was good at his duties, he made the person who hired him what? (1) better job (2) feel safe (3) irritated (4) save money (5) headache",
20 | "answer":"Answer: (2) feel safe. "
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes\nQuestion 1: What should a person do if she want to help clean the room?",
4 | "answer":"Answer: Picking up the dirty clothes is one way to clean the room."
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom\nQuestion 1: What does Sasha's mom need if she pass out and go to the hospital? Answer: Sasha's mom is sent to the hospital because she passes out, she becomes a patient. Thus, she needs to be cared for.\nQuestion 2: What will Aubrey want to do next?",
8 | "answer":"Answer: Aubrey may need to take care of Sasha's mom. Thus, Aubrey will go on with the mom."
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem\nQuestion 1: What will happen if Jan placed an obstacle in the way?",
12 | "answer":"Answer: If Jan places an obstacle in the way, the cat can not escape. Hence, Jan can avoid losing the cat."
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse\nQuestion 1: What will happen if someone burst into tears at work? Answer: Crying is often a way to release tension. If someone burst into tears, she may feel released after that.\nQuestion 2: How would Sydney feel afterwards?",
16 | "answer":"Answer: Sydney burst into tears at work. Thus, she would release the tension."
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy\nQuestion 1: Why did Sydney get a raise and a new promotion?",
20 | "answer":"Answer: Since she got a raise, she may have done a good job at work."
21 | }
22 | ]
--------------------------------------------------------------------------------
/prompts/generate_cot/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes\nAnswer: (3) Pick up the dirty clothes ",
4 | "answer":"Reason: Quinn wants to clean the room up. Picking up the dirty clothes is one way to clean the room. Thus, Quinn will want to pick up the dirty clothes next."
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom\nAnswer: (3) going on with the mom",
8 | "answer":"Reason: Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom."
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem\nAnswer: (3) relieved for fixing the problem",
12 | "answer":"Reason: The cat tries to escape so Jan needs to stop it to avoid losing the cat. Jan places an obstacle in the way so the cat can not escape. The problem has been solved. Thus, Jan will feel relieved for fixing the problem."
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse\nAnswer: (2) like they released their tension",
16 | "answer":"Reason: Crying is often a way to release tension. Sydney burst into tears at work. Thus, she would release the tension."
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy\nAnswer: (2) do a good job",
20 | "answer":"Reason: To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she needed to do a good job before this."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/cot_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes ",
4 | "answer":"Quinn wants to clean the room up. Picking up the dirty clothes is one way to clean the room. Thus, Quinn will want to pick up the dirty clothes next. So the answer is: (3) Pick up the dirty clothes"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom ",
8 | "answer":"Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom. So the answer is: (3) going on with the mom"
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem ",
12 | "answer":"The cat tries to escape so Jan needs to stop it to avoid losing the cat. Jan places an obstacle in the way so the cat can not escape. The problem has been solved. Thus, Jan will feel relieved for fixing the problem. So the answer is: (3) relieved for fixing the problem"
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse ",
16 | "answer":"Crying is often a way to release tension. Sydney burst into tears at work. Thus, she would release the tension. So the answer is: (2) like they released their tension"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy ",
20 | "answer":"To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this. So the answer is: (2) do a good job"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/rt_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Rationale: Quinn want to clean the room up. Picking up the dirty clothes is one way to clean the room. Thus, quinn will want to pick up the dirty clothes next.\nQuestion: Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes ",
4 | "answer":"Answer: (3) Pick up the dirty clothes"
5 | },
6 | {
7 | "question":"Rationale: Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom.\nQuestion: Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom ",
8 | "answer":"Answer: (3) going on with the mom"
9 | },
10 | {
11 | "question":"Rationale: The cat try to escape so Jan need to stop it to avoid losing the cat. Jan place an obstacle in the way so the cat can not escape. The problem has been solved. Thus, Jan will feel relieved for fixing the problem.\nQuestion: Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem ",
12 | "answer":"Answer: (3) relieved for fixing the problem"
13 | },
14 | {
15 | "question":"Rationale: Crying is often a way to release tension. Sydney burst into tears at work. Thus, she would release the tension.\nQuestion: Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse ",
16 | "answer":"Answer: (2) like they released their tension"
17 | },
18 | {
19 | "question":"Rationale: To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this.\nQuestion: Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy ",
20 | "answer":"Answer: (2) do a good job"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/direct_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car. ",
4 | "answer":"Answer: (4) the man continues removing the snow on his car."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye. ",
8 | "answer":"Answer: (3) sits on the chair next to the sink. "
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top. ",
12 | "answer":"Answer: (1) is standing in front of the home, smiling while talking. "
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing. ",
16 | "answer":"Answer: (2) a boy and a girl dance ballet, then a man enter and dance with the girl. "
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him. ",
20 | "answer":"Answer: (3) is seen standing ready on the other side in front of a large audience. "
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water\nQuestion 1: What does flood a room mean? Answer: Flood means to become covered with water.\nQuestion 2: How do a person flood a room? Answer: If we want to flood a room, we should use water.",
4 | "answer":"So the answer is: (2) fill it with water"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.\nQuestion 1: How to get oil stains out? Answer: Oil stains can be removed by dissolving the oil in a solution.\nQuestion 2: What can oil be soluble in? Answer: Oil can be soluble in Sodium carbonate solution. The soda is a kind of sodium carbonate solution. So oil can be soluble in soda.",
8 | "answer":"So the answer is: (2) Douse each stain with a couple cans of soda."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.\nQuestion 1: How to soothe a painful sunburn? Answer: Sunburn can be alleviated by applying tea bag on burn.\nQuestion 2: Whether hot or cold material can soothe a sunburn? Answer: You should apply cool tea bag rather than hot tea bag to soothe your sunburn.",
12 | "answer":"So the answer is: (1) Wait until brewed tea bag is cool, then apply on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.\nQuestion 1: What properties must the fuel have? Answer: Materials used as fuel must be flammable.\nQuestion 2: What materials are flammable? Answer: Acetone is flammable, while vinegar is not.",
16 | "answer":"So the answer is: (1) Use acetone."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.\nQuestion 1: What direction of force is needed for cutting? Answer: Cutting requires horizontal force.\nQuestion 2: What tools can provide people with force in that direction? Answer: You should use a hand saw rather than hand drill to provide horizontal force for you.",
20 | "answer":"So the answer is: (1) Use a hand saw to cut the handles."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans\nRational: Electronic maps and GPS services are the modern version of paper atlas. In that case, the atlas have been replaced by Google Maps and other highway and street GPS services. So the answer is: (1) atlas.",
4 | "answer":"Feedback: The rational is good and do not need to change."
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest\nRational: Since the fox walk from the city into the forest, he may looks for something in the forest. The forest usually contains a lot of pretty flowers. So the answer is: (1) pretty flowers.",
8 | "answer":"Feedback: The rational is wrong. Since the flowers is also located in city, the fox does not need to leave city for the flower."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit\nRational: Files usually can be stored in the computers and shared on it. Thus, we need connect the computer to electrical circuit to turn it on. So the answer is: (5) electrical circuit.",
12 | "answer":"Feedback: The rational is wrong. We can share files through the Internet, so we must connect to the network but not electrical circuit."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops\nRational: Exotic snakes are usually located in outdoors. So the answer is: (4) outdoors.",
16 | "answer":"Feedback: The rational is wrong. The question asks for what to carry the snake, but the outdoors can not carry snake and meet the demand."
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache\nRational: The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe. So the answer is: (2) feel safe.",
20 | "answer":"Feedback: The rational is good and do not need to change."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cons_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water",
4 | "answer":"Explanation: Too much water can cause flooding. Thus, if we want to flood a room, we should use water.\nAnswer: (2) fill it with water\nWrong Explanation: To flood room means to fill the room. So you can use any objects to fill it.\nWrong Answer: (1) fill it with objects"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.",
8 | "answer":"Explanation: Sodium carbonate solution can wash away oil stains. The soda is a kind of sodium carbonate solution. Thus, you can use cans of soda to get oil stains out of your driveway.\nAnswer: (2) Douse each stain with a couple cans of soda.\nWrong Explanation: You can use beer to get oil stains out.\nWrong Answer: (1) Douse each stain with a couple cans of beer."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.",
12 | "answer":"Explanation: Sunburn can be alleviated by applying cold material. Thus, you should apply cool tea rather than hot tea bag to soothe your sunburn.\nAnswer: (1) Wait until brewed tea bag is cool, then apply on burn.\nWrong Explanation: If a person want to soothe a painful sunburn, he can use a tea bag.\nWrong Answer: (2) Wait until brewed tea bag is hot, then apply on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.",
16 | "answer":"Explanation: Acetone is flammable, while vinegar is not. If you want to use something for fuel, the thing you use should be flammable. Thus, you should use acetone for fuel in an alcohol stove.\nAnswer: (1) Use acetone.\nWrong Explanation: Vinegar can be used for fuel.\nWrong Answer: (2) Use vinegar."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.",
20 | "answer":"Explanation: A hand saw is used for making cuts and a hand drill is used for making holes. If you want to cut something, you should use a hand saw rather than hand drill.\nAnswer: (1) Use a hand saw to cut the handles.\nWrong Explanation: Since the hand drill is very hard, it can be used to cut objects.\nWrong Answer: (2) Use a hand drill to cut the handles."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/cot_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy",
4 | "answer":"To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it. So the answer is: (1) Samuel."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick",
8 | "answer":"A person worms his way into other's life, because he is friendly and approachable. A friendly person is considered good. A person is seen as manipulating people, that means he like to interact with others and others like him. Since Kyle slowly wormed their way into the life of Derrick, Kyle will be seen as good and manipulating people. So the answer is: (1) Kyle."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael",
12 | "answer":"A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Michael is seen as very capricious all the time. So the answer is: (2) Michael."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel",
16 | "answer":"A person who has a mild disease may knows how to treat it. Since Betty was able to help Rachel treat their asthma, Betty may has it, too. So the answer is: (1) Betty."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher",
20 | "answer":"Stomach flu is a viral infection in the digestive system. A person is starve so that he need to eat something next. A person is not hungry means he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffrey was sick and not hungry, Christopher tended to order food. So the answer is: (2) Christopher."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/generate_cot/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy\nAnswer: (1) Samuel.",
4 | "answer":"Reason: To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick\nAnswer: (1) Kyle.",
8 | "answer":"Reason: If a person worms his way into other's life, he is seen as friendly and approachable. A friendly person is considered good. A person is seen as manipulating people, that means he like to interact with others and others like him. Since Kyle slowly wormed their way into the life of Derrick, Kyle will be seen as good and manipulating people."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael\nAnswer: (2) Michael.",
12 | "answer":"Reason: If a person is grounded, he may work hard and do not like to fantasize. If a person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Michael is seen as very capricious all the time."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel\nAnswer: (1) Betty.",
16 | "answer":"Reason: If a person who has a mild disease, he may know how to treat it. Since Betty was able to help Rachel treat their asthma, Betty may has it, too."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher\nAnswer: (2) Christopher.",
20 | "answer":"Reason: Stomach flu is a viral infection in the digestive system. If a person is starve, he need to eat something next. If a person is not hungry, he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffrey was sick and not hungry, Christopher tended to order food."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy\nQuestion 1: Why the test is hard for Samuel?",
4 | "answer":"Answer: If the test is hard for Samuel, he may not study for it."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick\nQuestion 1: Why Kyle slowly wormed their way into the life of Derrick? Answer: If Kyle worms his way into Derrick's life, Kyle is friendly and approachable and Derrick like him.\nQuestion 2: Who was good and manipulating people?",
8 | "answer":"Answer: Since Kyle is friendly and approachable and Derrick like him, Kyle will be seen as good and manipulating people."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael\nQuestion 1: What will Donald do if he was very grounded? Answer: If Donald is grounded, he works hard and does not like to fantasize.\nQuestion 2: What will Michael do if he often got lost in their daydreams? Answer: If Michael often gets lost in his daydreams, he is unrealistic and egocentric.\nQuestion 3: Who was very capricious all the time?",
12 | "answer":"Answer: If a person is capricious all the time means he does everything only according to his own ideas, he likely gets lost in their daydreams. Since Michael often does it, Michael is seen as very capricious all the time."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel\nQuestion 1: Why does Betty was able to help Rachel treat their asthma?",
16 | "answer":"Answer: If Betty was able to help Rachel treat asthma, she knows how to treat it. Since a person who has a mild disease may knows how to treat it, Betty may has asthma."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher\nQuestion 1: What will Jeffrey do if he was sick with the stomach flu and not hungry? Answer: Since Jeffrey was sick with the stomach flu and not hungry, he does not want to eat anything. In that case, he will not order food.\nQuestion 2: What will Christopher do if he was starving?",
20 | "answer":"Answer: If Christopher is starve, he need to eat something next. If he needs food, he tends to order food."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/rt_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Rationale: To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it. \nQuestion: The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy",
4 | "answer":"Answer: (1) Samuel."
5 | },
6 | {
7 | "question":"Rationale: If a person worms his way into other's life, he is seen as friendly and approachable. A friendly person is considered good. A person is seen as manipulating people, that means he like to interact with others and others like him. Since Kyle slowly wormed their way into the life of Derrick, Kyle will be seen as good and manipulating people.\nQuestion: Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick ",
8 | "answer":"Answer: (1) Kyle."
9 | },
10 | {
11 | "question":"Rationale: If a person is grounded, he may work hard and do not like to fantasize. If a person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Michael is seen as very capricious all the time. \nQuestion: Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael",
12 | "answer":"Answer: (2) Michael."
13 | },
14 | {
15 | "question":"Rationale: If a person who has a mild disease, he may know how to treat it. Since Betty was able to help Rachel treat their asthma, Betty may has it, too. \nQuestion: After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel",
16 | "answer":"Answer: (1) Betty."
17 | },
18 | {
19 | "question":"Rationale: Stomach flu is a viral infection in the digestive system. If a person is starve, he need to eat something next. If a person is not hungry, he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffrey was sick and not hungry, Christopher tended to order food. \nQuestion: Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher",
20 | "answer":"Answer: (2) Christopher."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes\nRationale: Since Quinn wanted to help me clean my room, she want to help out a friend. So the answer is: (2) help out a friend",
4 | "answer":"Feedback: The rational is wrong. Since the question asks what to do 'next'. A more reasonable guess is to help organize clothes, etc., because the house is very messy"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom\nRationale: Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom. So the answer is: (3) going on with the mom",
8 | "answer":"Feedback: The rational is good and do not need to change."
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem\nRationale: Since Jan placed an obstacle, the cat can not escape. Thus, there is no problem about losing the cat, and Jan may feel normal. So the answer is: (2) normal.",
12 | "answer":"Feedback: The rational is wrong. If a problem has been solved, people will feel relaxed and relieved. Thus, Jan will feel relieved for fixing the problem."
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse\nRationale: Crying is often a way to change one's mood. Since Sydney burst into tears at work, her mood will be affected. So the answer is: (1) affected",
16 | "answer":"Feedback: The rational is wrong. Crying is often a way to release tension. The word 'affected' is too vague, it's more reasonable that Sydney get released because of crying."
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy\nRationale: To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this. So the answer is: (2) do a good job",
20 | "answer":"Feedback: The rational is good and do not need to change."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans \nQuestion 1: What is Google Maps and other highway and street GPS services used for? Answer: They are used for find someplace and navigation.\nQuestion 2: What thing has the same use as street GPS services and turns to be replaced? Answer: The atlas is also used for navigation and is tend to be replaced by these GPS services.",
4 | "answer":"So the answer is: (1) atlas."
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest \nQuestion 1: What the reason that fox walked from the city into the forest? Answer: Since forest is more natural than the city, it may want to find some place to rest.\nQuestion 2: What is located in forest and likely attracts fox? Answer: Since the forest has natural habitat for the fox to rest, it may look for the natural habitat in the forest.",
8 | "answer":"So the answer is: (3) natural habitat."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit \nQuestion 1: What place can we share files with someone? Answer: We can share files with someone on computers or the Internet.\nQuestion 2: To share files in this place, we need have a connection to what? Answer: To share files in the Internet, we need to connect our computer to the network.",
12 | "answer":"So the answer is: (4) computer network."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops \nQuestion 1: Where can people get exotic snakes? Answer: Since snakes can sometimes be a pet, people can get it in a pet shop.\nQuestion 2: What can carry exotic snakes and meets the demand? Answer: If many people want snakes and the pet shop can sell it, pet shop may carry them driven by the demand.",
16 | "answer":"So the answer is: (5) pet shops. "
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache \nQuestion 1: What the duties of the body guard? Answer: The duty of the body guard is to make sure that the person who hires him is safe.\nQuestion 2: If the duty is executed well, the person who hired him feel what? Answer: If this duty is executed well, the person will feel safe.",
20 | "answer":"So the answer is: (2) feel safe. "
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_question/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car. ",
4 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What happens to the car?\nQuestion 2: What will the man do next?"
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye. ",
8 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What is the man in blue shirt doing?\nQuestion 2: What will the man in blue shirt do next?"
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top. ",
12 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What does the woman feel if the roof is done?\nQuestion 2: What will the woman do if she feel like that?"
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing. ",
16 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What are people doing in this scenario?\nQuestion 2: What likely to happen next?"
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him. ",
20 | "answer":"To solve the question, we need to solve these subquestions:\nQuestion 1: What are the two man doing in this scenario?\nQuestion 2: What will another man do next?"
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/cons_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans ",
4 | "answer":"Explanation: Electronic maps and GPS services are the modern version of paper atlas. In that case, the atlas have been replaced by Google Maps and other highway and street GPS services.\nAnswer: (1) atlas\nWrong Explanation: The GPS services can be used for navigation, so the answer is related to ocean.\nWrong Answer: (5) oceans"
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest ",
8 | "answer":"Explanation: Since the fox walk from the city into the forest, he may looks for something in the forest but not in the city. From all of the options, the natural habitat are usually away from cities.\nAnswer: (3) natural habitat.\nWrong Explanation: Since the fox walk from the city into the forest, he may looks for something in the forest. The forest usually contains a lot of pretty flowers.\nWrong Answer: (1) pretty flowers."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit ",
12 | "answer":"Explanation: Files usually can be stored in the computers. In that case, we can share them over the Internet. Thus, if we connect to a computer network, we can share the file with others.\nAnswer: (4) computer network.\nWrong Explanation: Files usually can be stored in the computers and shared on it. Thus, we need connect the computer to electrical circuit to turn it on.\nWrong Answer: (5) electrical circuit."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops ",
16 | "answer":"Explanation: If people want exotic snakes, they may like to raise snakes as pets. If there is a demand for snakes as pets, pet shops will be pushed to carry them, in order to make more money.\nAnswer: (5) pet shops.\nWrong Explanation: Exotic snakes are usually located in outdoors.\nWrong Answer: (4) outdoors."
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache ",
20 | "answer":"Explanation: The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe.\nAnswer: (2) feel safe.\nWrong Explanation: If the body guard was good at his duties, the man who hired him will feel he did a good job.\nWrong Answer: (1) better job."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes\nQuestion 1: What should a person do if she want to help clean the room? Answer: Picking up the dirty clothes is one way to clean the room.\nQuestion 2: What will Quinn want to do next? Answer: Since Quinn want to clean room, quinn will want to pick up the dirty clothes next.",
4 | "answer":"So the answer is: (3) Pick up the dirty clothes"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom\nQuestion 1: What does Sasha's mom need if she pass out and go to the hospital? Answer: Sasha's mom is sent to the hospital because she passes out, she becomes a patient. Thus, she needs to be cared for.\nQuestion 2: What will Aubrey want to do next? Answer: Aubrey may need to take care of Sasha's mom. Thus, Aubrey will go on with the mom.",
8 | "answer":"So the answer is: (3) going on with the mom"
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem\nQuestion 1: What will happen if Jan placed an obstacle in the way? Answer: If Jan place an obstacle in the way, the cat can not escape. Hence, Jan can avoid losing the cat.\nQuestion 2: How would Jan feel afterwards? Answer: Since the problem has been solved, Jan will feel relieved for fixing the problem.",
12 | "answer":"So the answer is: (3) relieved for fixing the problem"
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse\nQuestion 1: What will happen if someone burst into tears at work? Answer: Crying is often a way to release tension. If someone burst into tears, she may feel released after that.\nQuestion 2: How would Sydney feel afterwards? Answer: Sydney burst into tears at work. Thus, she would release the tension.",
16 | "answer":"So the answer is: (2) like they released their tension"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy\nQuestion 1: Why does Sydney got a raise and a new promotion? Answer: Since she got a raise, she may have done a good job at work.\nQuestion 2: What does Sydney need to do before this? Answer: Sydney got a raise and a new promotion. Thus, she need to do a good job before this.",
20 | "answer":"So the answer is: (2) do a good job"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/cons_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes ",
4 | "answer":"Explanation: Quinn want to clean the room up. Picking up the dirty clothes is one way to clean the room. Thus, quinn will want to pick up the dirty clothes next.\nAnswer: (3) Pick up the dirty clothes\nWrong Explanation: Since Quinn wanted to help me clean my room, she want to help out a friend.\nWrong Answer: (2) help out a friend"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom ",
8 | "answer":"Explanation: Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom.\nAnswer: (3) going on with the mom\nWrong Explanation: Since Aubrey leave in the middle of the party, he may want to go back to keep on party next.\nWrong Answer: (2) keep on partying without the mom."
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem ",
12 | "answer":"Explanation: The cat try to escape so Jan need to stop it to avoid losing the cat. Jan place an obstacle in the way so the cat can not escape. The problem has been solved. Thus, Jan will feel relieved for fixing the problem.\nAnswer: (3) relieved for fixing the problem\nWrong Explanation: Since Jan placed an obstacle, the cat can not escape. Thus, there is no problem about losing the cat, and Jan may feel normal.\nWrong Answer: (2) normal."
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse ",
16 | "answer":"Explanation: Crying is often a way to release tension. Sydney burst into tears at work. Thus, she would release the tension.\nAnswer: (2) like they released their tension\nWrong Explanation: Crying is often a way to change one's mood. Since Sydney burst into tears at work, her mood will be affected.\nWrong Answer: (1) affected"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy ",
20 | "answer":"Explanation: To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this.\nAnswer: (2) do a good job\nWrong Explanation: People sometimes gets new promotion with no reason.\nWrong Answer: (3) be lazy"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/l2m_mid_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car.\nQuestion 1: What happens to the car?",
4 | "answer":"Answer: Since the man writes over the snow covering the window of a car, the car is covered by snow."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye.\nQuestion 1: What is the man in blue shirt doing? Answer: Since he is been demonstrating a hairstyle, he is getting a hair cut now.\nQuestion 2: What will the man in blue shirt do next?",
8 | "answer":"Answer: If the man is getting a hair cut, he needs to wash his head. Thus, he will sit on the chair next to the sink and wait to be washed."
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top.\nQuestion 1: What does the woman feel if the roof is done?",
12 | "answer":"Answer: If the roof is done, the woman will feel happy, proud and satisfied."
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing.\nQuestion 1: What are people doing in this scenario? Answer: People are practicing ballet according to the context.\nQuestion 2: What likely to happen next?",
16 | "answer":"Answer: If people practice ballet, they often dance with their partner or dance alone. Thus, there may be a boy and a girl dance ballet."
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him.\nQuestion 1: What are the two man doing in this scenario?",
20 | "answer":"Answer: Since a person is bouncing a ball on a tennis court, they are having a tennis match."
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/sr_answer/piqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"How do you flood a room?\n(1) fill it with objects. (2) fill it with water\nRationale: Too much water can cause flooding. Thus, if we want to flood a room, we should use water. So the answer is: (2) fill it with water Feedback: The rational is good and do not need to change.",
4 | "answer":"Too much water can cause flooding. Thus, if we want to flood a room, we should use water. So the answer is: (2) fill it with water"
5 | },
6 | {
7 | "question":"How can I get oil stains out of my driveway?\n(1) Douse each stain with a couple cans of beer. (2) Douse each stain with a couple cans of soda.\nRationale: You can use beer to get oil stains out. So the answer is: (1) Douse each stain with a couple cans of beer. Feedback: The rational is wrong. It is Sodium carbonate solution which can wash away oil stains. But beer can not contain it, so beer can not get oil stains out.",
8 | "answer":"Sodium carbonate solution can wash away oil stains. The soda is a kind of sodium carbonate solution. Thus, you can use cans of soda to get oil stains out of your driveway. So the answer is: (2) Douse each stain with a couple cans of soda."
9 | },
10 | {
11 | "question":"Soothe a painful sunburn.\n(1) Wait until brewed tea bag is cool, then apply on burn. (2) Wait until brewed tea bag is hot, then apply on burn.\nRationale: If a person want to soothe a painful sunburn, he can use a tea bag. So the answer is: (2) Wait until brewed tea bag is hot, then apply on burn. Feedback: The rational is wrong. Sunburn can be alleviated only by applying cold material but not hot material. Thus, you should not apply hot tea bag.",
12 | "answer":"Sunburn can be alleviated by applying cold material. Thus, you should apply cool tea rather than hot tea bag to soothe your sunburn. So the answer is: (1) Wait until brewed tea bag is cool, then apply on burn."
13 | },
14 | {
15 | "question":"What can I use for fuel in an alcohol stove?\n(1) Use acetone. (2) Use vinegar.\nRationale: Vinegar can be used for fuel. So the answer is: (2) Use vinegar. Feedback: The rational is wrong. Vinegar is not flammable, so it can not be used for fuel.",
16 | "answer":"Acetone is flammable, while vinegar is not. If you want to use something for fuel, the thing you use should be flammable. Thus, you should use acetone for fuel in an alcohol stove. So the answer is: (1) Use acetone."
17 | },
18 | {
19 | "question":"How can I cut the handles of metal cutlery?\n(1) Use a hand saw to cut the handles. (2) Use a hand drill to cut the handles.\nRationale: A hand saw is used for making cuts and a hand drill is used for making holes. If you want to cut something, you should use a hand saw rather than hand drill. So the answer is: (1) Use a hand saw to cut the handles. Feedback: The rational is good and do not need to change.",
20 | "answer":"A hand saw is used for making cuts and a hand drill is used for making holes. If you want to cut something, you should use a hand saw rather than hand drill. So the answer is: (1) Use a hand saw to cut the handles."
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy\nRational: To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it. So the answer is: (1) Samuel.",
4 | "answer":"Feedback: The rational is good and do not need to change."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick\nRational: A person wormes his way into other's life, because he is friendly and approachable. A person is seen as manipulating people, that means he like to interact with others. Since Kyle slowly wormed their way into the life of Derrick, Derrick will be seen as good and manipulating people. So the answer is: (2) Derrick.",
8 | "answer":"Feedback: The rational is wrong. Since a person wormes his way into other's life is friendly, it's Kyle who was good, not Derrick."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael\nRational: A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Donald is seen as very capricious all the time. So the answer is: (1) Donald.",
12 | "answer":"Feedback: The rational is wrong. Since a capricious person does everything only according to his own ideas, he likely to got lost in their daydreams. Thus, it is Michael who is capricious instead of Donald."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel\nRational: A person who has a mild disease may knows how to treat it. Since Betty was able to help Rachel treat their asthma, Rachel has asthma. So the answer is: (2) Rachel.",
16 | "answer":"Feedback: The rational is wrong. Since the question asks who has it too, it more likely to fill the Betty into the blank since she knows how to treat the asthma."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher\nRational: Stomach flu is a viral infection in the digestive system. A person is starve so that he need to eat something next. A person is not hungry means he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffrey was sick and not hungry, Christopher tended to order food. So the answer is: (2) Christopher.",
20 | "answer":"Feedback: The rational is good and do not need to change."
21 | }
22 | ]
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy\nQuestion 1: Why the test is hard for Samuel? Answer: If the test is hard for Samuel, he may not study for it.\nQuestion 2: Why the test is a breeze for Randy? Answer: If Randy feel the test like a breeze, the test is easy for her. In that case, she may study hard for it.\nQuestion 3: Who had fail to study for the test? Answer: Since Samuel does not study for the test, Samuel fails to study for it.",
4 | "answer":"So the answer is: (1) Samuel."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick\nQuestion 1: Why Kyle slowly wormed their way into the life of Derrick? Answer: If Kyle worms his way into Derrick's life, Kyle is friendly and approachable and Derrick like him.\nQuestion 2: Who was good and manipulating people? Answer: Since Kyle is friendly and approachable and Derrick like him, Kyle will be seen as good and manipulating people.",
8 | "answer":"So the answer is: (1) Kyle."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael\nQuestion 1: What will Donald do if he was very grounded? Answer: If Donald is grounded, he works hard and does not like to fantasize.\nQuestion 2: What will Michael do if he often got lost in their daydreams? Answer: If Michael often gets lost in his daydreams, he is unrealistic and egocentric.\nQuestion 3: Who was very capricious all the time? Answer: If a person is capricious all the time means he does everything only according to his own ideas, he likely gets lost in their daydreams. Since Michael often does it, Michael is seen as very capricious all the time.",
12 | "answer":"So the answer is: (2) Michael."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel\nQuestion 1: Why does Betty was able to help Rachel treat their asthma? Answer: If Betty was able to help Rachel treat asthma, she knows how to treat it. Since a person who has a mild disease may knows how to treat it, Betty may has asthma.\nQuestion 2: Who has asthma too? Answer: Since Rachel already has asthma and Betty may has asthma, Betty has asthma too.",
16 | "answer":"So the answer is: (1) Betty."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher\nQuestion 1: What will Jeffrey do if he was sick with the stomach flu and not hungry? Answer: Since Jeffrey was sick with the stomach flu and not hungry, he does not want to eat anything. In that case, he will not order food.\nQuestion 2: What will Christopher do if he was starving? Answer: If Christopher is starve, he need to eat something next. If he needs food, he tends to order food.\nQuestion 3: Who may ordered food? Answer: Christopher may ordered food.",
20 | "answer":"So the answer is: (2) Christopher."
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/cot_answer/gsm8k.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Sam and Jeff had a skipping competition at recess. The competition was split into four rounds. Sam completed 1 more skip than Jeff in the first round. Jeff skipped 3 fewer times than Sam in the second round. Jeff skipped 4 more times than Sam in the third round. Jeff got tired and only completed half the number of skips as Sam in the last round. If Sam skipped 16 times in each round, what is the average number of skips per round completed by Jeff?",
4 | "answer":"In round one, Jeff completed 16 - 1 = 15.\nIn round two, Jeff completed 16 - 3 = 13.\nIn round three, Jeff completed 16 + 4 = 20.\nIn round four, Jeff completed 16 / 2 = 8.\nJeff completed 15 + 13 + 20 + 8 = 56 skips in total.\nJeff skipped an average of 56 / 4 = 14 skips per round.\nSo the answer is: 14"
5 | },
6 | {
7 | "question":"Stella and Twinkle are filling up a truck with a capacity of 6000 stone blocks at the rate of 250 blocks per hour per person. They work for four hours and are then joined by 6 other people who also work at the same rate. How many hours did filling the truck take?",
8 | "answer":"Stella and Twinkle filled up the truck at the rate of 250 blocks per hour per person, a total of 2*250 = 500 blocks per hour for both.\nAfter working for four hours, Stella and Twinkle had filled 4*500 = 2000 blocks into the truck.\nThe number of blocks they had to put into the truck for it to be full is 6000-2000 = 4000\nWhen 6 more people joined Stella and Twinkle, a total of 2+6 = 8 people were filling the truck now.\nWorking at the rate of 250 blocks per person, the eight people filled the truck with 250*8 = 2000 blocks in one hour.\nIf there were 4000 blocks that still needed to be put into the truck, the 8 people took 4000/2000 = 2 hours to fill the truck with the blocks.\nThe total time it took to fill up the tank is 4+2 = 6 hours.\nSo the answer is: 6"
9 | },
10 | {
11 | "question":"John has five more roommates than twice as many as Bob. If Bob has 10 roommates, how many roommates does John have?",
12 | "answer":"Twice ten roommates is 2*10 = 20\nJohn has 5 more than 20 roommates which is 20+5 = 25 roommates\nSo the answer is: 25"
13 | },
14 | {
15 | "question":"Kyle bought 2 glass bottles that can hold 15 origami stars each. He then bought another 3 identical glass bottles. How many stars must Kyle make to fill all the glass bottles he bought?",
16 | "answer":"Kyle has 2 + 3 = 5 glass bottles.\nHe needs to make 15 x 5 = 75 origami stars\nSo the answer is: 75"
17 | },
18 | {
19 | "question":"Ann is baking cookies. She bakes three dozen oatmeal raisin cookies, two dozen sugar cookies, and four dozen chocolate chip cookies. Ann gives away two dozen oatmeal raisin cookies, 1.5 dozen sugar cookies, and 2.5 dozen chocolate chip cookies. How many total cookies does she keep?",
20 | "answer":"Ann had 3 dozen oatmeal cookies - 2 dozen = 1 dozen oatmeal cookies left.\nAnn had 2 dozen sugar cookies - 1.5 dozen = .5 dozen sugar cookies left.\nAnn had 4 dozen chocolate chip cookies - 2.5 dozen = 1.5 dozen chocolate chip cookies left.\nAnn has a total of 1 + .5 + 1.5 dozen cookies = 3 dozen cookies.\n3 dozen cookies is 3 *12 cookies = 36 cookies remaining.\nSo the answer is: 36"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/generate_cot/gsm8k.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Sam and Jeff had a skipping competition at recess. The competition was split into four rounds. Sam completed 1 more skip than Jeff in the first round. Jeff skipped 3 fewer times than Sam in the second round. Jeff skipped 4 more times than Sam in the third round. Jeff got tired and only completed half the number of skips as Sam in the last round. If Sam skipped 16 times in each round, what is the average number of skips per round completed by Jeff?",
4 | "answer":"In round one, Jeff completed 16 - 1 = 15.\nIn round two, Jeff completed 16 - 3 = 13.\nIn round three, Jeff completed 16 + 4 = 20.\nIn round four, Jeff completed 16 / 2 = 8.\nJeff completed 15 + 13 + 20 + 8 = 56 skips in total.\nJeff skipped an average of 56 / 4 = 14 skips per round.\nSo the answer is: 14"
5 | },
6 | {
7 | "question":"Stella and Twinkle are filling up a truck with a capacity of 6000 stone blocks at the rate of 250 blocks per hour per person. They work for four hours and are then joined by 6 other people who also work at the same rate. How many hours did filling the truck take?",
8 | "answer":"Stella and Twinkle filled up the truck at the rate of 250 blocks per hour per person, a total of 2*250 = 500 blocks per hour for both.\nAfter working for four hours, Stella and Twinkle had filled 4*500 = 2000 blocks into the truck.\nThe number of blocks they had to put into the truck for it to be full is 6000-2000 = 4000\nWhen 6 more people joined Stella and Twinkle, a total of 2+6 = 8 people were filling the truck now.\nWorking at the rate of 250 blocks per person, the eight people filled the truck with 250*8 = 2000 blocks in one hour.\nIf there were 4000 blocks that still needed to be put into the truck, the 8 people took 4000/2000 = 2 hours to fill the truck with the blocks.\nThe total time it took to fill up the tank is 4+2 = 6 hours.\nSo the answer is: 6"
9 | },
10 | {
11 | "question":"John has five more roommates than twice as many as Bob. If Bob has 10 roommates, how many roommates does John have?",
12 | "answer":"Twice ten roommates is 2*10 = 20\nJohn has 5 more than 20 roommates which is 20+5 = 25 roommates\nSo the answer is: 25"
13 | },
14 | {
15 | "question":"Kyle bought 2 glass bottles that can hold 15 origami stars each. He then bought another 3 identical glass bottles. How many stars must Kyle make to fill all the glass bottles he bought?",
16 | "answer":"Kyle has 2 + 3 = 5 glass bottles.\nHe needs to make 15 x 5 = 75 origami stars\nSo the answer is: 75"
17 | },
18 | {
19 | "question":"Ann is baking cookies. She bakes three dozen oatmeal raisin cookies, two dozen sugar cookies, and four dozen chocolate chip cookies. Ann gives away two dozen oatmeal raisin cookies, 1.5 dozen sugar cookies, and 2.5 dozen chocolate chip cookies. How many total cookies does she keep?",
20 | "answer":"Ann had 3 dozen oatmeal cookies - 2 dozen = 1 dozen oatmeal cookies left.\nAnn had 2 dozen sugar cookies - 1.5 dozen = .5 dozen sugar cookies left.\nAnn had 4 dozen chocolate chip cookies - 2.5 dozen = 1.5 dozen chocolate chip cookies left.\nAnn has a total of 1 + .5 + 1.5 dozen cookies = 3 dozen cookies.\n3 dozen cookies is 3 *12 cookies = 36 cookies remaining.\nSo the answer is: 36"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/l2m_question/gsm8k.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Sam and Jeff had a skipping competition at recess. The competition was split into four rounds. Sam completed 1 more skip than Jeff in the first round. Jeff skipped 3 fewer times than Sam in the second round. Jeff skipped 4 more times than Sam in the third round. Jeff got tired and only completed half the number of skips as Sam in the last round. If Sam skipped 16 times in each round, what is the average number of skips per round completed by Jeff?",
4 | "answer":"In round one, Jeff completed 16 - 1 = 15.\nIn round two, Jeff completed 16 - 3 = 13.\nIn round three, Jeff completed 16 + 4 = 20.\nIn round four, Jeff completed 16 / 2 = 8.\nJeff completed 15 + 13 + 20 + 8 = 56 skips in total.\nJeff skipped an average of 56 / 4 = 14 skips per round.\nSo the answer is: 14"
5 | },
6 | {
7 | "question":"Stella and Twinkle are filling up a truck with a capacity of 6000 stone blocks at the rate of 250 blocks per hour per person. They work for four hours and are then joined by 6 other people who also work at the same rate. How many hours did filling the truck take?",
8 | "answer":"Stella and Twinkle filled up the truck at the rate of 250 blocks per hour per person, a total of 2*250 = 500 blocks per hour for both.\nAfter working for four hours, Stella and Twinkle had filled 4*500 = 2000 blocks into the truck.\nThe number of blocks they had to put into the truck for it to be full is 6000-2000 = 4000\nWhen 6 more people joined Stella and Twinkle, a total of 2+6 = 8 people were filling the truck now.\nWorking at the rate of 250 blocks per person, the eight people filled the truck with 250*8 = 2000 blocks in one hour.\nIf there were 4000 blocks that still needed to be put into the truck, the 8 people took 4000/2000 = 2 hours to fill the truck with the blocks.\nThe total time it took to fill up the tank is 4+2 = 6 hours.\nSo the answer is: 6"
9 | },
10 | {
11 | "question":"John has five more roommates than twice as many as Bob. If Bob has 10 roommates, how many roommates does John have?",
12 | "answer":"Twice ten roommates is 2*10 = 20\nJohn has 5 more than 20 roommates which is 20+5 = 25 roommates\nSo the answer is: 25"
13 | },
14 | {
15 | "question":"Kyle bought 2 glass bottles that can hold 15 origami stars each. He then bought another 3 identical glass bottles. How many stars must Kyle make to fill all the glass bottles he bought?",
16 | "answer":"Kyle has 2 + 3 = 5 glass bottles.\nHe needs to make 15 x 5 = 75 origami stars\nSo the answer is: 75"
17 | },
18 | {
19 | "question":"Ann is baking cookies. She bakes three dozen oatmeal raisin cookies, two dozen sugar cookies, and four dozen chocolate chip cookies. Ann gives away two dozen oatmeal raisin cookies, 1.5 dozen sugar cookies, and 2.5 dozen chocolate chip cookies. How many total cookies does she keep?",
20 | "answer":"Ann had 3 dozen oatmeal cookies - 2 dozen = 1 dozen oatmeal cookies left.\nAnn had 2 dozen sugar cookies - 1.5 dozen = .5 dozen sugar cookies left.\nAnn had 4 dozen chocolate chip cookies - 2.5 dozen = 1.5 dozen chocolate chip cookies left.\nAnn has a total of 1 + .5 + 1.5 dozen cookies = 3 dozen cookies.\n3 dozen cookies is 3 *12 cookies = 36 cookies remaining.\nSo the answer is: 36"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/rt_answer/gsm8k.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Rationale: In round one, Jeff completed 16 - 1 = 15.\nIn round two, Jeff completed 16 - 3 = 13.\nIn round three, Jeff completed 16 + 4 = 20.\nIn round four, Jeff completed 16 / 2 = 8.\nJeff completed 15 + 13 + 20 + 8 = 56 skips in total.\nJeff skipped an average of 56 / 4 = 14 skips per round.\nQuestion: Sam and Jeff had a skipping competition at recess. The competition was split into four rounds. Sam completed 1 more skip than Jeff in the first round. Jeff skipped 3 fewer times than Sam in the second round. Jeff skipped 4 more times than Sam in the third round. Jeff got tired and only completed half the number of skips as Sam in the last round. If Sam skipped 16 times in each round, what is the average number of skips per round completed by Jeff?",
4 | "answer":"Answer: 14"
5 | },
6 | {
7 | "question":"Rationale: Stella and Twinkle filled up the truck at the rate of 250 blocks per hour per person, a total of 2*250 = 500 blocks per hour for both.\nAfter working for four hours, Stella and Twinkle had filled 4*500 = 2000 blocks into the truck.\nThe number of blocks they had to put into the truck for it to be full is 6000-2000 = 4000\nWhen 6 more people joined Stella and Twinkle, a total of 2+6 = 8 people were filling the truck now.\nWorking at the rate of 250 blocks per person, the eight people filled the truck with 250*8 = 2000 blocks in one hour.\nIf there were 4000 blocks that still needed to be put into the truck, the 8 people took 4000/2000 = 2 hours to fill the truck with the blocks.\nThe total time it took to fill up the tank is 4+2 = 6 hours.\nQuestion: Stella and Twinkle are filling up a truck with a capacity of 6000 stone blocks at the rate of 250 blocks per hour per person. They work for four hours and are then joined by 6 other people who also work at the same rate. How many hours did filling the truck take?",
8 | "answer":"Answer: 6"
9 | },
10 | {
11 | "question":"Rationale: Twice ten roommates is 2*10 = 20\nJohn has 5 more than 20 roommates which is 20+5 = 25 roommates\nQuestion: John has five more roommates than twice as many as Bob. If Bob has 10 roommates, how many roommates does John have?",
12 | "answer":"Answer: 25"
13 | },
14 | {
15 | "question":"Rationale: Kyle has 2 + 3 = 5 glass bottles.\nHe needs to make 15 x 5 = 75 origami stars.\nQuestion: Kyle bought 2 glass bottles that can hold 15 origami stars each. He then bought another 3 identical glass bottles. How many stars must Kyle make to fill all the glass bottles he bought?",
16 | "answer":"Answer: 75"
17 | },
18 | {
19 | "question":"Rationale: Ann had 3 dozen oatmeal cookies - 2 dozen = 1 dozen oatmeal cookies left.\nAnn had 2 dozen sugar cookies - 1.5 dozen = .5 dozen sugar cookies left.\nAnn had 4 dozen chocolate chip cookies - 2.5 dozen = 1.5 dozen chocolate chip cookies left.\nAnn has a total of 1 + .5 + 1.5 dozen cookies = 3 dozen cookies.\n3 dozen cookies is 3 *12 cookies = 36 cookies remaining.\nQuestion: Ann is baking cookies. She bakes three dozen oatmeal raisin cookies, two dozen sugar cookies, and four dozen chocolate chip cookies. Ann gives away two dozen oatmeal raisin cookies, 1.5 dozen sugar cookies, and 2.5 dozen chocolate chip cookies. How many total cookies does she keep?",
20 | "answer":"Answer: 36"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/generate_cot/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car.\nAnswer: (4) the man continues removing the snow on his car.",
4 | "answer":"Reason: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye.\nAnswer: (3) sits on the chair next to the sink.",
8 | "answer":"Reason: One should sit still when getting a haircut. People often sits down when they are get a haircut. From the context, we can get that the man in the blue shirt is getting a haircut, so he should sit down and wait."
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top.\nAnswer: (1) is standing in front of the home, smiling while talking.",
12 | "answer":"Reason: One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile."
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing.\nAnswer: (2) a boy and a girl dance ballet, then a man enter and dance with the girl.",
16 | "answer":"Reason: Ballet is a kind of dance. If people practice ballet, they often dance with their partner or dance alone, which matches the description of option (2)."
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him.\nAnswer: (3) is seen standing ready on the other side in front of a large audience.",
20 | "answer":"Reason: The player who bounces the ball is the one who serves. In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready."
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/cot_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car. ",
4 | "answer":"Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next. So the answer is: (4) the man continues removing the snow on his car."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye. ",
8 | "answer":"One should sit still when getting a haircut. People often sits down when they are get a haircut. From the context, we can get that the man in the blue shirt is getting a haircut, so he should sit down and wait. So the answer is: (3) sits on the chair next to the sink. "
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top. ",
12 | "answer":"One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile. So the answer is: (1) is standing in front of the home, smiling while talking. "
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing. ",
16 | "answer":"Ballet is a kind of dance. If people practice ballet, they often dance with their partner or dance alone, which matches the description of option (2). So the answer is: (2) a boy and a girl dance ballet, then a man enter and dance with the girl. "
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him. ",
20 | "answer":"The player who bounces the ball is the one who serves. In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready. So the answer is: (3) is seen standing ready on the other side in front of a large audience. "
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/rt_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Rationale: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next.\nQuestion: Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car. ",
4 | "answer":"Answer: (4) the man continues removing the snow on his car."
5 | },
6 | {
7 | "question":"Rationale: One should sit still when getting a haircut. People often sits down when they are get a haircut. From the context, we can get that the man in the blue shirt is getting a haircut, so he should sit down and wait.\nQuestion: The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye. ",
8 | "answer":"Answer: (3) sits on the chair next to the sink. "
9 | },
10 | {
11 | "question":"Rationale: One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile.\nQuestion: The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top. ",
12 | "answer":"Answer: (1) is standing in front of the home, smiling while talking. "
13 | },
14 | {
15 | "question":"Rationale: Ballet is a kind of dance. If people practice ballet, they often dance with their partner or dance alone, which matches the description of option (2).\nQuestion: People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing. ",
16 | "answer":"Answer: (2) a boy and a girl dance ballet, then a man enter and dance with the girl."
17 | },
18 | {
19 | "question":"Rationale: The player who bounces the ball is the one who serves. In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready.\nQuestion: A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him. ",
20 | "answer":"Answer: (3) is seen standing ready on the other side in front of a large audience. "
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/sr_answer/csqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Google Maps and other highway and street GPS services have replaced what? \n (1) atlas (2) mexico (3) countryside (4) united states (5) oceans\nRational: Electronic maps and GPS services are the modern version of paper atlas. In that case, the atlas have been replaced by Google Maps and other highway and street GPS services. So the answer is: (1) atlas. Feedback: The rational is good and do not need to change.",
4 | "answer":"Electronic maps and GPS services are the modern version of paper atlas. In that case, the atlas have been replaced by Google Maps and other highway and street GPS services. So the answer is: (1) atlas. "
5 | },
6 | {
7 | "question":"The fox walked from the city into the forest, what was it looking for? \n (1) pretty flowers. (2) hen house (3) natural habitat (4) storybook (5) dense forest\nRational: Since the fox walk from the city into the forest, he may looks for something in the forest. The forest usually contains a lot of pretty flowers. So the answer is: (1) pretty flowers. Feedback: The rational is wrong. Since the flowers is also located in city, the fox does not need to leave city for the flower.",
8 | "answer":"Since the fox walk from the city into the forest, he may looks for something in the forest but not in the city. From all of the options, the natural habitat are usually away from cities. So the answer is: (3) natural habitat."
9 | },
10 | {
11 | "question":"You can share files with someone if you have a connection to a what? \n (1) freeway (2) radio (3) wires (4) computer network (5) electrical circuit\nRational: Files usually can be stored in the computers and shared on it. Thus, we need connect the computer to electrical circuit to turn it on. So the answer is: (5) electrical circuit. Feedback: The rational is wrong. We can share files through the Internet, so we must connect to the network but not electrical circuit.",
12 | "answer":"Files usually can be stored in the computers. In that case, we can share them over the Internet. Thus, if we connect to a computer network, we can share the file with others. So the answer is: (4) computer network."
13 | },
14 | {
15 | "question":"Too many people want exotic snakes. The demand is driving what to carry them? \n (1) ditch (2) shop (3) north america (4) outdoors (5) pet shops\nRational: Exotic snakes are usually located in outdoors. So the answer is: (4) outdoors. Feedback: The rational is wrong. The question asks for what to carry the snake, but the outdoors can not carry snake and meet the demand.",
16 | "answer":"If people want exotic snakes, they may like to raise snakes as pets. If there is a demand for snakes as pets, pet shops will be pushed to carry them, in order to make more money. So the answer is: (5) pet shops. "
17 | },
18 | {
19 | "question":"The body guard was good at his duties, he made the person who hired him what? \n (1) better job (2) feel safe (3) irritated (4) save money (5) headache\nRational: The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe. So the answer is: (2) feel safe. Feedback: The rational is good and do not need to change.",
20 | "answer":"The job of body guards is to ensure the safety and security of the employer. People usually hire the body guard to make themselves safe. So the answer is: (2) feel safe. "
21 | }
22 | ]
23 |
24 |
--------------------------------------------------------------------------------
/prompts/sr_feedback/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car.\nRationale: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next. So the answer is: (4) the man continues removing the snow on his car.",
4 | "answer":"Feedback: The rational is good and do not need to change."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye.\nRationale: Since the man is doing a hair cut, he need to do the hairstyle first. So the answer is: (2) is doing the hairstyle with his hand and the hairspray.",
8 | "answer":"Feedback: The rational is wrong. Since the man in the blue shirt man is been demonstrating, he is the person that getting a hair cut not doing a hair cut. Thus, he does not need to doing the hairstyle."
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top.\nRationale: One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile. So the answer is: (1) is standing in front of the home, smiling while talking.",
12 | "answer":"Feedback: The rational is good and do not need to change."
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing.\nRationale: Since people are practicing ballet, they may dance together. So the answer is: (4) people dances and dances together, dancing.",
16 | "answer":"Feedback: The rational is wrong. Ballet is a kind of dance that plays with their partner or dance alone. Thus, it is unlikely that people dance together."
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him.\nRationale: Since a person is bouncing a ball on a tennis court, they may play tennis. Thus, another man should hold a racket to play it. So the answer is: (4) is holding the racket next to him.",
20 | "answer":"Feedback: The rational is wrong. In the tennis race, two players are located on opposite sides of the court, thus, it is impossible that another man stand next to him."
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/sr_answer/siqa.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Quinn wanted to help me clean my room up because it was so messy. What will Quinn want to do next? \n (1) Eat messy snacks (2) help out a friend (3) Pick up the dirty clothes\nRationale: Since Quinn wanted to help me clean my room, she want to help out a friend. So the answer is: (2) help out a friend. Feedback: The rational is wrong. Since the question asks what to do 'next'. A more reasonable guess is to help organize clothes, etc., because the house is very messy",
4 | "answer": "Quinn want to clean the room up. Picking up the dirty clothes is one way to clean the room. Thus, quinn will want to pick up the dirty clothes next. So the answer is: (3) Pick up the dirty clothes"
5 | },
6 | {
7 | "question":"Sasha's mom passed out in the middle of the party. Aubrey took Sasha's mom to the hospital. What will Aubrey want to do next? \n (1) help Aubrey go back home (2) keep on partying without the mom (3) going on with the mom\nRationale: Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom. So the answer is: (3) going on with the mom. Feedback: The rational is good and do not need to change.",
8 | "answer":"Sasha's mom is sent to the hospital because she passes out. After that, she becomes a patient. A patient needs to be cared for. Aubrey may need to take care of her. Thus, Aubrey will go on with the mom. So the answer is: (3) going on with the mom"
9 | },
10 | {
11 | "question":"Their cat kept trying to escape out of the window, so Jan placed an obstacle in the way. How would Jan feel afterwards? \n (1) scared of losing the cat (2) normal (3) relieved for fixing the problem\nRationale: Since Jan placed an obstacle, the cat can not escape. Thus, there is no problem about losing the cat, and Jan may feel normal. So the answer is: (2) normal. Feedback: The rational is wrong. If a problem has been solved, people will feel relaxed and relieved. Thus, Jan will feel relieved for fixing the problem.",
12 | "answer":"The cat try to escape so Jan need to stop it to avoid losing the cat. Jan place an obstacle in the way so the cat can not escape. The problem has been solved. Thus, Jan will feel relieved for fixing the problem. So the answer is: (3) relieved for fixing the problem"
13 | },
14 | {
15 | "question":"Sydney had so much pent up emotion, they burst into tears at work. How would Sydney feel afterwards? \n (1) affected (2) like they released their tension (3) worse\nRationale: Crying is often a way to change one's mood. Since Sydney burst into tears at work, her mood will be affected. So the answer is: (1) affected Feedback: The rational is wrong. Crying is often a way to release tension. The word 'affected' is too vague, it's more reasonable that Sydney gets released because of crying.",
16 | "answer":"Crying is often a way to release tension. Sydney burst into tears at work. Thus, she would release the tension. So the answer is: (2) like they released their tension"
17 | },
18 | {
19 | "question":"Sydney got a raise and a new promotion. What does Sydney need to do before this? \n (1) be bad at her job (2) do a good job (3) be lazy\nRationale: To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this. So the answer is: (2) do a good job Feedback: The rational is good and do not need to change.",
20 | "answer":"To get a raise and a new promotion, Sydney needs to work hard and do a good job. Sydney got a raise and a new promotion. Thus, she need to do a good job before this. So the answer is: (2) do a good job"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/prompts/cons_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy",
4 | "answer":"Explanation: To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it.\nAnswer: (1) Samuel.\nWrong Explanation: The test was a breeze for Randy, so he try to study for it. Thus, Randy is the man who fail to study.\nWrong Answer: (2) Randy."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick",
8 | "answer":"Explanation: A person worms his way into other's life, because he is friendly and approachable. A friendly person is considered good. A person is seen as manipulating people, that means he like to interact with others and others like him. Since Kyle slowly wormed their way into the life of Derrick, Kyle will be seen as good and manipulating people.\nAnswer: (1) Kyle.\nWrong Explanation: A person worms his way into other's life, because he is friendly and approachable. A person is seen as manipulating people, that means he like to interact with others. Since Kyle slowly wormed their way into the life of Derrick, Derrick will be seen as good and manipulating people.\nWrong Answer: (2) Derrick."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael",
12 | "answer":"Explanation: A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Michael is seen as very capricious all the time.\nAnswer: (2) Michael.\nWrong Explanation: A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Donald is seen as very capricious all the time.\nWrong Answer: (1) Donald."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel",
16 | "answer":"Explanation: A person who has a mild disease may know how to treat it. Since Betty was able to help Rachel treat their asthma, Betty may have it, too.\nAnswer: (1) Betty.\nWrong Explanation: A person who has a mild disease may know how to treat it. Since Betty was able to help Rachel treat their asthma, Rachel has asthma.\nWrong Answer: (2) Rachel."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher",
20 | "answer":"Explanation: Stomach flu is a viral infection in the digestive system. A person is starving so that he needs to eat something next. A person is not hungry means he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffrey was sick and not hungry, Christopher tended to order food.\nAnswer: (2) Christopher.\nWrong Explanation: Since Jeffrey was sick, he may need to eat something to recover. Therefore, Jeffrey is the man who ordered food.\nWrong Answer: (1) Jeffrey"
21 | }
22 | ]
23 |
24 |
25 |
--------------------------------------------------------------------------------
/cal_token_cost.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | from prompts.wrap_prompt import LlamaPrompter
# Command-line options: --model selects the model-name prefix of the result
# files, --dataset selects the sub-directory of ./result/ that is read.
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='Llama-2-13b-chat-hf', type=str)
parser.add_argument('--dataset', default='csqa', type=str)
args = parser.parse_args()

# Module-level settings read by get_cost().
dataset = args.dataset
model_name = args.model
12 |
13 |
def get_cost(task):
    """Return the average prompt-plus-output length for one prompting strategy.

    The result file for ``task`` is looked up under ``./result/{dataset}/``
    (``dataset`` and ``model_name`` are module-level settings). For every
    record the question is wrapped in the few-shot prompt(s) of the strategy
    and the cost of the record is ``len(prompt + output)``.

    Args:
        task: Strategy key. ``'direct'``, ``'cot'``, ``'sc'``, ``'l2m'``,
            ``'sr'`` and ``'cont'`` have dedicated result files; any other
            value (e.g. ``'our'``) reads ``res.json``.

    Returns:
        The mean cost per record, or ``-1`` when the result file is missing
        or contains no records to average.
    """
    result_files = {
        'direct': f'{model_name}_direct_answer_dev_2000.json',
        'cot': f'{model_name}_cot_answer_dev_2000_greedy.json',
        'sc': f'{model_name}_sc_2000.json',
        'l2m': f'{model_name}_l2m_question_dev_2000.json',
        'sr': f'{model_name}_self_refine_dev_2000.json',
        'cont': f'{model_name}_cons_answer_dev_2000.json',
    }
    path = f"./result/{dataset}/{result_files.get(task, 'res.json')}"

    if not os.path.exists(path):
        return -1
    with open(path, 'r') as f:
        # The final entry is dropped; presumably it is a trailing summary
        # record rather than a sample -- confirm against the result writer.
        data = json.load(f)[:-1]
    if not data:
        # Nothing to average: reuse the "no result" sentinel instead of
        # failing with ZeroDivisionError.
        return -1

    # Prompt stages applied in order; each stage wraps the previous output.
    prompt_stages = {
        'sc': ('cot_answer',),
        'res': ('cot_answer',),
        'cot': ('cot_answer',),
        'sr': ('sr_feedback', 'sr_answer'),
        'l2m': ('l2m_question', 'l2m_mid_answer', 'l2m_final_answer'),
        'cont': ('cons_answer',),
    }
    chained = task in prompt_stages
    stages = prompt_stages.get(task, ('cot_answer',))
    # Built once per call instead of once per record; assumes a LlamaPrompter
    # keeps no per-question state -- TODO confirm in prompts/wrap_prompt.py.
    prompters = [LlamaPrompter(dataset, stage) for stage in stages]

    total_cost = 0
    for msg in data:
        question = msg['question']
        if chained:
            for prompter in prompters:
                question = prompter.wrap_input(question, icl_cnt=5)
        else:
            # NOTE(review): every other task (e.g. 'direct', 'our') keeps the
            # raw question and appends the wrapped CoT prompt, so the
            # question text is counted twice -- kept as in the original.
            question += prompters[0].wrap_input(question, icl_cnt=5)

        answer = msg['answer']
        if task == 'sc':
            # 'sc' records hold several sampled outputs; count all of them.
            result = ''.join(answer)
        elif task == 'our':
            result = answer * 2
        else:
            result = answer

        total_cost += len(question + result)
    return total_cost / len(data)
75 |
76 |
# Print the average cost of every strategy, one line per task.
test_data = []
task_ls = ['cot', 'sc', 'sr', 'l2m', 'cont', 'our']
for task, cost in zip(task_ls, map(get_cost, task_ls)):
    print(f'{task}: Avg cost:{cost}')
82 | # if dataset == 'csqa':
83 | # index1 = [41,49,158,161,174,244,276,283,286,297,386,394,402,413,424,431,441,443,457,523,539,652,700,709,754,869,881,898,939,946]
84 | # index2 = [36,331,379,395,521,525,527,599,654,826,893,913,998]
85 | # elif dataset == 'wino':
86 | # index1 = [7,15,50,53,97,108,119,121,132,201,207,209,235,253,284,285,307,320,338,342,347,387,390,426,453,467,475,478,482,490,498]
87 | # index2 = [40,47,73,175,180,185,197,232,255,266,274,306,316,327,333,409,423,427,433,444,454,481,493]
88 | # task_ls = ['cot', 'res', 'sps', 'riders']
89 | # for task in task_ls:
90 | # acc1 = get_drift_acc(task, index1)
91 | # acc2 = get_drift_acc(task, index2)
92 | # print(f'{task}: Type1:{acc1} Type2:{acc2}')
--------------------------------------------------------------------------------
/prompts/l2m_final_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car.\nQuestion 1: What happens to the car? Answer: Since the man writes over the snow covring the window of a car, the car is covered by snow.\nQuestion 2: What will the man do next? Answer: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next.",
4 | "answer":"So the answer is: (4) the man continues removing the snow on his car."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye.\nQuestion 1: What is the man in blue shirt doing? Answer: Since he is been demonstrating a hairstyle, he is getting a hair cut now.\nQuestion 2: What will the man in blue shirt do next? Answer: If the man is getting a hair cut, he needs to wash his head. Thus, he will sit on the chair next to the sink and wait to be washed.",
8 | "answer":"So the answer is: (3) sits on the chair next to the sink. "
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top.\nQuestion 1: What does the woman feel if the roof is done? Answer: If the roof is done, the woman will feel happy, proud and satisfied.\nQuestion 2: What will the woman do if she feel like that? Answer: If the woman feels happy, she tends to smile.",
12 | "answer":"So the answer is: (1) is standing in front of the home, smiling while talking. "
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing.\nQuestion 1: What are people doing in this scenario? Answer: People are practicing ballet according to the context.\nQuestion 2: What likely to happen next? Answer: If people pratice ballet, they often dance with their partener or dance alone. Thus, there may be a boy and a girl dance ballet.",
16 | "answer":"So the answer is: (2) a boy and a girl dance ballet, then a man enter and dance with the girl. "
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him.\nQuestion 1: What are the two man doing in this scenario? Answer: Since a person is bouncing a ball on a tennis court, they are having a tennis match.\nQuestion 2: What will another man do next? Answer: In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready.",
20 | "answer":"So the answer is: (3) is seen standing ready on the other side in front of a large audience. "
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/prompts/cons_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car. ",
4 | "answer":"Explanation: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next.\nAnswer: (4) the man continues removing the snow on his car.\nWrong Explanation: Since there is snow, the weather is snowy. Thus, the man will puts on a coat to avoid being cold.\nWrong Answer: (3) the man puts on a christmas coat, knitted with netting."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye. ",
8 | "answer":"Explanation: One should sit still when getting a haircut. People often sits down when they are get a haircut. From the context, we can get that the man in the blue shirt is getting a haircut, so he should sit down and wait.\nAnswer: (3) sits on the chair next to the sink.\nWrong Explanation: Since the man is doing a hair cut, he need to do the hairstyle first.\nWrong Answer: (2) is doing the hairstyle with his hand and the hairspray."
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top. ",
12 | "answer":"Explanation: One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile.\nAnswer: (1) is standing in front of the home, smiling while talking. \nWrong Explanation: Since the roof was done, the woman will probably repair some other thing.\nWrong Answer: (4) stacks the bags on the side and begins putting stencils on the top."
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing. ",
16 | "answer":"Explanation: Ballet is a kind of dance. If people pratice ballet, they often dance with their partener or dance alone, which matches the description of option (2).\nAnswer: (2) a boy and a girl dance ballet, then a man enter and dance with the girl.\nWrong Explanation: Since people are practicing ballet, they may dance together.\nWrong Answer: (4) people dances and dances together, dancing."
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him. ",
20 | "answer":"Explanation: The player who bounces the ball is the one who serves. In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready.\nAnswer: (3) is seen standing ready on the other side in front of a large audience.\nWrong Explanation: Since a person is bouncing a ball on a tennis court, they may play tennis. Thus, another man should hold a racket to play it.\nWrong Answer: (4) is holding the racket next to him."
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# Dataset file locations, all relative to the current working directory.
_DATA_ROOT = './data'

# CommonsenseQA
csqa_train_data_path = f'{_DATA_ROOT}/CommonsenseQA/train_rand_split.jsonl'
csqa_dev_data_path = f'{_DATA_ROOT}/CommonsenseQA/dev_rand_split.jsonl'

# WinoGrande
wino_train_data_path = f'{_DATA_ROOT}/winogrande_1.1/train_l.jsonl'
wino_dev_data_path = f'{_DATA_ROOT}/winogrande_1.1/dev.jsonl'

# HellaSwag
hella_train_data_path = f'{_DATA_ROOT}/hellaswag/hellaswag_train.jsonl'
hella_dev_data_path = f'{_DATA_ROOT}/hellaswag/hellaswag_val.jsonl'

# SocialIQA (labels are stored in separate .lst files)
_SIQA_DIR = f'{_DATA_ROOT}/SocialIQA'
siqa_train_data_path = f'{_SIQA_DIR}/train.jsonl'
siqa_train_label_path = f'{_SIQA_DIR}/train-labels.lst'
siqa_dev_data_path = f'{_SIQA_DIR}/dev.jsonl'
siqa_dev_label_path = f'{_SIQA_DIR}/dev-labels.lst'

# PIQA (labels are stored in separate .lst files)
_PIQA_DIR = f'{_DATA_ROOT}/PIQA'
piqa_train_data_path = f'{_PIQA_DIR}/train.jsonl'
piqa_train_label_path = f'{_PIQA_DIR}/train-labels.lst'
piqa_dev_data_path = f'{_PIQA_DIR}/valid.jsonl'
piqa_dev_label_path = f'{_PIQA_DIR}/valid-labels.lst'

# GSM8K (the test split is used as the dev set)
_GSM8K_DIR = f'{_DATA_ROOT}/grade-school-math/grade_school_math/data'
gsm8k_train_data_path = f'{_GSM8K_DIR}/train.jsonl'
gsm8k_dev_data_path = f'{_GSM8K_DIR}/test.jsonl'

# StrategyQA
strategy_data_path = f'{_DATA_ROOT}/strategyqa_dataset/strategyqa_train_filtered.jsonl'
25 |
# W&C index lists for the `wino` and `csqa` datasets.
# NOTE(review): "c2w" / "w2c" presumably stand for correct-to-wrong and
# wrong-to-correct answer changes, and the numbers after each name are the
# author's recorded list lengths -- confirm against the code that reads them.
# wino c2w: 180
wino_c2w_index = [4, 7, 13, 15, 18, 27, 36, 40, 41, 47, 50, 53, 60, 69, 71, 73, 76, 80, 84, 97, 100, 108, 113, 114, 119, 121, 132, 151, 158, 160, 171, 175, 180, 183, 185, 189, 197, 199, 201, 206, 207, 209, 232, 235, 245, 253, 255, 266, 272, 274, 284, 285, 292, 306, 307, 316, 320, 323, 327, 333, 338, 342, 347, 381, 387, 390, 393, 407, 409, 418, 423, 426, 427, 433, 439, 444, 453, 454, 455, 459, 467, 473, 475, 478, 479, 481, 482, 490, 493, 498, 512, 518, 525, 529, 531, 535, 538, 543, 557, 560, 568, 573, 574, 580, 582, 595, 597, 600, 605, 610, 620, 627, 638, 640, 646, 654, 661, 666, 677, 678, 686, 689, 693, 695, 710, 711, 712, 714, 721, 733, 735, 739, 740, 745, 752, 753, 759, 760, 766, 768, 772, 774, 776, 780, 782, 798, 808, 819, 824, 831, 836, 842, 848, 849, 861, 868, 869, 872, 873, 882, 893, 903, 911, 916, 920, 927, 928, 930, 943, 960, 962, 967, 973, 976, 977, 979, 981, 984, 995, 997]
# wino w2c: 177
wino_w2c_index = [1, 2, 20, 22, 23, 32, 34, 38, 42, 46, 54, 59, 62, 65, 77, 79, 81, 89, 91, 96, 99, 101, 103, 104, 112, 122, 124, 127, 142, 144, 145, 170, 179, 182, 190, 191, 198, 205, 210, 212, 213, 215, 229, 239, 243, 247, 256, 265, 276, 279, 291, 294, 297, 303, 311, 328, 335, 348, 349, 353, 356, 358, 360, 370, 371, 372, 379, 380, 384, 388, 401, 405, 414, 435, 437, 441, 442, 443, 452, 458, 462, 464, 470, 474, 484, 491, 505, 507, 509, 510, 514, 517, 520, 523, 528, 532, 534, 544, 550, 554, 555, 564, 566, 576, 585, 591, 603, 606, 617, 621, 623, 629, 633, 639, 641, 644, 648, 655, 663, 665, 672, 681, 685, 688, 691, 697, 702, 707, 719, 743, 744, 747, 748, 750, 763, 767, 778, 790, 803, 816, 820, 825, 826, 829, 830, 841, 843, 853, 856, 871, 875, 876, 880, 889, 891, 897, 899, 901, 902, 904, 907, 909, 910, 914, 915, 921, 941, 942, 944, 948, 956, 958, 965, 972, 974, 980, 993]
# csqa c2w: 75
csqa_c2w_index = [10, 24, 36, 41, 49, 137, 149, 158, 161, 174, 177, 193, 219, 220, 231, 244, 276, 283, 286, 297, 303, 308, 331, 340, 355, 379, 386, 394, 395, 402, 413, 424, 431, 441, 443, 450, 457, 467, 488, 521, 523, 525, 527, 539, 599, 604, 645, 652, 654, 685, 700, 709, 738, 754, 770, 795, 825, 826, 858, 869, 881, 893, 898, 903, 910, 913, 925, 929, 930, 939, 940, 946, 955, 993, 998]
# csqa w2c: 114
csqa_w2c_index = [2, 5, 7, 14, 26, 31, 34, 35, 48, 58, 66, 75, 92, 96, 103, 109, 122, 125, 126, 127, 175, 184, 185, 186, 191, 200, 209, 218, 245, 247, 248, 249, 250, 253, 260, 267, 274, 293, 295, 314, 322, 324, 356, 363, 364, 370, 376, 380, 385, 387, 398, 412, 429, 438, 446, 513, 516, 524, 532, 543, 550, 566, 567, 588, 590, 592, 593, 594, 601, 602, 607, 616, 622, 624, 628, 633, 639, 640, 644, 646, 659, 673, 705, 713, 718, 721, 723, 744, 747, 755, 756, 758, 760, 768, 771, 776, 781, 791, 805, 818, 827, 829, 832, 835, 845, 851, 860, 892, 895, 927, 932, 956, 957, 972]
# Private alias so that `from config import *` does not re-export `os`.
import os as _os

# API credential. The OPENAI_API_KEY environment variable takes precedence so
# that real keys never have to be committed; the literal is kept only as a
# fallback so existing setups keep working unchanged.
OPENAI_API_KEY = _os.environ.get('OPENAI_API_KEY', 'fGBdIFoaDUeLQ')

# Per-minute request and token budgets.
# NOTE(review): presumably used to throttle API calls -- see the consumer.
max_requests_per_minute = 3500
max_tokens_per_minute = 90000

# More conservative alternative budgets:
# max_requests_per_minute = 60
# max_tokens_per_minute = 60000

# Chat-completions endpoint; the commented lines are alternative endpoints.
# request_url = "https://api.openai.com/v1/chat/completions"
request_url = 'https://ai.liaobots.work/v1/chat/completions'
# request_url = 'https://api.xty.app/v1/chat/completions'
45 |
--------------------------------------------------------------------------------
/prompts/wrap_prompt.py:
--------------------------------------------------------------------------------
1 | import json
2 |
class Prompter():
    """Base class that assembles a few-shot prompt for one dataset/task pair.

    Prompt material is read from ``./prompts/{task}/`` relative to the current
    working directory. Subclasses implement :meth:`wrap_msg` and set ``name``.
    Prompters whose ``name`` is ``'Baichuan'`` or ``'Mistral'`` build the
    prompt as a list of message dicts; all others build a single string.
    """

    # Values of ``name`` for which wrap_msg() returns a list of chat messages.
    _LIST_STYLE_NAMES = ('Baichuan', 'Mistral')

    def __init__(self, dataset, task) -> None:
        self.dataset = dataset
        self.task = task
        self.name = None

    def load_sys_instruction(self):
        """Return the task's system instruction, wrapped with role ``'sys'``."""
        sys_msg_path = f'./prompts/{self.task}/sys_instruction.json'
        with open(sys_msg_path, 'r', encoding='utf-8') as f:
            sys_msg = json.load(f)['instruction']
        return self.wrap_msg(sys_msg, 'sys')

    def wrap_msg(self, msg, role):
        """Format ``msg`` for ``role`` (``'sys'``, ``'user'`` or model).

        Must be overridden; the base class has no prompt format of its own.
        """
        raise NotImplementedError(
            f'{type(self).__name__} must implement wrap_msg()'
        )

    def load_examples(self, icl_cnt):
        """Return the first ``icl_cnt`` in-context examples, already wrapped.

        The result is a list for list-style prompters and a string otherwise,
        including when no examples are requested (``icl_cnt <= 0``), so it can
        always be concatenated with the other prompt parts.
        """
        example_seq = [] if self.name in self._LIST_STYLE_NAMES else ""
        if icl_cnt <= 0:
            return example_seq

        example_path = f'./prompts/{self.task}/{self.dataset}.json'
        with open(example_path, 'r', encoding='utf-8') as f:
            examples = json.load(f)
        for example in examples[:icl_cnt]:
            user_msg = self.wrap_msg(example['question'], 'user')
            model_msg = self.wrap_msg(example['answer'], 'model')
            example_seq += user_msg + model_msg
        return example_seq

    def wrap_input(self, msg, icl_cnt):
        """Build the full prompt: instruction, ``icl_cnt`` examples, then ``msg``."""
        sys_msg = self.load_sys_instruction()
        example_msg = self.load_examples(icl_cnt=icl_cnt)
        user_msg = self.wrap_msg(msg, 'user')
        if self.name == 'Mistral':
            # The instruction is not sent as a separate message here: its text
            # is prepended to the first turn, which is the first example when
            # there are examples and the actual query otherwise.
            turns = example_msg + user_msg
            turns[0]['content'] = sys_msg[0]['content'] + turns[0]['content']
            return turns
        return sys_msg + example_msg + user_msg
55 |
56 |
class LlamaPrompter(Prompter):
    """Prompter that renders the conversation as one Llama-2-chat style string."""

    def __init__(self, dataset, task) -> None:
        super().__init__(dataset=dataset, task=task)
        # The Llama-2 chat template delimits the system block with
        # <<SYS>> ... <</SYS>>; bare "<>" markers are not part of that format.
        self.sys_prompt = "[INST] <<SYS>>\n{}\n<</SYS>>\n\n"
        self.user_prompt = "{} [/INST] "
        # NOTE(review): the reference template also separates turns with
        # "</s><s>"; left as-is here -- confirm whether that is intended.
        self.model_prompt = "{} [INST] "
        self.name = 'Llama'

    def wrap_msg(self, msg, role):
        """Insert ``msg`` into the template for ``role``.

        ``'sys'`` and ``'user'`` use their own templates; any other role is
        treated as a model turn.
        """
        if role == 'sys':
            template = self.sys_prompt
        elif role == 'user':
            template = self.user_prompt
        else:
            template = self.model_prompt
        return template.format(msg)
73 |
class VicunaPrompter(Prompter):
    """Prompter that renders the conversation as one plain-text string."""

    def __init__(self, dataset, task) -> None:
        super().__init__(dataset=dataset, task=task)
        self.name = ""
        # Templates: instruction and model turns end with a newline, user
        # turns end with a space.
        self.sys_prompt = "{}\n"
        self.user_prompt = "{} "
        self.model_prompt = "{}\n"

    def wrap_msg(self, msg, role):
        """Insert ``msg`` into the template for ``role`` (default: model)."""
        template = (
            self.sys_prompt if role == 'sys'
            else self.user_prompt if role == 'user'
            else self.model_prompt
        )
        return template.format(msg)
90 |
91 |
class GPTPrompter(Prompter):
    """Prompter that emits chat messages as ``{"role", "content"}`` dicts."""

    def __init__(self, dataset, task) -> None:
        super().__init__(dataset, task)
        # 'Baichuan' is the name Prompter.load_examples checks to accumulate
        # examples in a list instead of a string.
        self.name = 'Baichuan'

    def wrap_msg(self, msg, role):
        """Return a one-element message list for ``role``.

        ``'sys'`` maps to ``"system"``, ``'user'`` to ``"user"`` and anything
        else to ``"assistant"``.
        """
        if role == 'sys':
            chat_role = "system"
        elif role == 'user':
            chat_role = "user"
        else:
            chat_role = "assistant"
        return [{"role": chat_role, "content": msg}]
105 |
class MistralPrompter(Prompter):
    """Prompter that emits chat messages using only user/assistant roles."""

    def __init__(self, dataset, task) -> None:
        super().__init__(dataset, task)
        self.name = 'Mistral'

    def wrap_msg(self, msg, role):
        """Return a one-element message list for ``role``.

        Both ``'sys'`` and ``'user'`` are sent with the ``"user"`` role; any
        other role becomes ``"assistant"``.
        """
        chat_role = "user" if role in ('sys', 'user') else "assistant"
        return [{"role": chat_role, "content": msg}]
--------------------------------------------------------------------------------
/prompts/sr_answer/wino.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"The test was hard for Samuel but a breeze for Randy , since _ had failed to study for it.\n(1) Samuel (2) Randy\nRational: To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it. So the answer is: (1) Samuel. Feedback: The rational is good and do not need to change.",
4 | "answer":"To pass a test, a person need to study for it. If a person feel the test like a breeze, it means the test is easy for him. A person feels the test easy, because he studies hard for it. Since we know that Samuel feel the test very hard, she may fail to study for it. So the answer is: (1) Samuel."
5 | },
6 | {
7 | "question":"Kyle slowly wormed their way into the life of Derrick, because _ was good and manipulating people.\n(1) Kyle (2) Derrick\nRational: A person wormes his way into other's life, because he is friendly and approachable. A person is seen as manipulating people, that means he like to interact with others. Since Kyle slowly wormed their way into the life of Derric, Derric will be seen as good and manipulating people. So the answer is: (2) Derric. Feedback: The rational is wrong. Since a person wormes his way into other's life is friendly, it's Kyle who was good, not Derrick.",
8 | "answer":"A person wormes his way into other's life, because he is friendly and approachable. A friendly person is considered good. A person is seen as manipulating people, that means he like to interact with others and others like him. Since Kyle slowly wormed their way into the life of Derric, Kyle will be seen as good and manipulating people. So the answer is: (1) Kyle."
9 | },
10 | {
11 | "question":"Donald was very grounded but Michael often got lost in their daydreams. _ was very capricious all the time.\n(1) Donald (2) Michael\nRational: A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Donald is seen as very capricious all the time. So the answer is: (1) Donald. Feedback: The rational is wrong. Since a capricious person does everything only according to his own ideas, he likely to got lost in their daydreams. Thus, it is Michael who is capricious instead of Donald.",
12 | "answer":"A person is grounded means he works hard and does not like to fantasize. A person often gets lost in his daydreams, he is seen as unrealistic and egocentric. A person is capricious all the time means he does everything only according to his own ideas. Since Michael often gets lost in their daydreams but Donald does not, Michael is seen as very capricious all the time. So the answer is: (2) Michael."
13 | },
14 | {
15 | "question":"After stopping when running, Betty was able to help Rachel treat their asthma because _ has it too.\n(1) Betty (2) Rachel\nRational: A person who has a mild disease may knows how to treat it. Since Betty was able to help Rachel treat their asthma, Rachel has asthma. So the answer is: (2) Rachel. Feedback: The rational is wrong. Since the question asks who has it too, it more likely to fill the Betty into the blank since she knows how to treat the asthma.",
16 | "answer":"A person who has a mild disease may knows how to treat it. Since Betty was able to help Rachel treat their asthma, Betty may has it, too. So the answer is: (1) Betty."
17 | },
18 | {
19 | "question":"Jeffrey was sick with the stomach flu and not hungry, but Christopher was starving. _ ordered food.\n(1) Jeffrey (2) Christopher\nRational: Stomach flu is a viral infection in the digestive system. A person is starve so that he need to eat something next. A person is not hungry means he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffery was sick and not hungry, Christopher tended to order food. So the answer is: (2) Christopher. Feedback: The rational is good and do not need to change.",
20 | "answer":"Stomach flu is a viral infection in the digestive system. A person is starve so that he need to eat something next. A person is not hungry means he does not want to eat something. Ordering food means to buy food online. A person may eat food after ordering it. Since Christopher was starving and Jeffery was sick and not hungry, Christopher tended to order food. So the answer is: (2) Christopher."
21 | }
22 | ]
--------------------------------------------------------------------------------
/prompts/sr_answer/hella.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "question":"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then \n (1) the man adds wax to the windshield and cuts it. (2) a person board a ski lift, while two men supporting the head of the person wearing winter clothes snow as the we girls sled. (3) the man puts on a christmas coat, knitted with netting. (4) the man continues removing the snow on his car.\nRationale: Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next. So the answer is: (4) the man continues removing the snow on his car. Feedback: The rational is good and do not need to change.",
4 | "answer":"Snow must be removed from a car before one can drive it. Since there is some snow covering the car, so the man tends to remove it next. So the answer is: (4) the man continues removing the snow on his car."
5 | },
6 | {
7 | "question":"The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt \n (1) is standing on the sponge cutting the hair of the person wearing the blue shirt. (2) is doing the hairstyle with his hand and the hairspray. (3) sits on the chair next to the sink. (4) is being shown eye to eye.\nRationale: Since the man is doing a hair cut, he need to do the hairstyle first. So the answer is: (2) is doing the hairstyle with his hand and the hairspray. Feedback: The rational is wrong. Since the man in the blue shirt man is been demonstrating, he is the person that getting a hair cut not doing a hair cut. Thus, he does not need to doing the hairstyle.",
8 | "answer":"One should sit still when getting a haircut. People often sits down when they are get a haircut. From the context, we can get that the man in the blue shirt is getting a haircut, so he should sit down and wait. So the answer is: (3) sits on the chair next to the sink. "
9 | },
10 | {
11 | "question":"The roof is done and a view of the entire house is shown to show off the finished roof. the woman \n (1) is standing in front of the home, smiling while talking. (2) interviews the man again and leaves the room. (3) shows the soil with two lay-ups of shingle and applies a layer onto the top shingle. (4) stacks the bags on the side and begins putting stencils on the top.\nRationale: One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile. So the answer is: (1) is standing in front of the home, smiling while talking. Feedback: The rational is good and do not need to change.",
12 | "answer":"One usually feels pleased after finishing a home improvement project. If a person feels pleased, he tends to smile. Since the woman has finished all the work, she may be proud of it and feel pleased. Then she may smile. So the answer is: (1) is standing in front of the home, smiling while talking."
13 | },
14 | {
15 | "question":"People practice ballet in a studio alone and in couples. then \n (1) a man begins dancing and top dancing standing on the ground. (2) a boy and a girl dance ballet, then a man enter and dance with the girl. (3) the band performs ballet in the studio and in the open gathered. (4) people dances and dances together, dancing.\nRationale: Since people are practicing ballet, they may dance together. So the answer is: (4) people dances and dances together, dancing. Feedback: The rational is wrong. Ballet is a kind of dance that plays with their partener or dance alone. Thus, it is unlikely that people dance together.",
16 | "answer":"Ballet is a kind of dance. If people pratice ballet, they often dance with their partener or dance alone, which matches the description of option (2). So the answer is: (2) a boy and a girl dance ballet, then a man enter and dance with the girl. "
17 | },
18 | {
19 | "question":"A person is seen standing on a tennis court bouncing a ball. another man \n (1) takes his turn speaking to the camera. (2) walks up hitting a birdie. (3) is seen standing ready on the other side in front of a large audience. (4) is holding the racket next to him.\nRationale: Since a person is bouncing a ball on a tennis court, they may play tennis. Thus, another man should hold a racket to play it. So the answer is: (4) is holding the racket next to him. Feedback: The rational is wrong. In the tennis race, two players are located on opposite sides of the court, thus, it is impossible that another man stand next to him.",
20 | "answer":"The player who bounces the ball is the one who serves. In the tennis race, two players are located on opposite sides of the court, one is serving and the other is preparing to receive the ball. Since there is a person who serves, another one should stands ready. So the answer is: (3) is seen standing ready on the other side in front of a large audience."
21 | }
22 | ]
23 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.25.0
2 | aiohttp==3.9.3
3 | aiosignal==1.3.1
4 | alembic==1.13.1
5 | aniso8601==9.0.1
6 | annotated-types==0.7.0
7 | anyio==4.4.0
8 | asttokens==2.4.1
9 | async-timeout==4.0.3
10 | attrs==23.2.0
11 | base58==2.1.1
12 | blessed==1.20.0
13 | blinker==1.8.2
14 | blis==0.7.11
15 | boto3==1.33.13
16 | botocore==1.33.13
17 | cached-path==1.1.6
18 | cachetools==5.3.2
19 | catalogue==2.0.10
20 | certifi==2023.11.17
21 | charset-normalizer==3.3.2
22 | click==8.1.7
23 | cloudpickle==3.0.0
24 | commonmark==0.9.1
25 | conda-pack==0.7.1
26 | contourpy==1.2.0
27 | cycler==0.12.1
28 | cymem==2.0.8
29 | dataclasses==0.6
30 | datasets==2.19.1
31 | decorator==5.1.1
32 | Deprecated==1.2.14
33 | dill==0.3.8
34 | distro==1.9.0
35 | docker==7.1.0
36 | docker-pycreds==0.4.0
37 | docx==0.2.4
38 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl#sha256=84d7d8059bfbf53c09b39139782f76cd6ac7064851e7799dcc685c06ebf5fd4f
39 | entrypoints==0.4
40 | evaluate==0.4.2
41 | exceptiongroup==1.2.0
42 | executing==2.0.1
43 | fairscale==0.4.6
44 | filelock==3.7.1
45 | Flask==3.0.3
46 | fonttools==4.46.0
47 | frozenlist==1.4.1
48 | fsspec==2023.12.2
49 | func-timeout==4.3.5
50 | gitdb==4.0.11
51 | GitPython==3.1.40
52 | google-api-core==2.15.0
53 | google-auth==2.25.2
54 | google-cloud-core==2.4.1
55 | google-cloud-storage==2.14.0
56 | google-crc32c==1.5.0
57 | google-resumable-media==2.7.0
58 | googleapis-common-protos==1.62.0
59 | gpustat==1.1.1
60 | graphene==3.3
61 | graphql-core==3.2.3
62 | graphql-relay==3.2.0
63 | greenlet==3.0.3
64 | gunicorn==22.0.0
65 | h11==0.14.0
66 | h5py==3.10.0
67 | httpcore==1.0.5
68 | httpx==0.27.0
69 | huggingface-hub==0.23.2
70 | idna==3.6
71 | importlib-metadata==7.0.0
72 | importlib-resources==6.1.1
73 | iniconfig==2.0.0
74 | ipython==8.18.1
75 | itsdangerous==2.2.0
76 | jedi==0.19.1
77 | Jinja2==3.1.2
78 | jmespath==1.0.1
79 | joblib==1.3.2
80 | jsonlines==4.0.0
81 | jsonnet==0.20.0
82 | kiwisolver==1.4.5
83 | langcodes==3.3.0
84 | llvmlite==0.42.0
85 | lmdb==1.4.1
86 | lxml==5.2.2
87 | Mako==1.3.5
88 | Markdown==3.6
89 | MarkupSafe==2.1.3
90 | matplotlib==3.8.2
91 | matplotlib-inline==0.1.6
92 | mlflow==2.13.0
93 | more-itertools==10.1.0
94 | mpmath==1.3.0
95 | multidict==6.0.5
96 | multiprocess==0.70.16
97 | murmurhash==1.0.10
98 | networkx==3.2.1
99 | nltk==3.8.1
100 | numba==0.59.1
101 | numpy==1.26.2
102 | nvidia-cublas-cu12==12.1.3.1
103 | nvidia-cuda-cupti-cu12==12.1.105
104 | nvidia-cuda-nvrtc-cu12==12.1.105
105 | nvidia-cuda-runtime-cu12==12.1.105
106 | nvidia-cudnn-cu12==8.9.2.26
107 | nvidia-cufft-cu12==11.0.2.54
108 | nvidia-curand-cu12==10.3.2.106
109 | nvidia-cusolver-cu12==11.4.5.107
110 | nvidia-cusparse-cu12==12.1.0.106
111 | nvidia-ml-py==12.535.133
112 | nvidia-nccl-cu12==2.18.1
113 | nvidia-nvjitlink-cu12==12.3.101
114 | nvidia-nvtx-cu12==12.1.105
115 | openai==1.30.5
116 | opentelemetry-api==1.24.0
117 | opentelemetry-sdk==1.24.0
118 | opentelemetry-semantic-conventions==0.45b0
119 | packaging==23.2
120 | palettable==3.3.3
121 | pandas==2.1.4
122 | parso==0.8.3
123 | pathtools==0.1.2
124 | pathy==0.10.3
125 | patsy==0.5.6
126 | peft==0.11.1
127 | pexpect==4.9.0
128 | Pillow==10.1.0
129 | pluggy==1.3.0
130 | preshed==3.0.9
131 | promise==2.3
132 | prompt-toolkit==3.0.43
133 | protobuf==3.20.3
134 | psutil==5.9.6
135 | ptyprocess==0.7.0
136 | pure-eval==0.2.2
137 | pyaml-env==1.2.1
138 | pyarrow==15.0.2
139 | pyarrow-hotfix==0.6
140 | pyasn1==0.5.1
141 | pyasn1-modules==0.3.0
142 | pydantic==2.7.1
143 | pydantic_core==2.18.2
144 | Pygments==2.17.2
145 | pyparsing==3.1.1
146 | pytest==7.4.3
147 | python-dateutil==2.8.2
148 | pytz==2023.3.post1
149 | PyYAML==6.0.1
150 | querystring-parser==1.2.4
151 | rank-bm25==0.2.2
152 | regex==2023.10.3
153 | requests==2.31.0
154 | rich==12.6.0
155 | rouge==1.0.1
156 | rsa==4.9
157 | s3transfer==0.8.2
158 | sacremoses==0.1.1
159 | safetensors==0.4.1
160 | scikit-learn==1.3.2
161 | scipy==1.11.4
162 | seaborn==0.13.0
163 | sentence-transformers==2.2.2
164 | sentencepiece==0.1.99
165 | sentry-sdk==1.39.0
166 | setproctitle==1.3.3
167 | shap==0.45.0
168 | shortuuid==1.0.11
169 | six==1.16.0
170 | slicer==0.0.7
171 | smart-open==6.4.0
172 | smmap==5.0.1
173 | sniffio==1.3.1
174 | spacy==3.3.3
175 | spacy-legacy==3.0.12
176 | spacy-loggers==1.0.5
177 | SQLAlchemy==2.0.30
178 | sqlparse==0.5.0
179 | srsly==2.4.8
180 | stack-data==0.6.3
181 | statsmodels==0.14.1
182 | sympy==1.12
183 | tensorboardX==2.6.2.2
184 | termcolor==1.1.0
185 | thinc==8.0.17
186 | threadpoolctl==3.2.0
187 | tiktoken==0.5.2
188 | tokenizers==0.15.0
189 | tomli==2.0.1
190 | torch==2.1.1
191 | torchaudio==2.1.1
192 | torchvision==0.16.1
193 | tqdm==4.66.1
194 | traitlets==5.14.0
195 | transformers==4.36.0
196 | triton==2.1.0
197 | typer==0.4.2
198 | typing_extensions==4.12.0
199 | tzdata==2023.3
200 | urllib3==1.26.18
201 | wandb==0.12.21
202 | wasabi==0.10.1
203 | wcwidth==0.2.12
204 | Werkzeug==3.0.3
205 | wrapt==1.16.0
206 | xxhash==3.4.1
207 | yarl==1.9.4
208 | zipp==3.17.0
209 |
--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import seaborn as sns
4 | import pandas as pd
5 | from matplotlib import rcParams
6 |
# Global matplotlib font configuration shared by every plot in this module.
rcParams['font.family']='sans-serif'
plt.rcParams['font.sans-serif'] = ['Times New Roman'] # To render Chinese text, set this to SimHei instead
plt.rcParams['axes.unicode_minus'] = False # Render the minus sign correctly
10 |
11 |
def draw_plot(layers, scores, labels, path):
    """Plot one line per score series against ``layers`` and save the figure.

    Args:
        layers: x-axis values shared by every series.
        scores: sequence of y-value series, one plotted line each.
        labels: legend labels; ``labels[k]`` names ``scores[k]``.
        path: destination handed to ``plt.savefig``.
    """
    # Create the figure
    plt.figure(figsize=(10, 6))

    for pos, series in enumerate(scores):
        plt.plot(layers, series, label=labels[pos], marker='.')

    # Axis labels
    plt.xlabel('Layers')
    plt.ylabel('Scores')
    # Fixed x-axis range
    plt.xlim(0, 40)
    # Legend
    plt.legend()
    plt.savefig(path)
    plt.close()
32 |
def draw_acc(layers, scores, label, path):
    """Plot a single accuracy curve against ``layers`` and save it to ``path``.

    Draws on the current matplotlib figure, then closes it after saving.
    """
    plt.plot(layers, scores, marker='.', label=label)
    plt.xlabel('Layers')
    plt.ylabel('Acc')
    # Fixed x-axis range
    plt.xlim(0, 40)
    # Legend
    plt.legend()
    plt.savefig(path)
    plt.close()
44 |
def draw_heat(index, scores, path, exp=None, vmax=None):
    """Render ``scores`` as a seaborn heat map and save it to ``path``.

    Args:
        index: y tick labels; only used when ``exp`` is ``'attn'`` or ``'mlp'``.
        scores: 2-D array-like handed to ``sns.heatmap``.
        path: destination handed to ``plt.savefig``.
        exp: ``'attn'`` / ``'mlp'`` select the colour map and a colour scale
            running from 0 to ``vmax``; any other value uses the default
            diverging map. A truthy value also selects the "Layers on the
            x-axis" layout, a falsy one the "Heads x Layers" layout.
        vmax: upper bound of the colour scale for the ``'attn'``/``'mlp'`` case.
    """
    font = 'Times New Roman'
    sns.set()
    if exp in ('attn', 'mlp'):
        palette = "RdBu_r" if exp == 'attn' else "BrBG"
        ax = sns.heatmap(scores, cmap=palette, center=0, vmin=0, vmax=vmax, yticklabels=index)
    else:
        ax = sns.heatmap(scores, cmap="RdBu_r", center=0)

    # Label columns 1, 5, 10, ..., 40; +0.5 centres each tick on its cell.
    tick_positions = [0, 4, 9, 14, 19, 24, 29, 34, 39]
    tick_labels = [pos + 1 for pos in tick_positions]
    tick_centers = [pos + 0.5 for pos in tick_positions]

    if exp:
        plt.xlabel('Layers', fontdict={'family' : font, 'size':22})
        plt.yticks(fontproperties = font, fontsize=20)
        plt.xticks(ticks=tick_centers, labels=tick_labels, fontproperties = font, fontsize=20)
        plt.subplots_adjust(left=0.06, right=0.99, top=0.98, bottom=0.15)
    else:
        plt.ylabel('Layers', fontdict={'family' : font, 'size':22})
        plt.xlabel('Heads', fontdict={'family' : font, 'size':22})
        plt.yticks(ticks=tick_centers, labels=tick_labels, fontproperties = font, fontsize=20)
        plt.xticks(ticks=tick_centers, labels=tick_labels, fontproperties = font, fontsize=20)
        plt.subplots_adjust(left=0.15, right=0.99, top=0.98, bottom=0.15)

    # Style the colour bar ticks to match the axes.
    colorbar = ax.collections[0].colorbar
    colorbar.ax.tick_params(labelsize=20)
    for tick_label in colorbar.ax.get_yticklabels():
        tick_label.set_fontname(font)
    plt.savefig(path)
    plt.close()
76 |
77 |
def draw_line_plot(x_range, results, labels, path, y_label='Scores'):
    """Draw one seaborn line per label from flattened ``results`` and save it.

    The inputs are flattened into a long-format frame with columns
    ``layers`` / ``scores`` / ``tags``: every series in ``results`` is
    concatenated in order, the x values are repeated once per label, and each
    label is repeated ``len(x_range)`` times.

    Args:
        x_range: x values shared by all series. Any iterable is accepted
            (list, ``range``, NumPy array).
        results: sequence of score series, concatenated in order.
        labels: hue/legend label per series.
        path: destination handed to ``plt.savefig``.
        y_label: y-axis label; ``'Attr Div'`` additionally draws a dashed
            zero line and uses a slightly wider left margin.
    """
    # Materialise once: ``range * int`` raises TypeError and ``ndarray * int``
    # multiplies element-wise, whereas list repetition is what is needed here.
    x_values = list(x_range)

    scores = []
    for series in results:
        scores += list(series)
    layers = x_values * len(labels)
    tags = []
    for label in labels:
        tags += [label] * len(x_values)

    data_plot = pd.DataFrame({"layers":layers, "scores":scores, "tags":tags})
    sns.lineplot(x = "layers", y = "scores", hue='tags', data=data_plot)
    plt.gca().legend().set_title('')
    plt.ylabel(ylabel=y_label, fontdict={'family' : 'Times New Roman', 'size':22})
    plt.xlabel('Layers', fontdict={'family' : 'Times New Roman', 'size':22})
    plt.yticks(fontproperties = 'Times New Roman', fontsize=20)
    plt.xticks(fontproperties = 'Times New Roman', fontsize=20)
    plt.legend(prop={'family' : 'Times New Roman', 'size':22})
    # NOTE: this mutates the global rcParams and so affects later figures too.
    plt.rcParams.update({'legend.fontsize':22})
    if y_label == 'Attr Div':
        plt.axhline(0, linestyle='--', color='k')
        plt.subplots_adjust(left=0.17, right=0.99, top=0.99, bottom=0.15)
    else:
        plt.subplots_adjust(left=0.16, right=0.99, top=0.99, bottom=0.15)
    plt.savefig(path)
    plt.close()
106 |
def draw_attr_bar(layers, scores, path):
    """Draw a bar chart of per-layer scores, positive and negative in different colours.

    Args:
        layers: x positions of the bars.
        scores: one score per layer; any array-like (list or ndarray).
        path: destination handed to ``plt.savefig``.
    """
    # Coerce so plain Python lists support the element-wise comparisons below.
    scores = np.asarray(scores)
    score_up = np.where(scores > 0, scores, 0)
    score_down = np.where(scores < 0, scores, 0)

    plt.ylabel('Score', fontdict={'family' : 'Times New Roman'})
    plt.xlabel('Layers', fontdict={'family' : 'Times New Roman'})
    plt.yticks(fontproperties = 'Times New Roman')
    plt.xticks(fontproperties = 'Times New Roman')

    plt.xticks([1, 10, 20, 30, 40])
    plt.bar(layers, score_up, width=0.5, color='#EC7063')
    plt.bar(layers, score_down, width=0.5, color='#3498DB')

    plt.savefig(path)
    # Close the figure like the other draw_* helpers do; otherwise the next
    # plot is drawn on top of these bars and the figure is never released.
    plt.close()
122 |
123 |
--------------------------------------------------------------------------------
/cal_toxic_rate.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
# Command-line options: which model's results to score, and on which dataset.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='Llama-2-13b-chat-hf')
parser.add_argument('--dataset', type=str, default='csqa')
args = parser.parse_args()

model_name = args.model
dataset = args.dataset

# Baseline: the direct-answer run. The final list element is dropped; in these
# result files it is the entry that get_acc() reads its 'acc' value from.
base_file_path = f'./result/{dataset}/{model_name}_direct_answer_2000.json'
with open(base_file_path, 'r') as f:
    base_data = json.load(f)[:-1]
17 |
def get_drift_acc(task, index):
    """Return the accuracy of ``task`` restricted to the sample positions in ``index``.

    Args:
        task: strategy key ('cot', 'sc', 'l2m', 'sr', 'cont', 'res', 'sps');
            any other value selects the ``rt_result_2000_rTrue`` file.
        index: container of sample positions to score.

    Returns:
        Fraction of the selected samples whose ``cor_flag`` is truthy.

    Raises:
        ZeroDivisionError: if no sample position falls inside ``index``.
    """
    path_by_task = {
        'cot': f'./result/{dataset}/{model_name}_cot_answer_2000.json',
        'sc': f'./result/{dataset}/{model_name}_sc_2000.json',
        'l2m': f'./result/{dataset}/{model_name}_l2m_question_dev_2000.json',
        'sr': f'./result/{dataset}/{model_name}_self_refine_dev_2000.json',
        'cont': f'./result/{dataset}/{model_name}_cons_answer_dev_2000.json',
        'res': f'./result/{dataset}/res.json',
        'sps': f'./result/{dataset}/{model_name}_rt_result_2000_rFalse.json',
    }
    fallback = f'./result/{dataset}/{model_name}_rt_result_2000_rTrue.json'
    path = path_by_task.get(task, fallback)

    with open(path, 'r') as f:
        # The last list element is not a sample and is left out.
        test_data = json.load(f)[:-1]

    selected = [entry for pos, entry in enumerate(test_data) if pos in index]
    correct = sum(1 for entry in selected if entry['cor_flag'])
    return correct / len(selected)
47 |
# File-name suffix (the part after "<model_name>_") of each task's result file.
_RESULT_SUFFIXES = {
    'direct': 'direct_answer_2000.json',
    'cot': 'cot_answer_2000.json',
    'sc': 'sc_2000.json',
    'l2m': 'l2m_2000.json',
    'sr': 'sr_2000.json',
    'cont': 'cons_answer_2000.json',
    'res': 'res.json',
    'sps': 'rt_result_2000_rFalse.json',
}
# Any task not listed above falls back to this file.
_DEFAULT_RESULT_SUFFIX = 'rt_result_2000_rTrue.json'


def _result_path(task):
    """Return the result-file path for ``task`` under the current dataset/model."""
    suffix = _RESULT_SUFFIXES.get(task, _DEFAULT_RESULT_SUFFIX)
    return f'./result/{dataset}/{model_name}_{suffix}'


def get_tr(task, base):
    """Return the toxic rate of ``task`` relative to the baseline ``base``.

    The toxic rate is the share of samples answered wrongly by ``task`` that
    the baseline answered correctly.

    Args:
        task: strategy key understood by ``_result_path``.
        base: baseline samples, position-aligned with the task's samples; each
            needs a ``cor_flag`` entry.

    Returns:
        ``-1`` if the result file does not exist, ``0.0`` if the task has no
        wrong answers (nothing could have been flipped), otherwise the ratio.
    """
    path = _result_path(task)
    if not os.path.exists(path):
        return -1
    with open(path, 'r') as f:
        # The last list element is the summary entry, not a sample.
        test_data = json.load(f)[:-1]

    wrong = 0
    flipped = 0
    for i, entry in enumerate(test_data):
        if entry['cor_flag']:
            continue
        if base[i]['cor_flag']:
            flipped += 1
        wrong += 1
    if wrong == 0:
        # Avoid 0/0 when every answer is correct.
        return 0.0
    return flipped / wrong


def get_acc(task):
    """Return the accuracy stored in the last entry of ``task``'s result file.

    Returns ``-1`` if the result file does not exist.
    """
    path = _result_path(task)
    if not os.path.exists(path):
        return -1
    with open(path, 'r') as f:
        acc = json.load(f)[-1]['acc']
    return acc
108 |
# Report accuracy and toxic rate for every strategy. A value of -1 means the
# corresponding result file was not found (see get_tr / get_acc).
test_data = []
# 'riders' has no dedicated branch in get_tr / get_acc, so it resolves to their
# fallback file (..._rt_result_2000_rTrue.json).
task_ls = ['direct', 'cot','sc', 'sr', 'l2m', 'cont', 'res', 'sps', 'riders']
for task in task_ls:
    tr = get_tr(task, base_data)
    acc = get_acc(task)
    print(f'{task}: Accuracy:{acc} Toxic Rate:{tr}')
# Disabled analysis: accuracy on two hand-picked sample subsets per dataset.
# NOTE(review): the task list below uses 'con' while get_drift_acc matches
# 'cont' - confirm before re-enabling.
# if dataset == 'csqa':
#     index1 = [41,49,158,161,174,244,276,283,286,297,386,394,402,413,424,431,441,443,457,523,539,652,700,709,754,869,881,898,939,946]
#     index2 = [36,331,379,395,521,525,527,599,654,826,893,913,998]
# elif dataset == 'wino':
#     index1 = [7,15,50,53,97,108,119,121,132,201,207,209,235,253,284,285,307,320,338,342,347,387,390,426,453,467,475,478,482,490,498]
#     index2 = [40,47,73,175,180,185,197,232,255,266,274,306,316,327,333,409,423,427,433,444,454,481,493]
# task_ls = ['cot', 'sc','sr','l2m','con', 'res', 'sps', 'riders']
# for task in task_ls:
#     acc1 = get_drift_acc(task, index1)
#     acc2 = get_drift_acc(task, index2)
#     print(f'{task}: Type1:{acc1} Type2:{acc2}')
--------------------------------------------------------------------------------
/llm_intervention.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import argparse
4 | import json
5 | import seaborn as sns
6 | import matplotlib.pyplot as plt
7 | import numpy as np
8 | import random
9 | from tqdm import tqdm
10 | from transformers import AutoTokenizer
11 | from load_data import CoTLoader, InterventionData
12 | from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
13 | from metrics import draw_plot, draw_heat, draw_line_plot
14 | from intervention_model import Model
15 | from utils import get_prompter
16 | random.seed(17)
17 |
18 | ## argparse
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='Llama-2-13b-chat-hf', type=str)
parser.add_argument('--dataset', default='wino', type=str)
parser.add_argument('--mode', default='C2W', type=str)
parser.add_argument('--cnt', default=10, type=int)
parser.add_argument('--exp', default='mlp', type=str)
parser.add_argument('--avg', action='store_true')

args = parser.parse_args()
model_name, dataset, mode = args.model, args.dataset, args.mode
cnt, exp, avg = args.cnt, args.exp, args.avg

## Path
model_path = f'./model/{model_name}'
if not model_name.startswith('Llama'):
    # Non-Llama models: result and representation files carry the model name.
    cot_file_path = f'./result/{dataset}/{model_name}_cot_answer_2000.json'
    base_file_path = f'./result/{dataset}/{model_name}_direct_answer_2000.json'
    full_cot_path = f'./result/{dataset}/{model_name}_cot_dev_500.json'
    mlp_avg_rep_path = f'./result/{dataset}/{model_name}-False-2000_rep_std.json'
    attn_avg_rep_path = f'./result/{dataset}/{model_name}-True-100_rep_std.json'
else:
    # Llama models: dev-split results and the shared attn-*/cnt-100 files.
    cot_file_path = f'./result/{dataset}/{model_name}_cot_answer_dev_1000.json'
    base_file_path = f'./result/{dataset}/{model_name}_direct_answer_dev_1000.json'
    full_cot_path = f'./result/{dataset}/{model_name}_cot_dev_1000.json'
    mlp_avg_rep_path = f'./result/{dataset}/attn-False_cnt-100_rep_std.json'
    attn_avg_rep_path = f'./result/{dataset}/attn-True_cnt-100_rep_std.json'
# Figure path without extension; '.pdf' is appended where figures are drawn.
result_path = f'./result/{dataset}/fig/{exp}_inter/{model_name}_{mode}_{cnt}'
49 |
def setup_seed(seed):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``.

    Also switches cuDNN to its deterministic mode.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.deterministic = True


setup_seed(17)
57 | ## Load Model
58 |
# Tokenizer: Baichuan checkpoints need the pinned revision and the slow tokenizer.
if model_name.startswith('Baichuan'):
    tokenizer = AutoTokenizer.from_pretrained(model_path,
                                              revision="v2.0",
                                              use_fast=False,
                                              trust_remote_code=True)
else:
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Built once for every model family (the original created it twice in a row).
cot_prompter = get_prompter(model_name=model_name, dataset=dataset, task='cot_answer')

## Load Data
# Hand-picked sample positions for the 'C2W' mode, keyed by
# (model name starts with 'Llama', dataset). Previously tried alternatives:
#   Llama/csqa:  [41,49,158,161,174,219,244,276,283,286,297,386,394,402,413,424,431,441,443,457][:cnt]
#   Llama/wino:  [7, 15, 50, 53, 84, 97, 108, 119, 121, 132, 201, 207, 209, 235, 253][:cnt]
#   other/wino:  [28,53,90,93,97,102,145,148,158,183,185,201,261,316,327,348,366,393,429,437,453,465,506,584,642,658,661,678,696,710,732,734,755,756,771,805,843,882]
c2w_index = {
    (True, 'csqa'): [36,331,379,395,521,525,527,599,654,826,893,913,998],
    (True, 'wino'): [40,47,73,175,180,185,197,232,255,266,274,306,316,327,333,409,423,427,433,444,454,481,493],
    (False, 'csqa'): [86,221,263,279,280,342,352,395,399,408,471,545,599,761,857,877,913],
    (False, 'wino'): [28,90,97,158,183,185,201,261,316,327,348,393,437,453,465,506,584,661,678,696,732,734,755,771,805,843],
}
index = None
if mode == 'C2W':
    # Stays None for datasets without a curated list.
    index = c2w_index.get((model_name.startswith('Llama'), dataset))

dataloader = CoTLoader()
data, index = dataloader.load_data(cot_file=cot_file_path, base_file=base_file_path, mode=mode, cnt=cnt, index=index)

inter_data_list = []
model = Model(model_name=model_name)
for msg in data:
    if model_name.startswith('Baichuan'):
        # Baichuan additionally passes the underlying model object.
        inter_data_list.append(InterventionData(msg, tokenizer, cot_prompter, model.model))
    else:
        inter_data_list.append(InterventionData(msg, tokenizer, cot_prompter))

## Run the intervention experiment
if exp == 'mlp':
    with open(mlp_avg_rep_path, 'r') as f:
        reps = json.load(f)
    results = model.intervention_experiment(inter_data_list, reps)
else:
    with open(attn_avg_rep_path, 'r') as f:
        reps = json.load(f)
    results = model.attention_experiment(inter_data_list, reps)

x_range = range(1, 41)

inter_dic = {1:'context', 2:'option', 3:'cot', 4:'last'}
# Accumulator indexed as (intervention target, layer, sample).
scores = np.zeros(shape=(len(inter_dic.keys()),len(x_range),len(index)))
labels = []

# savefig does not create missing directories.
os.makedirs(os.path.dirname(result_path), exist_ok=True)

for i, result in results.items():
    labels = []
    values = []
    for idx, score in result.items():
        labels.append(inter_dic[idx])
        values.append(score.squeeze().numpy())
    if avg:
        # `i` is used as a position on the sample axis of `scores`.
        scores[:,:,i] = np.array(values)
    else:
        # One figure per sample: the sample key is part of the file name so
        # later samples no longer overwrite earlier ones.
        draw_heat(labels, values, f'{result_path}_{i}.pdf')

# NOTE(review): the nesting below is reconstructed - the averaged figure is
# drawn only in --avg mode, since `scores` is only filled in that mode.
if avg:
    scores = np.mean(scores,axis=-1)
    # Colour-scale ceiling depends on the model family and the experiment.
    if model_name.startswith('Baichuan'):
        vmax = 0.2
    else:
        vmax = 0.5 if exp == 'attn' else 0.4

    draw_heat(labels, scores, result_path+'.pdf', exp=exp, vmax=vmax)
--------------------------------------------------------------------------------
/result/csqa/attn-False_cnt-100_rep_std.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "0": 0.0063525615260005,
4 | "1": 0.008794148452579975,
5 | "2": 0.011081079952418804,
6 | "3": 0.021754594519734383,
7 | "4": 0.020850781351327896,
8 | "5": 0.03029480203986168,
9 | "6": 0.03711111471056938,
10 | "7": 0.04173906520009041,
11 | "8": 0.04823498800396919,
12 | "9": 0.05071701109409332,
13 | "10": 0.05887265503406525,
14 | "11": 0.05986303463578224,
15 | "12": 0.06207869574427605,
16 | "13": 0.0655895322561264,
17 | "14": 0.06801052391529083,
18 | "15": 0.07444223016500473,
19 | "16": 0.08237172663211823,
20 | "17": 0.08466289937496185,
21 | "18": 0.08366402238607407,
22 | "19": 0.0861833393573761,
23 | "20": 0.08901597559452057,
24 | "21": 0.09655385464429855,
25 | "22": 0.09678779542446136,
26 | "23": 0.09590758383274078,
27 | "24": 0.09457088261842728,
28 | "25": 0.0924505665898323,
29 | "26": 0.09768616408109665,
30 | "27": 0.09697309136390686,
31 | "28": 0.09959433227777481,
32 | "29": 0.10030948370695114,
33 | "30": 0.10477196425199509,
34 | "31": 0.10549679398536682,
35 | "32": 0.11576616019010544,
36 | "33": 0.11213714629411697,
37 | "34": 0.1258470118045807,
38 | "35": 0.13941508531570435,
39 | "36": 0.1589518040418625,
40 | "37": 0.1969732791185379,
41 | "38": 0.32282334566116333,
42 | "39": 0.6383440494537354
43 | },
44 | "2": {
45 | "0": 0.006251771003007889,
46 | "1": 0.008267977274954319,
47 | "2": 0.009305547922849655,
48 | "3": 0.017875447869300842,
49 | "4": 0.020738767459988594,
50 | "5": 0.030468763783574104,
51 | "6": 0.03731619194149971,
52 | "7": 0.041411206126213074,
53 | "8": 0.047632887959480286,
54 | "9": 0.057693738490343094,
55 | "10": 0.06291036307811737,
56 | "11": 0.06631124764680862,
57 | "12": 0.06641155481338501,
58 | "13": 0.07363756000995636,
59 | "14": 0.0720769613981247,
60 | "15": 0.06881169229745865,
61 | "16": 0.06812605261802673,
62 | "17": 0.06790657341480255,
63 | "18": 0.06946370005607605,
64 | "19": 0.06831823289394379,
65 | "20": 0.07628244161605835,
66 | "21": 0.08184769004583359,
67 | "22": 0.08238459378480911,
68 | "23": 0.08216940611600876,
69 | "24": 0.08062490820884705,
70 | "25": 0.08032988011837006,
71 | "26": 0.0882396325469017,
72 | "27": 0.08544350415468216,
73 | "28": 0.08814030140638351,
74 | "29": 0.0920654758810997,
75 | "30": 0.09549416601657867,
76 | "31": 0.09534645080566406,
77 | "32": 0.10567693412303925,
78 | "33": 0.10631692409515381,
79 | "34": 0.12264522910118103,
80 | "35": 0.13643497228622437,
81 | "36": 0.16747044026851654,
82 | "37": 0.18993516266345978,
83 | "38": 0.30328819155693054,
84 | "39": 0.6610473990440369
85 | },
86 | "3": {
87 | "0": 0.00644146092236042,
88 | "1": 0.007982950657606125,
89 | "2": 0.009698222391307354,
90 | "3": 0.01994360238313675,
91 | "4": 0.018415283411741257,
92 | "5": 0.028179286047816277,
93 | "6": 0.03389795869588852,
94 | "7": 0.04023869335651398,
95 | "8": 0.04663827270269394,
96 | "9": 0.050340618938207626,
97 | "10": 0.05775105208158493,
98 | "11": 0.06146712228655815,
99 | "12": 0.06345455348491669,
100 | "13": 0.07273544371128082,
101 | "14": 0.07991337776184082,
102 | "15": 0.0883568525314331,
103 | "16": 0.10024288296699524,
104 | "17": 0.09664621949195862,
105 | "18": 0.09281810373067856,
106 | "19": 0.09723879396915436,
107 | "20": 0.09906055778265,
108 | "21": 0.11715830862522125,
109 | "22": 0.11060420423746109,
110 | "23": 0.11150994151830673,
111 | "24": 0.10590066015720367,
112 | "25": 0.11040174216032028,
113 | "26": 0.11445979028940201,
114 | "27": 0.12103137373924255,
115 | "28": 0.12452929466962814,
116 | "29": 0.12251610308885574,
117 | "30": 0.1283876746892929,
118 | "31": 0.12970605492591858,
119 | "32": 0.14429661631584167,
120 | "33": 0.13773636519908905,
121 | "34": 0.15384837985038757,
122 | "35": 0.16938979923725128,
123 | "36": 0.19963425397872925,
124 | "37": 0.23424555361270905,
125 | "38": 0.3457285165786743,
126 | "39": 0.8521292209625244
127 | },
128 | "4": {
129 | "0": 0.010983715765178204,
130 | "1": 0.020042432472109795,
131 | "2": 0.026814738288521767,
132 | "3": 0.037387143820524216,
133 | "4": 0.04746044799685478,
134 | "5": 0.07517736405134201,
135 | "6": 0.0786554366350174,
136 | "7": 0.08432161062955856,
137 | "8": 0.09371500462293625,
138 | "9": 0.11412825435400009,
139 | "10": 0.14158904552459717,
140 | "11": 0.15014609694480896,
141 | "12": 0.15963214635849,
142 | "13": 0.20647458732128143,
143 | "14": 0.19127340614795685,
144 | "15": 0.17390508949756622,
145 | "16": 0.18314196169376373,
146 | "17": 0.17005600035190582,
147 | "18": 0.18588951230049133,
148 | "19": 0.17141836881637573,
149 | "20": 0.20356571674346924,
150 | "21": 0.18777011334896088,
151 | "22": 0.19509343802928925,
152 | "23": 0.17161378264427185,
153 | "24": 0.17053355276584625,
154 | "25": 0.16809462010860443,
155 | "26": 0.20643843710422516,
156 | "27": 0.1535990834236145,
157 | "28": 0.15698522329330444,
158 | "29": 0.15907470881938934,
159 | "30": 0.180629700422287,
160 | "31": 0.17127655446529388,
161 | "32": 0.2555738687515259,
162 | "33": 0.22774025797843933,
163 | "34": 0.33447515964508057,
164 | "35": 0.3591727912425995,
165 | "36": 0.40942636132240295,
166 | "37": 0.5059314966201782,
167 | "38": 0.4950774013996124,
168 | "39": 1.1670713424682617
169 | }
170 | }
--------------------------------------------------------------------------------
/result/csqa/attn-True_cnt-100_rep_std.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "0": 0.016034431755542755,
4 | "1": 0.008597188629209995,
5 | "2": 0.013132232241332531,
6 | "3": 0.020786026492714882,
7 | "4": 0.015194983221590519,
8 | "5": 0.02490413933992386,
9 | "6": 0.029641393572092056,
10 | "7": 0.03079659305512905,
11 | "8": 0.03851607069373131,
12 | "9": 0.040314532816410065,
13 | "10": 0.05387641116976738,
14 | "11": 0.05402584373950958,
15 | "12": 0.051192574203014374,
16 | "13": 0.05878303200006485,
17 | "14": 0.05709461867809296,
18 | "15": 0.06898349523544312,
19 | "16": 0.07269451022148132,
20 | "17": 0.061869796365499496,
21 | "18": 0.06671564280986786,
22 | "19": 0.07354655861854553,
23 | "20": 0.07827043533325195,
24 | "21": 0.08035335689783096,
25 | "22": 0.06958582997322083,
26 | "23": 0.053452737629413605,
27 | "24": 0.06251504272222519,
28 | "25": 0.06198607385158539,
29 | "26": 0.06369276344776154,
30 | "27": 0.05594378709793091,
31 | "28": 0.07795331627130508,
32 | "29": 0.055569399148225784,
33 | "30": 0.050825364887714386,
34 | "31": 0.06442210078239441,
35 | "32": 0.06855574995279312,
36 | "33": 0.07812808454036713,
37 | "34": 0.08100443333387375,
38 | "35": 0.10675255954265594,
39 | "36": 0.1369907706975937,
40 | "37": 0.2153209149837494,
41 | "38": 0.2257111519575119,
42 | "39": 0.2673751413822174
43 | },
44 | "2": {
45 | "0": 0.016414199024438858,
46 | "1": 0.007651639636605978,
47 | "2": 0.012203294783830643,
48 | "3": 0.02167569287121296,
49 | "4": 0.021899402141571045,
50 | "5": 0.03118284046649933,
51 | "6": 0.03656743839383125,
52 | "7": 0.04252760857343674,
53 | "8": 0.05411273613572121,
54 | "9": 0.05325760319828987,
55 | "10": 0.06796081364154816,
56 | "11": 0.07093039155006409,
57 | "12": 0.07577069103717804,
58 | "13": 0.09067865461111069,
59 | "14": 0.07348836958408356,
60 | "15": 0.08224916458129883,
61 | "16": 0.06917548924684525,
62 | "17": 0.07372508198022842,
63 | "18": 0.07373254001140594,
64 | "19": 0.08234356343746185,
65 | "20": 0.07898721843957901,
66 | "21": 0.0831800028681755,
67 | "22": 0.06822909414768219,
68 | "23": 0.0571831539273262,
69 | "24": 0.05775734409689903,
70 | "25": 0.07256750017404556,
71 | "26": 0.06250351667404175,
72 | "27": 0.06503269076347351,
73 | "28": 0.052591871470212936,
74 | "29": 0.052151553332805634,
75 | "30": 0.0667206421494484,
76 | "31": 0.08017119765281677,
77 | "32": 0.07074551284313202,
78 | "33": 0.08131583780050278,
79 | "34": 0.10088871419429779,
80 | "35": 0.10446341335773468,
81 | "36": 0.15017735958099365,
82 | "37": 0.2085195928812027,
83 | "38": 0.2604779899120331,
84 | "39": 0.33112573623657227
85 | },
86 | "3": {
87 | "0": 0.015690196305513382,
88 | "1": 0.008211216889321804,
89 | "2": 0.013630680739879608,
90 | "3": 0.02109990268945694,
91 | "4": 0.016149483621120453,
92 | "5": 0.027793554589152336,
93 | "6": 0.03330055996775627,
94 | "7": 0.03549427166581154,
95 | "8": 0.043434321880340576,
96 | "9": 0.05019747093319893,
97 | "10": 0.0612463615834713,
98 | "11": 0.0602092519402504,
99 | "12": 0.06994479149580002,
100 | "13": 0.08391790091991425,
101 | "14": 0.09250296652317047,
102 | "15": 0.10730336606502533,
103 | "16": 0.10508696734905243,
104 | "17": 0.09599477052688599,
105 | "18": 0.07932454347610474,
106 | "19": 0.09333863854408264,
107 | "20": 0.09752238541841507,
108 | "21": 0.11671330034732819,
109 | "22": 0.07632520794868469,
110 | "23": 0.08575465530157089,
111 | "24": 0.08302164822816849,
112 | "25": 0.08065132796764374,
113 | "26": 0.09101955592632294,
114 | "27": 0.0854015126824379,
115 | "28": 0.07694537192583084,
116 | "29": 0.07431099563837051,
117 | "30": 0.079600490629673,
118 | "31": 0.07222311943769455,
119 | "32": 0.08731991052627563,
120 | "33": 0.07518697530031204,
121 | "34": 0.11128079891204834,
122 | "35": 0.12077319622039795,
123 | "36": 0.14397063851356506,
124 | "37": 0.25434958934783936,
125 | "38": 0.26060259342193604,
126 | "39": 0.36372804641723633
127 | },
128 | "4": {
129 | "0": 0.018526816740632057,
130 | "1": 0.014629525132477283,
131 | "2": 0.01531291101127863,
132 | "3": 0.034464702010154724,
133 | "4": 0.034024860709905624,
134 | "5": 0.06532632559537888,
135 | "6": 0.05371295288205147,
136 | "7": 0.06695323437452316,
137 | "8": 0.08484780043363571,
138 | "9": 0.07913283258676529,
139 | "10": 0.14885680377483368,
140 | "11": 0.1370631456375122,
141 | "12": 0.15027369558811188,
142 | "13": 0.16059213876724243,
143 | "14": 0.16449452936649323,
144 | "15": 0.15536275506019592,
145 | "16": 0.1905192881822586,
146 | "17": 0.1558334231376648,
147 | "18": 0.1900419294834137,
148 | "19": 0.13915996253490448,
149 | "20": 0.17389975488185883,
150 | "21": 0.17189964652061462,
151 | "22": 0.14131511747837067,
152 | "23": 0.09667805582284927,
153 | "24": 0.0775427371263504,
154 | "25": 0.08732336759567261,
155 | "26": 0.11393430083990097,
156 | "27": 0.1198594868183136,
157 | "28": 0.11606979370117188,
158 | "29": 0.05276035517454147,
159 | "30": 0.10249536484479904,
160 | "31": 0.11630222946405411,
161 | "32": 0.058025602251291275,
162 | "33": 0.09649517387151718,
163 | "34": 0.23000496625900269,
164 | "35": 0.08047664910554886,
165 | "36": 0.2660832107067108,
166 | "37": 0.25463369488716125,
167 | "38": 0.37955817580223083,
168 | "39": 0.4409673511981964
169 | }
170 | }
--------------------------------------------------------------------------------
/llm_reason.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import re
4 | import argparse
5 | import json
6 | import time
7 | from tqdm import tqdm
8 | from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModelForSeq2SeqLM
9 | from load_data import DataLoader
10 | from utils import llama_generate, baichuan_generate, get_config, get_prompter, chat_generate, mistral_generate
11 |
12 |
# Command-line options for a reasoning run.
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='Llama-2-13b-chat-hf', type=str)
parser.add_argument('--datalength', default=2, type=int)
parser.add_argument('--dataset', default='siqa', type=str)
parser.add_argument('--task', default='direct_answer', type=str)
parser.add_argument('--icl', default=5, type=int)
parser.add_argument('--shuffle', action='store_true')
args = parser.parse_args()


model_name, dataset, datalength = args.model, args.dataset, args.datalength
task, icl, shuffle = args.task, args.icl, args.shuffle

# Resolve where the checkpoint lives for each model family.
if model_name.startswith('Vicuna'):
    model_path = '/netcache/huggingface/vicuna-13b'
elif model_name.startswith('Mistral'):
    model_path = '/mnt/publiccache/huggingface/Mistral-7B-Instruct-v0.2'
elif '70b' in model_name:
    model_path = '/mnt/publiccache/huggingface/Llama-2-70b-chat-hf'
else:
    model_path = f'./model/{model_name}'
result_path = f'./result/{dataset}/{model_name}_{task}_{datalength}.json'

# Load model and tokenizer. Model names outside these families load nothing here.
if model_name.startswith('Baichuan'):
    tokenizer = AutoTokenizer.from_pretrained(model_path,
                                              revision="v2.0",
                                              use_fast=False,
                                              trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 revision="v2.0",
                                                 device_map="auto",
                                                 torch_dtype=torch.bfloat16,
                                                 trust_remote_code=True)
    model.eval()
elif model_name.startswith(('Llama', 'Vicuna', 'Mistral')):
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model.eval()
56 |
57 |
58 |
def setup_seed(seed):
    """Seed PyTorch (CPU and all CUDA devices) and make cuDNN deterministic."""
    torch.backends.cudnn.deterministic = True
    for seeder in (torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)


setup_seed(17)
64 |
65 |
66 |
# Dataset iterator for the selected benchmark.
dataloader = DataLoader(dataset=dataset, data_length=datalength, shuffle=shuffle)
# Active prompter; starts unset and is read by model_generate() as a global.
prompter = None


# Run-wide counters and the list of collected results.
correct = 0
cnt = 0
results = []
74 |
75 |
def model_generate(question):
    """Wrap ``question`` with the active prompter and generate with the loaded model.

    Reads the module-level ``prompter``, ``task``, ``icl``, ``model_name``,
    ``model`` and ``tokenizer``.

    Decoding strategy by task: ``'sc'`` uses the 'beam' config,
    ``'cons_answer'`` and ``'l2m'`` use 'sample', everything else 'greedy'.

    Returns:
        Whatever the model-family specific ``*_generate`` helper returns.
    """
    # Honour the --icl option; it was parsed but a literal 5 was passed here.
    # The option defaults to 5, so default runs are unchanged.
    wrapped_input = prompter.wrap_input(question, icl_cnt=icl)

    if task == 'sc':
        strategy = 'beam'
    elif task in ['cons_answer', 'l2m']:
        strategy = 'sample'
    else:
        strategy = 'greedy'
    config = get_config(model_name=model_name, strategy=strategy)

    if model_name.startswith('Baichuan'):
        # Baichuan takes its generation config from the model object.
        model.generation_config = config
        return baichuan_generate(model, tokenizer, wrapped_input, task)
    elif model_name.startswith('Chat'):
        return chat_generate(wrapped_input, task)
    elif model_name.startswith('Mistral'):
        return mistral_generate(model, config, tokenizer, wrapped_input, task)
    else:
        return llama_generate(model, config, tokenizer, wrapped_input, task)
93 |
# Main evaluation loop: run the chosen reasoning strategy on every example,
# extract a prediction, score it, and finally dump all records to result_path.
cost = 0
if task == 'dpr':
    # Retrieved passages for the dpr task, loaded once up front.
    path = f'./{task}_{dataset}_documents.json'
    with open(path, 'r', encoding='utf-8') as f:
        documents = json.load(f)

for data in tqdm(dataloader):
    start = time.time()
    question = data['question']
    label = data['label']

    if task == 'l2m':
        # Stage 1: ask the model to break the problem into sub-questions.
        prompter = get_prompter(model_name, dataset, 'l2m_question')
        result, _ = model_generate(question)
        sub_questions = [
            line for line in result.split('\n')[1:] if 'Question' in line
        ]
        # Stage 2: answer each sub-question in turn, growing the context.
        prompter = get_prompter(model_name, dataset, 'l2m_mid_answer')
        for sub_question in sub_questions:
            question += '\n' + sub_question
            result, _ = model_generate(question)
            question += " " + result.split('\n')[0]
        # Stage 3: produce the final answer from the accumulated context.
        prompter = get_prompter(model_name, dataset, 'l2m_final_answer')
        result, pred = model_generate(question)
        prompter = get_prompter(model_name, dataset, 'l2m_question')
    elif task == 'sr':
        # Rationale -> feedback -> revised answer.
        prompter = get_prompter(model_name, dataset, 'cot_answer')
        result, _ = model_generate(question)
        question += '\nRationale: ' + result
        prompter = get_prompter(model_name, dataset, 'sr_feedback')
        result, _ = model_generate(question)
        question += ' ' + result
        prompter = get_prompter(model_name, dataset, 'sr_answer')
        result, pred = model_generate(question)
    elif task in ('cons_answer', 'direct_answer'):
        # These tasks use the prompt set named after the task itself.
        prompter = get_prompter(model_name, dataset, task)
        result, pred = model_generate(question)
    elif task == 'dpr':
        # One retrieved passage per answer option is prepended to the question.
        if dataset in ['wino', 'piqa']:
            width = 2
        elif dataset == 'hella':
            width = 3
        else:
            width = 4
        # NOTE(review): consecutive examples read overlapping document windows
        # (cnt .. cnt+width-1); confirm whether cnt * width was intended.
        document = ''.join(
            documents[i]['ctxs'][0]['text'] + '. '
            for i in range(cnt, cnt + width)
        )
        question = document + question
        prompter = get_prompter(model_name, dataset, 'direct_answer')
        result, pred = model_generate(question)
    else:
        # Every remaining task (including 'bm25') answers with the
        # chain-of-thought prompt.
        prompter = get_prompter(model_name, dataset, 'cot_answer')
        result, pred = model_generate(question)

    if dataset != 'gsm8k':
        # Multiple-choice datasets: take the first option marker like "3)".
        option = re.search(r'[1-5]\)', pred)
        pred = option.group()[:-1] if option else 'None'
    else:
        # gsm8k: use the numbers on the last output line that contains digits.
        numeric_lines = [
            line for line in pred.split('\n') if re.search(r'\d+', line)
        ]
        if numeric_lines:
            # Drop thousands separators so '1,234' is read as one number.
            numbers = re.findall(r'\d+', numeric_lines[-1].replace(',', ''))
            # Lenient match: accept the label if it appears on that line,
            # otherwise fall back to the last number.
            pred = label if label in numbers else numbers[-1]
        else:
            # The model produced no number at all.
            pred = 'None'

    cor_flag = (pred == label)
    cnt += 1
    end = time.time()
    cost += end - start
    if cor_flag:
        correct += 1
    msg = {'question': question, 'answer': result, 'pred': pred, 'label': label, 'cor_flag': cor_flag}
    results.append(msg)
    torch.cuda.empty_cache()

# Guard against an empty data loader so the summary never divides by zero.
accuracy = correct / cnt if cnt else 0.0
avg_time = cost / cnt if cnt else 0.0
results.append({'acc': accuracy})
print(f'Acc:{accuracy}')
print(f'Time:{avg_time}')

# Create the per-dataset result directory if this is the first run for it.
import os
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)
--------------------------------------------------------------------------------