├── .gitignore ├── 2stage_parsing.py ├── README.md ├── agg_result.py ├── data └── task280_stereoset_classification_stereotype_type.json ├── llms ├── __init__.py ├── base.py ├── claude.py ├── gemini_dev.py ├── gemini_vertex.py ├── gemini_vertex_structure.py ├── groq_model.py ├── hf_model.py ├── oai_chat.py ├── oai_structure.py ├── oai_structurev2.py ├── outlines_model.py ├── sglang_model.py ├── tgi_grammar_model.py ├── together_model.py ├── utils.py ├── vertex_claude.py └── xgrammar_model.py ├── main.py ├── requirements.txt ├── resources ├── classification_comparison_restriction.pdf ├── classification_comparison_restriction_square.pdf ├── classification_format_comparison_model.jpg ├── classification_format_comparison_model.pdf ├── cover.jpg ├── outlines_comparison.jpg ├── outlines_comparison_with_restraint.jpg ├── reasoning_comparison_restriction.pdf └── reasoning_format_comparison_model.pdf ├── run_ddxplus.sh ├── run_gsm8k.sh ├── run_letter.sh ├── run_shuffobj.sh ├── run_sports.sh ├── run_task280.sh ├── study_llm_parser.py ├── tasks ├── api_bank.py ├── base.py ├── conll.py ├── csqa.py ├── dateunder.py ├── ddxplus.py ├── gsm8k.py ├── lastletter.py ├── llm_parser.py ├── math.py ├── math_utils.py ├── multifin.py ├── normalizer.py ├── shuffleobj.py ├── sports.py ├── task280.py └── templates │ ├── api_bank.yaml │ ├── base.txt │ ├── conll2003-t1-structure.yaml │ ├── conll2003-t2-structure.yaml │ ├── conll2003-t3-structure.yaml │ ├── csqa-t1-f0.yaml │ ├── csqa-t1-f1.yaml │ ├── csqa-t1-f2.yaml │ ├── csqa-t1-structure.yaml │ ├── dateunder-t1-f1.yaml │ ├── ddxplus-t1-f1.yaml │ ├── ddxplus-t1-f2.yaml │ ├── ddxplus-t1-f3.yaml │ ├── ddxplus-t1-free.yaml │ ├── ddxplus-t1-structure.yaml │ ├── ddxplus-t2-structure.yaml │ ├── ddxplus-t3-f1.yaml │ ├── ddxplus-t3-f2.yaml │ ├── ddxplus-t3-f3.yaml │ ├── ddxplus-t3-structure.yaml │ ├── ddxplus_v1-2.yaml │ ├── gsm8k-t1-f1-hybrid.yaml │ ├── gsm8k-t1-f1.yaml │ ├── gsm8k-t1-f2-hybrid.yaml │ ├── gsm8k-t1-f2.yaml │ ├── gsm8k-t1-f3-hybrid.yaml 
│ ├── gsm8k-t1-f3.yaml │ ├── gsm8k-t1-free.yaml │ ├── gsm8k-t1-structure.yaml │ ├── gsm8k-t2-f1-hybrid.yaml │ ├── gsm8k-t2-f1.yaml │ ├── gsm8k-t2-f2-hybrid.yaml │ ├── gsm8k-t2-f2.yaml │ ├── gsm8k-t2-f3-hybrid.yaml │ ├── gsm8k-t2-f3.yaml │ ├── gsm8k-t2-free.yaml │ ├── gsm8k-t2-structure.yaml │ ├── gsm8k-t3-f1-hybrid.yaml │ ├── gsm8k-t3-f1-long.yaml │ ├── gsm8k-t3-f1-short.yaml │ ├── gsm8k-t3-f1.yaml │ ├── gsm8k-t3-f2-hybrid.yaml │ ├── gsm8k-t3-f2-short.yaml │ ├── gsm8k-t3-f2.yaml │ ├── gsm8k-t3-f3-hybrid.yaml │ ├── gsm8k-t3-f3-short.yaml │ ├── gsm8k-t3-f3.yaml │ ├── gsm8k-t3-free.yaml │ ├── gsm8k-t3-structure.yaml │ ├── gsm8k-t4-f1.yaml │ ├── gsm8k-t4-f2.yaml │ ├── gsm8k-t4-f3.yaml │ ├── gsm8k.yaml │ ├── gsm8k_v2-1.yaml │ ├── gsm8k_v2-3.yaml │ ├── gsm8k_v2-4.yaml │ ├── lastletter-t1-f1-hybrid.yaml │ ├── lastletter-t1-f1.yaml │ ├── lastletter-t1-f2-hybrid.yaml │ ├── lastletter-t1-f2.yaml │ ├── lastletter-t1-f3-hybrid.yaml │ ├── lastletter-t1-f3-s1.yaml │ ├── lastletter-t1-f3-s10.yaml │ ├── lastletter-t1-f3.yaml │ ├── lastletter-t1-free.yaml │ ├── lastletter-t1-structure.yaml │ ├── lastletter-t2-f1-hybrid.yaml │ ├── lastletter-t2-f1.yaml │ ├── lastletter-t2-f2-hybrid.yaml │ ├── lastletter-t2-f2.yaml │ ├── lastletter-t2-f3-hybrid.yaml │ ├── lastletter-t2-f3-short.yaml │ ├── lastletter-t2-f3.yaml │ ├── lastletter-t2-free.yaml │ ├── lastletter-t2-structure.yaml │ ├── lastletter-t3-f1-hybrid.yaml │ ├── lastletter-t3-f1.yaml │ ├── lastletter-t3-f2-hybrid.yaml │ ├── lastletter-t3-f2-short.yaml │ ├── lastletter-t3-f2.yaml │ ├── lastletter-t3-f3-hybrid.yaml │ ├── lastletter-t3-f3-short.yaml │ ├── lastletter-t3-f3.yaml │ ├── lastletter-t3-free.yaml │ ├── lastletter-t3-structure.yaml │ ├── lastletter-t4-f1.yaml │ ├── lastletter-t4-f2.yaml │ ├── lastletter-t4-f3.yaml │ ├── lastletter-v2-7.yaml │ ├── lastletter-v2.yaml │ ├── lastletter.yaml │ ├── math-t1-f1.yaml │ ├── math-t2-f1.yaml │ ├── math-t3-f1.yaml │ ├── multiarith-t1-f1.yaml │ ├── multiarith-t1-f2.yaml │ ├── 
multiarith-t1-f3.yaml │ ├── multiarith-t2-f1.yaml │ ├── multiarith-t2-f2.yaml │ ├── multiarith-t2-f3.yaml │ ├── multiarith-t3-f1.yaml │ ├── multiarith-t3-f2.yaml │ ├── multiarith-t3-f3.yaml │ ├── multifin-t1-f1.yaml │ ├── multifin-t1-f2.yaml │ ├── multifin-t1-f3.yaml │ ├── multifin-t1-structure.yaml │ ├── multifin-t2-f1.yaml │ ├── multifin-t2-f2.yaml │ ├── multifin-t2-f3.yaml │ ├── multifin-t2-structure.yaml │ ├── multifin-t3-f1.yaml │ ├── multifin-t3-f2.yaml │ ├── multifin-t3-f3.yaml │ ├── multifin-t3-free.yaml │ ├── multifin-t3-structure.yaml │ ├── multifin.yaml │ ├── shuffleobj-t1-f1.yaml │ ├── shuffleobj-t1-f2.yaml │ ├── shuffleobj-t1-f3.yaml │ ├── shuffleobj-t1-free.yaml │ ├── shuffleobj-t1-structure.yaml │ ├── shuffleobj-t2-f1.yaml │ ├── shuffleobj-t2-f2.yaml │ ├── shuffleobj-t2-f3.yaml │ ├── shuffleobj-t2-structure.yaml │ ├── shuffleobj-t3-f1.yaml │ ├── shuffleobj-t3-f2.yaml │ ├── shuffleobj-t3-f3.yaml │ ├── shuffleobj-t3-structure.yaml │ ├── shuffleobj-t4-f1.yaml │ ├── shuffleobj-t4-f2.yaml │ ├── shuffleobj-t4-f3.yaml │ ├── sports-t1-f1.yaml │ ├── sports-t1-f2.yaml │ ├── sports-t1-f3.yaml │ ├── sports-t1-free.yaml │ ├── sports-t1-structure.yaml │ ├── sports-t2-f1.yaml │ ├── sports-t2-f2.yaml │ ├── sports-t2-f3.yaml │ ├── sports-t2-structure.yaml │ ├── sports-t3-f1.yaml │ ├── sports-t3-f2.yaml │ ├── sports-t3-f3.yaml │ ├── sports-t3-structure.yaml │ ├── task280-t1-f1.yaml │ ├── task280-t1-f2.yaml │ ├── task280-t1-f3.yaml │ ├── task280-t1-free1.yaml │ ├── task280-t1-free2.yaml │ ├── task280-t1-structure.yaml │ ├── task280-t2-f1.yaml │ ├── task280-t2-f2.yaml │ ├── task280-t2-f3.yaml │ ├── task280-t2-free1.yaml │ ├── task280-t2-free2.yaml │ ├── task280-t2-structure.yaml │ ├── task280-t3-f1.yaml │ ├── task280-t3-f2.yaml │ ├── task280-t3-f3.yaml │ ├── task280-t3-free1.yaml │ ├── task280-t3-free2.yaml │ └── task280-t3-structure.yaml ├── updates.md ├── utils.py └── visualize.py /llms/__init__.py: 
class ClaudeChat():
    """Single-turn chat wrapper around the Anthropic Messages API."""

    def __init__(self, model_name='claude-3-haiku-20240307') -> None:
        """Create the Anthropic client.

        The API key is read from the ``ANTHROPIC_KEY`` environment variable;
        a ``KeyError`` is raised if it is not set.
        """
        self.client = Anthropic(api_key=os.environ['ANTHROPIC_KEY'])
        self.model_name = model_name

    @retry_with_exponential_backoff
    def __call__(self, prompt, max_tokens=512, temperature=0.0, **kwargs) -> tuple[str, dict]:
        """Send ``prompt`` as a single user message and return the reply.

        Args:
            prompt: Text of the user turn.
            max_tokens: Generation limit; coerced with ``int``.
            temperature: Sampling temperature; coerced with ``float``.
            **kwargs: Accepted but not forwarded to the API.

        Returns:
            ``(res_text, res_info)`` where ``res_info`` holds the prompt, the
            output text, the token usage reported by the API, and an empty
            ``logprobs`` list.
        """
        message = self.client.messages.create(
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ],
            model=self.model_name,
        )
        # Only the first content block of the reply is used.
        res_text = message.content[0].text
        res_info = {
            "input": prompt,
            "output": res_text,
            "num_input_tokens": message.usage.input_tokens,
            "num_output_tokens": message.usage.output_tokens,
            "logprobs": []  # NOTE: currently the Claude API does not provide logprobs
        }
        return res_text, res_info
class GeminiDev:
    """Text-generation wrapper around a ``google.generativeai`` model."""

    # One entry per harm category, each with its threshold set to BLOCK_NONE.
    SAFETY_SETTINGS = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]

    def __init__(self, model_name: str = "gemini-1.0-pro") -> None:
        """Configure the SDK from ``GOOGLE_API_KEY`` and build the model handle."""
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel(model_name)

    @retry_with_exponential_backoff
    def __call__(self, prompt: str, max_tokens=512, temperature=0.0, top_p=1, top_k=1) -> tuple[str, dict]:
        """Returns the tuple of the response text as well as other detailed info."""
        generation_config = {
            "max_output_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": float(top_p),
            "top_k": int(top_k)
        }
        res = self.model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=self.SAFETY_SETTINGS,
            stream=False
        )
        output_text = res.text
        # Token counts come from separate count_tokens calls on the prompt
        # and on the generated text.
        input_token_count = self.model.count_tokens(prompt).total_tokens
        output_token_count = self.model.count_tokens(output_text).total_tokens
        res_info = {
            "input": prompt,
            "output": output_text,
            "num_input_tokens": input_token_count,
            "num_output_tokens": output_token_count,
            "logprobs": []  # NOTE: currently the Gemini API does not provide logprobs
        }
        return output_text, res_info
class Gemini():
    """Text-generation wrapper around a Vertex AI ``GenerativeModel``."""

    # Every listed harm category has its block threshold set to BLOCK_NONE.
    SAFETY_SETTINGS={
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_NONE,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_NONE,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_NONE,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_NONE,
    }

    def __init__(self, model_name='gemini-1.0-pro-vision-001') -> None:
        """Build the model handle for ``model_name``."""
        self.model = GenerativeModel(model_name)

    @retry_with_exponential_backoff
    def __call__(self, prompt, max_tokens=512, temperature=0.0, top_p=1, top_k=1) -> tuple[str, dict]:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Input passed to ``generate_content``.
            max_tokens: Output token limit; coerced with ``int``.
            temperature: Sampling temperature; coerced with ``float``.
            top_p: Nucleus sampling value; coerced with ``float``.
            top_k: Top-k sampling value; coerced with ``int``.

        Returns:
            ``(result, res_info)`` where ``result`` is the text of the first
            part of the first candidate and ``res_info`` holds the prompt, the
            output, token counts from ``count_tokens``, and an empty
            ``logprobs`` list.
        """
        response = self.model.generate_content(
            prompt,
            generation_config={
                "max_output_tokens": int(max_tokens),
                "temperature": float(temperature),
                "top_p": float(top_p),
                "top_k": int(top_k)
            },
            safety_settings=self.SAFETY_SETTINGS,
            stream=False
        )
        # Only the first part of the first candidate is used.
        result = response.candidates[0].content.parts[0].text
        res_info = {
            "input": prompt,
            "output": result,
            "num_input_tokens": self.model.count_tokens(prompt).total_tokens,
            "num_output_tokens": self.model.count_tokens(result).total_tokens,
            "logprobs": []  # NOTE: currently the Gemini API does not provide logprobs
        }
        return result, res_info
class OpenAIChat():
    """Single-turn chat wrapper around the OpenAI chat-completions endpoint."""

    # Number of top alternatives requested per output token.
    TOP_LOGPROBS = 1

    def __init__(self, model_name='gpt-3.5-turbo-0125') -> None:
        """Create the client.

        The key is read from ``OAI_KEY``. When ``CUSTOM_API_URL`` is set and
        the model name does not contain ``'gpt-'``, that URL is used as the
        client's ``base_url``.
        """
        params = {'api_key': os.environ['OAI_KEY']}
        if os.getenv('CUSTOM_API_URL') and 'gpt-' not in model_name:
            params['base_url'] = os.environ['CUSTOM_API_URL']
        self.client = OpenAI(**params)
        self.model_name = model_name

    @retry_with_exponential_backoff
    def __call__(self, prompt, max_tokens=512, temperature=0.0, top_p=0.999, **kwargs) -> tuple[str, dict]:
        """Send ``prompt`` as a single user message and return the reply.

        Args:
            prompt: Text of the user turn.
            max_tokens: Generation limit (only sent for ``gpt`` models).
            temperature: Sampling temperature (only sent for ``gpt`` models).
            top_p: Nucleus sampling value (only sent for ``gpt`` models).
            **kwargs: Extra request arguments; they override the defaults.

        Returns:
            ``(res_text, res_info)`` where ``res_info`` holds the prompt, the
            output text, token usage, and per-position top logprobs (an empty
            list when the response carries none).
        """
        if 'gpt' in self.model_name:
            arguments = {
                'temperature': float(temperature),
                'max_tokens': int(max_tokens),
                'top_p': float(top_p),
                'logprobs': True,
                'top_logprobs': self.TOP_LOGPROBS,
            }
        else:
            # O1 doesn't support these as of 09/12, so no sampling or logprob
            # arguments are sent for non-gpt model names.
            arguments = {}

        kwargs = {**arguments, **kwargs}
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{'role': 'user', 'content': prompt}],
            **kwargs
        )
        choice = response.choices[0]
        logprobs = choice.logprobs
        # The logprobs object and its ``content`` list can each be absent;
        # treat both cases as "no logprobs" instead of failing on len(None).
        if logprobs is not None and logprobs.content is not None:
            log_prob_seq = logprobs.content
            assert response.usage.completion_tokens == len(log_prob_seq)
        else:
            log_prob_seq = []
        res_text = choice.message.content
        res_info = {
            "input": prompt,
            "output": res_text,
            "num_input_tokens": response.usage.prompt_tokens,
            "num_output_tokens": response.usage.completion_tokens,
            "logprobs": [
                [
                    {"token": pos_info.token, "logprob": pos_info.logprob}
                    for pos_info in position.top_logprobs
                ]
                for position in log_prob_seq
            ]
        }
        return res_text, res_info
model_name='gpt-3.5-turbo-0125') -> None: 10 | params = {'api_key': os.environ['OAI_KEY']} 11 | self.custom_model = False 12 | if os.getenv('CUSTOM_API_URL'): 13 | params['base_url'] = os.environ['CUSTOM_API_URL'] 14 | self.custom_model = True 15 | self.client = OpenAI(**params) 16 | self.model_name = model_name 17 | 18 | @retry_with_exponential_backoff 19 | def __call__(self, prompt, schemas, max_tokens=512, temperature=0.0, top_p=0.999, **kwargs) -> tuple[str, dict]: 20 | response = self.client.beta.chat.completions.parse( 21 | model=self.model_name, 22 | messages=[ 23 | {"role": "user", 'content': prompt} 24 | ], 25 | response_format=schemas, 26 | ) 27 | event = response.choices[0].message.parsed.json() 28 | res_info = { 29 | "input": prompt, 30 | "output": event, 31 | "num_input_tokens": response.usage.prompt_tokens, 32 | "num_output_tokens": response.usage.completion_tokens 33 | } 34 | return event, res_info 35 | 36 | 37 | if __name__ == "__main__": 38 | from pydantic import BaseModel 39 | class Response(BaseModel): 40 | reasoning: str 41 | answer: int 42 | 43 | llm = OpenAIStructureV2('gpt-4o-mini-2024-07-18') 44 | res, res_info = llm(prompt='Answer the following in the response format\nNatalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. 
class OutlinesStructure():
    """Schema-constrained JSON generation with ``outlines`` on a local model."""

    def __init__(self, model_name) -> None:
        """Load the tokenizer and the outlines model for ``model_name``.

        The JSON generator is created on the first call, because it depends
        on the schema passed there.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = outlines.models.transformers(model_name, model_kwargs={
            'device_map': 'cuda',
            'torch_dtype': torch.bfloat16
        })
        self.generator = None

    def __call__(self, prompt, schemas, max_tokens=512, temperature=0.0, **kwargs) -> tuple:
        """Generate a schema-conforming answer for ``prompt``.

        Args:
            prompt: User message; wrapped with the tokenizer's chat template.
            schemas: Schema handed to ``outlines.generate.json``. It is only
                used on the first call; later calls reuse the cached generator.
            max_tokens: Generation limit passed to the generator.
            temperature: Unused; sampling is greedy.
            **kwargs: Ignored.

        Returns:
            ``(event, res_info)``. ``event`` is the JSON string produced by the
            parsed result's ``.json()``, or the dict ``{'answer': 'failed'}``
            when all five attempts raise a validation error.
        """
        if self.generator is None:
            sample_params = greedy()  # this is slow when additional parsing criteria was added
            self.generator = outlines.generate.json(self.model, schemas, sampler=sample_params)

        # The rendered chat prompt does not change between attempts.
        messages = [
            {"role": "user", "content": prompt},
        ]
        texts = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # in theory this is deterministic which means rerun doesn't change the result
        for _ in range(5):
            try:
                parsed_result = self.generator(texts, max_tokens=max_tokens)
                event = parsed_result.json()
                break
            except pydantic_core._pydantic_core.ValidationError as e:
                print(e)
                continue
        else:
            # Every attempt failed validation.
            event = {
                'answer': 'failed'
            }

        res_info = {
            "input": prompt,
            "output": event,
        }

        return event, res_info
class TGI():
    """Grammar-constrained JSON generation against a text-generation-inference endpoint."""

    def __init__(self, model_name) -> None:
        """Create the inference client for the endpoint named by ``TGI_ENDPOINT``."""
        self.client = InferenceClient(os.environ["TGI_ENDPOINT"])
        self.model_name = model_name

    def __call__(self, prompt, schemas, max_tokens=512, temperature=0.0, **kwargs) -> tuple:
        """Generate a JSON answer for ``prompt`` constrained by ``schemas``.

        Args:
            prompt: Text sent to the endpoint as-is.
            schemas: Object whose ``.schema()`` result is used as the JSON
                grammar.
            max_tokens: Passed as ``max_new_tokens``.
            temperature: Sampling temperature; raised to at least 0.001.
            **kwargs: Ignored.

        Returns:
            ``(event, res_info)``. ``event`` is the generated JSON text, or
            the dict ``{'answer': 'failed'}`` when none of the five attempts
            produced parseable JSON.
        """
        # https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/using_guidance
        #
        # NOTE: the original author observed that appending the schema to the
        # prompt text led to degenerate outputs such as
        # { "answer": 0, "reasoning": "" }, so the bare prompt is sent.
        grammar = {"type": "json", "value": schemas.schema()}

        for _ in range(5):
            try:
                event = self.client.text_generation(
                    prompt,
                    max_new_tokens=max_tokens,
                    temperature=max(temperature, 0.001),  # must be strictly positive
                    grammar=grammar,
                )
                # Make sure it really is JSON: parse the generated text itself.
                # A dumps/loads round trip of a str can never fail.
                json.loads(event)
                break
            except (json.decoder.JSONDecodeError, AssertionError) as e:
                print(e)
                continue
        else:
            # No attempt succeeded. The for/else replaces a ``success`` flag
            # that was never initialised and raised UnboundLocalError here.
            event = {
                'answer': 'failed'
            }

        res_info = {
            "input": prompt,
            "output": event,
        }

        return event, res_info
class TogetherModel:
    """Single-turn chat wrapper around the Together chat-completions client."""

    def __init__(self, model_name: str = "llama3-70b-8192") -> None:
        """Create the client with the key from ``TOGETHER_API_KEY`` (may be unset)."""
        # This is the default and can be omitted
        api_key = os.environ.get("TOGETHER_API_KEY")
        self.client = Together(api_key=api_key)
        self.model_name = model_name

    @retry_with_exponential_backoff
    def __call__(self, prompt: str, max_tokens=512, temperature=0.0, **kwargs) -> tuple[str, dict]:
        """Send ``prompt`` as one user turn; return ``(text, info)``.

        Extra keyword arguments are forwarded to the completion request.
        ``info`` carries the prompt, the reply, the reported token usage,
        and an empty ``logprobs`` list.
        """
        user_turn = {
            "role": "user",
            "content": prompt
        }
        completion = self.client.chat.completions.create(
            messages=[user_turn],
            model=self.model_name,
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs
        )
        reply = completion.choices[0].message.content
        usage = completion.usage
        details = {
            "input": prompt,
            "output": reply,
            "num_input_tokens": usage.prompt_tokens,
            "num_output_tokens": usage.completion_tokens,
            "logprobs": []
        }
        return reply, details
project_id=os.environ['GCP_PROJECT_NAME'], 12 | access_token=os.environ['GCP_ACCESS_TOKEN'] 13 | ) 14 | self.model_name = model_name 15 | 16 | def __call__(self, prompt, max_tokens=512, temperature=0.0, **kwargs) -> str: 17 | success = False 18 | failed = 0 19 | while not success: 20 | 21 | try: 22 | message = self.client.messages.create( 23 | max_tokens=int(max_tokens), 24 | temperature=float(temperature), 25 | messages=[ 26 | { 27 | "role": "user", 28 | "content": [ 29 | { 30 | "type": "text", 31 | "text": prompt 32 | } 33 | ] 34 | } 35 | ], 36 | model=self.model_name, 37 | ) 38 | result = message.content[0].text 39 | success = True 40 | sleep(5.0) 41 | except Exception as e: 42 | logging.error('anthropic:'+str(e)) 43 | result = 'error:{}'.format(e) 44 | failed += 1 45 | sleep(5.0) 46 | if failed > 10: 47 | break 48 | return result 49 | 50 | 51 | if __name__ == "__main__": 52 | client = ClaudeChat() 53 | print(client("Hi")) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | tqdm 3 | colorama 4 | together 5 | anthropic 6 | openai 7 | jinja2 8 | numpy 9 | google-api-core 10 | google-cloud-aiplatform 11 | scikit-learn 12 | wandb -------------------------------------------------------------------------------- /resources/classification_comparison_restriction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/classification_comparison_restriction.pdf -------------------------------------------------------------------------------- /resources/classification_comparison_restriction_square.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/classification_comparison_restriction_square.pdf -------------------------------------------------------------------------------- /resources/classification_format_comparison_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/classification_format_comparison_model.jpg -------------------------------------------------------------------------------- /resources/classification_format_comparison_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/classification_format_comparison_model.pdf -------------------------------------------------------------------------------- /resources/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/cover.jpg -------------------------------------------------------------------------------- /resources/outlines_comparison.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/outlines_comparison.jpg -------------------------------------------------------------------------------- /resources/outlines_comparison_with_restraint.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appier-research/structure-gen/28b64ca060de229d6eaf4bceababa32032db45e0/resources/outlines_comparison_with_restraint.jpg 
class StructJSONPrompter(BaseJSONPrompter):
    """JSON prompter for a four-choice (A-D) question task using a tool schema."""

    # Function/tool schema describing the expected answer object.
    schema = {
        "type": "function",
        "function": {
            "name": "get_answer_choice",
            "description": "Answer to the last question",
            "parameters": {
                "type": "object",
                "properties": {
                    "answer": {
                        "type": "string",
                        "enum": ["A", "B", "C", "D"],
                        "description": "content most suited category"
                    },
                    "a_reason": {
                        "type": "string",
                        "description": "think step by step here"
                    }
                },
                "required": ["a_reason", "answer"]
            }
        }
    }

    def __init__(self, num_shots=8, template_src='tasks/templates/csqa-t1-structure.yaml') -> None:
        """Initialise the base prompter with the template path and shot count."""
        super().__init__(template_src, num_shots)

    def parse_answer(self, parsed_results, row):
        """Score a model response against ``row['answer']`` by exact match.

        Args:
            parsed_results: The model output, either a JSON string or an
                already-decoded object.
            row: Dataset row; its ``'answer'`` entry is the gold label.

        Returns:
            A dict with ``correct``, ``answer``, ``predict``,
            ``parsed_result``, ``parse_failed`` and ``response_non_json``.
            A response that is not valid JSON is counted in both failure
            counters and scored with a ``None`` prediction instead of raising.
        """
        parse_failed = 0
        response_non_json = 0
        if isinstance(parsed_results, str):
            try:
                parsed_results = json.loads(parsed_results)
            except json.JSONDecodeError:
                # Malformed output is a scoring failure, not a crash.
                response_non_json += 1
                parsed_results = None
        if not isinstance(parsed_results, dict):
            parse_failed += 1
            parsed_results = {'answer': None}
        # A missing answer key is recorded as a None prediction.
        parsed_results.setdefault('answer', None)

        # exact match with answer
        predict = parsed_results['answer']
        answer = row['answer']
        correct = predict == answer

        return {
            'correct': correct,
            'answer': answer,
            'predict': predict,
            'parsed_result': parsed_results,
            'parse_failed': parse_failed,
            'response_non_json': response_non_json,
        }

    def prompt(self, row):
        """Render the prompt for ``row``.

        Returns:
            ``(rendered_text, data)`` where ``data`` is the mapping passed to
            the template, including the tool schema under ``'tools'``.
        """
        # NOTE(review): fewshots, task_specification, format_instruct and
        # template are presumably set by the base class; confirm in tasks/base.py.
        question = row['question']
        if self.num_shots == 0:
            fewshot_text = ''
        else:
            examples = [
                "Question: {}\nAnswer:\n```json\n{}\n```\n".format(
                    example['question'], json.dumps(example['response'], indent=4)
                )
                for example in self.fewshots[:self.num_shots]
            ]
            fewshot_text = 'Here are some examples:\n' + "".join(examples)

        data = {
            'task_specification': self.task_specification,
            'fewshot_text': fewshot_text.strip(),
            'format_instruct': self.format_instruct,
            'question': 'Question: '+question,
            'tools': [self.schema]
        }

        return self.template.render(data), data
"C", "D", "E", "F", "G"], 21 | "description": "content most suited category" 22 | }, 23 | "a_reason": { 24 | "type": "string", 25 | "description": "think step by step here" 26 | } 27 | }, 28 | "required": ["a_reason", "answer"] 29 | } 30 | } 31 | } 32 | 33 | def __init__(self, num_shots=8, template_src='tasks/templates/shuffleobj-t1-structure.yaml') -> None: 34 | super().__init__(template_src, num_shots) 35 | 36 | def parse_answer(self, parsed_results, row): 37 | parse_failed = 0 38 | response_non_json = 0 39 | if isinstance(parsed_results, str): 40 | parsed_results = json.loads(parsed_results) 41 | if not isinstance(parsed_results, dict): 42 | parse_failed += 1 43 | parsed_results = {'answer': None } 44 | if 'answer' not in parsed_results: 45 | parsed_results['answer'] = None 46 | # exact match with answer 47 | predict = parsed_results['answer'] 48 | answer = row['answer'] 49 | correct = predict == answer 50 | additional_fields = {} 51 | 52 | return { 53 | 'correct': correct, 54 | 'answer': answer, 55 | 'predict': predict, 56 | 'parsed_result': parsed_results, 57 | 'parse_failed': parse_failed, 58 | 'response_non_json': response_non_json, 59 | **additional_fields 60 | } 61 | 62 | def prompt(self, row): 63 | question = row['question'] 64 | if self.num_shots == 0: 65 | fewshot_text = '' 66 | else: 67 | fewshot_text = 'Here are some examples:\n' 68 | 69 | for example in self.fewshots[:self.num_shots]: 70 | fewshot_text += "Question: {}\nAnswer:\n```json\n{}\n```\n".format( 71 | example['question'], json.dumps(example['response'], indent=4) 72 | ) 73 | 74 | data = { 75 | 'task_specification': self.task_specification, 76 | 'fewshot_text': fewshot_text.strip(), 77 | 'format_instruct': self.format_instruct, 78 | 'question': 'Question: '+question, 79 | 'tools': [self.schema] 80 | } 81 | 82 | return self.template.render(data), data 83 | 84 | -------------------------------------------------------------------------------- /tasks/llm_parser.py: 
# /tasks/llm_parser.py
class LLMParser():
    """Ask a secondary LLM to extract the final answer from a raw response.

    Supported ``method`` values are ``'local'`` (OpenAI-compatible endpoint
    at ``$TGI_URL``), ``'together'``, ``'claude'`` and ``'openai'``.
    """

    def __init__(self, parser_prompt, method='claude', model_name="meta-llama/Llama-3-8b-chat-hf"):
        """Create the backend client for ``method``.

        Args:
            parser_prompt: Instruction text placed before every response.
            method: Backend selector (see class docstring).
            model_name: Model used by the ``'local'``/``'together'`` backends;
                overridden with a fixed model for ``'claude'``/``'openai'``.

        Raises:
            ValueError: If ``method`` is not a supported backend.
            AssertionError: If ``method`` resolves to ``'local'`` and
                ``TGI_URL`` is not set.
        """
        # Without a Together key the parser falls back to the local endpoint.
        if method == 'together' and 'TOGETHER_API_KEY' not in os.environ:
            method = 'local'
        self.model_name = model_name
        if method == 'local':
            assert 'TGI_URL' in os.environ, "TGI_URL must be set when method is 'local'"
            self.client = OpenAI(base_url=os.environ['TGI_URL'], api_key="x")
        elif method == 'together':
            self.client = Together(api_key=os.environ['TOGETHER_API_KEY'])
        elif method == 'claude':
            from llms.claude import ClaudeChat
            self.client = ClaudeChat('claude-3-haiku-20240307')
            self.model_name = 'claude-3-haiku-20240307'
        elif method == 'openai':
            from llms.oai_chat import OpenAIChat
            self.client = OpenAIChat('gpt-4o-mini-2024-07-18')
            self.model_name = 'gpt-4o-mini-2024-07-18'
        else:
            raise ValueError(
                "unsupported parser method {!r}; expected one of "
                "'local', 'together', 'claude', 'openai'".format(method)
            )
        self.method = method
        self.parser_prompt = parser_prompt

    def _build_prompt(self, response):
        """Return the parser instruction followed by ``response`` and an answer cue."""
        return self.parser_prompt + "\n" + response + "\nAnswer:"

    def parse(self, response):
        """Return the parser model's extraction for ``response``.

        Raises:
            ValueError: If a ``'claude'``/``'openai'`` client returns text
                containing ``'error:'``.
        """
        prompt = self._build_prompt(response)
        if self.method in ('claude', 'openai'):
            # These project wrappers are called directly and return a
            # (text, info) pair; the info part is not used here.
            text, _res_info = self.client(prompt)
            if 'error:' in text:
                raise ValueError('parser LLM returned an error: {!r}'.format(text))
            return text

        res = self.client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt
            }],
            model=self.model_name,
            temperature=0.0,
            max_tokens=100,
        )
        # The message content may be None; treat that as an empty answer
        # instead of failing on ``None.strip()``.
        content = res.choices[0].message.content
        return (content or '').strip()
-------------------------------------------------------------------------------- /tasks/templates/conll2003-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are a NER converter who extract the named entity from the given sentence after format Question: . 3 | Valid entity: 4 | - PERSON 5 | - ORGANIZATION 6 | - LOCATION 7 | - MISC 8 | Your answered entity MUST based on the above options. 9 | parser_prompt: 10 | text: | 11 | Extract the answer into a 12 | format_instruct: 13 | json: | 14 | You must use the tool to answer in json schema 15 | fewshots: 16 | - question: | 17 | patient profile: ""Sex: Female, Age: 53 18 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 19 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 20 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 21 | - I have a cough that produces colored or more abundant sputum than usual. 22 | - I smoke cigarettes. 23 | - I have been diagnosed with gastroesophageal reflux. 24 | - I work in agriculture. 25 | - I have a cough."" 26 | response: 27 | reason: The patient's severe COPD, worsened respiratory symptoms, and history of smoking suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis; pulmonary neoplasm and tuberculosis are also considerations due to risk factors and work environment. 28 | answer: "Bronchitis" 29 | - question: | 30 | patient profile: ""Sex: Male, Age: 101 31 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 32 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 33 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 34 | - I have a cough that produces colored or more abundant sputum than usual. 35 | - I smoke cigarettes. 
36 | - I have a chronic obstructive pulmonary disease (COPD). 37 | - I have a cough. 38 | - I have noticed a wheezing sound when I exhale."" 39 | response: 40 | reason: The patient's symptoms of severe shortness of breath, increased sputum production, and wheezing suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis, given his history of smoking and COPD; pulmonary neoplasm is also a concern due to long-term smoking; tuberculosis and bronchiectasis should be considered due to chronic cough and recurrent infections; 41 | answer: "Acute COPD exacerbation / infection" -------------------------------------------------------------------------------- /tasks/templates/conll2003-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are a NER converter who extract the named entity from the given sentence after format Question: . 3 | Valid entity: 4 | - PERSON : Used for the names of individuals, identifying people in the text. 5 | - ORGANIZATION : Marks specific organizations, including companies, governmental bodies, and non-governmental organizations. 6 | - LOCATION : Identifies geographical entities such as countries, cities, rivers, and mountains. 7 | - MISC : Categorizes entities that don't clearly fall into the other standard types like organizations, persons, or locations. 8 | Your answered entity MUST based on the above options. 9 | parser_prompt: 10 | text: | 11 | Extract the answer into a 12 | format_instruct: 13 | json: | 14 | You must use the tool to answer in json schema 15 | fewshots: 16 | - question: | 17 | patient profile: ""Sex: Female, Age: 53 18 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 19 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 20 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 
21 | - I have a cough that produces colored or more abundant sputum than usual. 22 | - I smoke cigarettes. 23 | - I have been diagnosed with gastroesophageal reflux. 24 | - I work in agriculture. 25 | - I have a cough."" 26 | response: 27 | reason: The patient's severe COPD, worsened respiratory symptoms, and history of smoking suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis; pulmonary neoplasm and tuberculosis are also considerations due to risk factors and work environment. 28 | answer: "Bronchitis" 29 | - question: | 30 | patient profile: ""Sex: Male, Age: 101 31 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 32 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 33 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 34 | - I have a cough that produces colored or more abundant sputum than usual. 35 | - I smoke cigarettes. 36 | - I have a chronic obstructive pulmonary disease (COPD). 37 | - I have a cough. 
38 | - I have noticed a wheezing sound when I exhale."" 39 | response: 40 | reason: The patient's symptoms of severe shortness of breath, increased sputum production, and wheezing suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis, given his history of smoking and COPD; pulmonary neoplasm is also a concern due to long-term smoking; tuberculosis and bronchiectasis should be considered due to chronic cough and recurrent infections; 41 | answer: "Acute COPD exacerbation / infection" -------------------------------------------------------------------------------- /tasks/templates/conll2003-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are a NER converter who extract the named entity from the given sentence after format Question: 3 | Valid entity: 4 | * PERSON : Used for the names of individuals, identifying people in the text 5 | * LOCATION : Identifies geographical entities such as countries, cities, rivers, and mountains 6 | * ORGANIZATION : Marks specific organizations, including companies, governmental bodies, and non-governmental organizations 7 | * MISC : Categorizes entities that don't clearly fall into the other standard types like organizations, persons, or locations 8 | Make sure entity must be only of the above 9 | parser_prompt: 10 | text: | 11 | Extract the answer into a 12 | format_instruct: 13 | json: | 14 | You must use the tool to answer in json schema 15 | fewshots: 16 | - question: | 17 | patient profile: ""Sex: Female, Age: 53 18 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 19 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 20 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 21 | - I have a cough that produces colored or more abundant sputum than usual. 22 | - I smoke cigarettes. 
23 | - I have been diagnosed with gastroesophageal reflux. 24 | - I work in agriculture. 25 | - I have a cough."" 26 | response: 27 | reason: The patient's severe COPD, worsened respiratory symptoms, and history of smoking suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis; pulmonary neoplasm and tuberculosis are also considerations due to risk factors and work environment. 28 | answer: "Bronchitis" 29 | - question: | 30 | patient profile: ""Sex: Male, Age: 101 31 | - I have severe Chronic Obstructive Pulmonary Disease (COPD). 32 | - I am experiencing shortness of breath or difficulty breathing in a significant way. 33 | - I have had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year. 34 | - I have a cough that produces colored or more abundant sputum than usual. 35 | - I smoke cigarettes. 36 | - I have a chronic obstructive pulmonary disease (COPD). 37 | - I have a cough. 38 | - I have noticed a wheezing sound when I exhale."" 39 | response: 40 | reason: The patient's symptoms of severe shortness of breath, increased sputum production, and wheezing suggest an acute COPD exacerbation possibly complicated by pneumonia or bronchitis, given his history of smoking and COPD; pulmonary neoplasm is also a concern due to long-term smoking; tuberculosis and bronchiectasis should be considered due to chronic cough and recurrent infections; 41 | answer: "Acute COPD exacerbation / infection" -------------------------------------------------------------------------------- /tasks/templates/csqa-t1-f0.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 
4 | Make sure the answer only contain one of these four choice : A, B, C, D 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | NOTE: You can only output one of these : A, B, C, D 13 | DO NOT DEVIATE FROM THIS INSTRUCTION 14 | RESPONSE: 15 | format_instruct: 16 | text: | 17 | Now, answer the most likely choice. 18 | Answer: ... 19 | json: | 20 | Now, answer the most likely choice. 21 | Provide your output in the following valid JSON format: 22 | ```json 23 | { 24 | "answer": "..." 25 | } 26 | ``` 27 | yaml: | 28 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 29 | Provide your output in the following valid YAML format: 30 | ```yaml 31 | reasoning: ..., 32 | answer: ... 33 | ``` 34 | xml: | 35 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 36 | Provide your output in the following valid YAML format: 37 | ```xml 38 | 39 | ... 40 | ... 41 | 42 | ``` 43 | fewshots: 44 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 45 | response: 46 | answer: "race" 47 | - question: 'The norwegian man was boring.' 48 | response: 49 | answer: "race" 50 | - question: 'A tailor is sloppy in what he sews.' 51 | response: 52 | answer: "profession" 53 | - question: "Judy's male friend is very weak." 54 | response: 55 | answer: "gender" 56 | - question: 'The woman believed the bible was full of truth.' 57 | response: 58 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/csqa-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 
4 | Make sure the answer only contain one of these four choice : A, B, C, D 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | NOTE: You can only output one of these : A, B, C, D 13 | DO NOT DEVIATE FROM THIS INSTRUCTION 14 | RESPONSE: 15 | format_instruct: 16 | text: | 17 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 18 | Provide your output in the following valid text format: 19 | Answer: ...reasoning here... The answer is ... 20 | json: | 21 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 22 | Provide your output in the following valid JSON format: 23 | ```json 24 | { 25 | "reasoning": "...", 26 | "answer": "..." 27 | } 28 | ``` 29 | yaml: | 30 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 31 | Provide your output in the following valid YAML format: 32 | ```yaml 33 | reasoning: ..., 34 | answer: ... 35 | ``` 36 | xml: | 37 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 38 | Provide your output in the following valid YAML format: 39 | ```xml 40 | 41 | ... 42 | ... 43 | 44 | ``` 45 | fewshots: 46 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 47 | response: 48 | answer: "race" 49 | - question: 'The norwegian man was boring.' 50 | response: 51 | answer: "race" 52 | - question: 'A tailor is sloppy in what he sews.' 53 | response: 54 | answer: "profession" 55 | - question: "Judy's male friend is very weak." 56 | response: 57 | answer: "gender" 58 | - question: 'The woman believed the bible was full of truth.' 
59 | response: 60 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/csqa-t1-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these four choice : A, B, C, D 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | NOTE: You can only output one of these : A, B, C, D 13 | DO NOT DEVIATE FROM THIS INSTRUCTION 14 | RESPONSE: 15 | format_instruct: 16 | text: | 17 | Provide your output in the following text format: 18 | Step by step reasoning: ... 19 | Answer: The final answer is ... 20 | json: | 21 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 22 | Provide your output in the following valid JSON format: 23 | ```json 24 | { 25 | "step_by_step": "...", 26 | "answer": "..." 27 | } 28 | ``` 29 | yaml: | 30 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 31 | Provide your output in the following valid YAML format: 32 | ```yaml 33 | step_by_step: ..., 34 | answer: ... 35 | ``` 36 | xml: | 37 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 38 | Provide your output in the following valid YAML format: 39 | ```xml 40 | 41 | ... 42 | ... 43 | 44 | ``` 45 | fewshots: 46 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 47 | response: 48 | answer: "race" 49 | - question: 'The norwegian man was boring.' 50 | response: 51 | answer: "race" 52 | - question: 'A tailor is sloppy in what he sews.' 
53 | response: 54 | answer: "profession" 55 | - question: "Judy's male friend is very weak." 56 | response: 57 | answer: "gender" 58 | - question: 'The woman believed the bible was full of truth.' 59 | response: 60 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/csqa-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these four choice : A, B, C, D 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | NOTE: You can only output one of these : A, B, C, D 13 | DO NOT DEVIATE FROM THIS INSTRUCTION 14 | RESPONSE: 15 | format_instruct: 16 | json: | 17 | You must answer in JSON 18 | fewshots: 19 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 20 | response: 21 | answer: "race" 22 | - question: 'The norwegian man was boring.' 23 | response: 24 | answer: "race" 25 | - question: 'A tailor is sloppy in what he sews.' 26 | response: 27 | answer: "profession" 28 | - question: "Judy's male friend is very weak." 29 | response: 30 | answer: "gender" 31 | - question: 'The woman believed the bible was full of truth.' 32 | response: 33 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/dateunder-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 
4 | Make sure the answer contains only one of these seven choices: A, B, C, D, E, F, G 5 | parser_prompt: 6 | text: | 7 | Parse the following response and determine which one of the following choices it answers 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 21 | Provide your output in the following valid text format: 22 | Answer: ...reasoning here... The answer is ... 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "reasoning": "...", 29 | "answer": "..." 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | reasoning: ..., 37 | answer: ... 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 41 | Provide your output in the following valid XML format: 42 | ```xml 43 | 44 | ... 45 | ... 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 
62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t1-f1-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them. 3 | Read the last question carefully and think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 8 | Remove any block elements like or anything that wasn't the actual lower letter answer 9 | Response: 10 | format_instruct: 11 | json: | 12 | Provide your output in the following valid text and JSON format pair: 13 | Reason: ... 14 | ```json 15 | { 16 | "answer": ... 17 | } 18 | ``` 19 | yaml: | 20 | Provide your output in the following valid text and YAML format: 21 | Reason: ... 22 | ```yaml 23 | answer: ... 24 | ``` 25 | xml: | 26 | Provide your output in the following valid text and XML format: 27 | Reason: ... 28 | ```xml 29 | 30 | ... 31 | 32 | ``` 33 | fewshots: 34 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 35 | response: 36 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 37 | answer: "nk" 38 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 39 | response: 40 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 41 | answer: "ye" 42 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 43 | response: 44 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 
45 | answer: "yn" 46 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 47 | response: 48 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 49 | answer: "ls" 50 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 51 | response: 52 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 53 | answer: "ni" 54 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 55 | response: 56 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 57 | answer: "st" 58 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 59 | response: 60 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 61 | answer: "nn" 62 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 63 | response: 64 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 65 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them. 3 | Read the last question carefully and think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 
8 | Remove any block elements like or anything that wasn't the actual lower letter answer 9 | Response: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Answer: . The final answer is 14 | json: | 15 | Provide your output in the following valid JSON format: 16 | ```json 17 | { 18 | "reason": ..., 19 | "answer": ... 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid YAML format: 24 | ```yaml 25 | reasoning: | 26 | ... 27 | answer: ... 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format: 31 | ```xml 32 | 33 | ... 34 | ... 35 | 36 | ``` 37 | fewshots: 38 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 39 | response: 40 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 41 | answer: "nk" 42 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 43 | response: 44 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 45 | answer: "ye" 46 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 47 | response: 48 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 49 | answer: "yn" 50 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 51 | response: 52 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 53 | answer: "ls" 54 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 55 | response: 56 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 57 | answer: "ni" 58 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 
59 | response: 60 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 61 | answer: "st" 62 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 63 | response: 64 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 65 | answer: "nn" 66 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 67 | response: 68 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 69 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t1-f2-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them. 3 | Read the last question carefully and think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 8 | Remove any block elements like or anything that wasn't the actual lower letter answer 9 | Response: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Step by step reasoning: ... 14 | Answer: The final answer is ... 15 | json: | 16 | Provide your output in the following valid JSON format: 17 | Step by step reasoning: ... 18 | ```json 19 | { 20 | "answer": ... 21 | } 22 | ``` 23 | yaml: | 24 | Provide your output in the following valid YAML format: 25 | Step by step reasoning: ... 26 | ```yaml 27 | answer: ... 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format: 31 | Step by step reasoning: ... 32 | ```xml 33 | 34 | ... 
35 | 36 | ``` 37 | fewshots: 38 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 39 | response: 40 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 41 | answer: "nk" 42 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 43 | response: 44 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 45 | answer: "ye" 46 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 47 | response: 48 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 49 | answer: "yn" 50 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 51 | response: 52 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 53 | answer: "ls" 54 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 55 | response: 56 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 57 | answer: "ni" 58 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 59 | response: 60 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 61 | answer: "st" 62 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 63 | response: 64 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 65 | answer: "nn" 66 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 67 | response: 68 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 
69 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t1-f3-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them. 3 | Read the last question carefully and think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 8 | Remove any block elements like or anything that wasn't the actual lower letter answer 9 | Response: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Answer: . The final answer is 14 | json: | 15 | Provide your output in the following valid JSON format: 16 | Reason : 17 | ```json 18 | { 19 | "answer": 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid YAML format: 24 | Reason : 25 | ```yaml 26 | answer: 27 | ``` 28 | xml: | 29 | Provide your output in the following valid XML format: 30 | Reason : 31 | ```xml 32 | 33 | [answer] 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 40 | answer: "nk" 41 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 44 | answer: "ye" 45 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 46 | response: 47 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 
48 | answer: "yn" 49 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 50 | response: 51 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 52 | answer: "ls" 53 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 54 | response: 55 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 56 | answer: "ni" 57 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 58 | response: 59 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 60 | answer: "st" 61 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 62 | response: 63 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 64 | answer: "nn" 65 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 66 | response: 67 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 68 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them. 3 | Read the last question carefully and think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 
8 | Remove any block elements like or anything that wasn't the actual lower letter answer 9 | Response: 10 | format_instruct: 11 | json: | 12 | You must use the tool 13 | fewshots: 14 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 15 | response: 16 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 17 | answer: "nk" 18 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 19 | response: 20 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 21 | answer: "ye" 22 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 23 | response: 24 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 25 | answer: "yn" 26 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 27 | response: 28 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 29 | answer: "ls" 30 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 31 | response: 32 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 33 | answer: "ni" 34 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 35 | response: 36 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 37 | answer: "st" 38 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 39 | response: 40 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 41 | answer: "nn" 42 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 
43 | response: 44 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 45 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t2-f1-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. You are given a string of words and you need to take the last letter of each words and concate them 3 | parser_prompt: 4 | text: | 5 | Extract the following response final answer, only alphabet from a-z only. 6 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 7 | Remove any block elements like or anything that wasn't the actual lower letter answer 8 | Response: 9 | format_instruct: 10 | json: | 11 | Provide your output in the following valid text and JSON format pair: 12 | Reason : ... 13 | ```json 14 | { 15 | "answer": ... 16 | } 17 | ``` 18 | yaml: | 19 | Provide your output in the following valid text and YAML format: 20 | Reason: ... 21 | ```yaml 22 | answer: ... 23 | ``` 24 | xml: | 25 | Provide your output in the following valid text and XML format: 26 | Reason: ... 27 | ```xml 28 | 29 | ... 30 | 31 | ``` 32 | fewshots: 33 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 34 | response: 35 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 36 | answer: "nk" 37 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 40 | answer: "ye" 41 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". 
Concatenating them is "yn".' 44 | answer: "yn" 45 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 46 | response: 47 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 48 | answer: "ls" 49 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 50 | response: 51 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 52 | answer: "ni" 53 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 54 | response: 55 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 56 | answer: "st" 57 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 58 | response: 59 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 60 | answer: "nn" 61 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 62 | response: 63 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 64 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t2-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. You are given a string of words and you need to take the last letter of each words and concate them 3 | parser_prompt: 4 | text: | 5 | Extract the following response final answer, only alphabet from a-z only. 6 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 
7 | Remove any block elements like or anything that wasn't the actual lower letter answer 8 | Response: 9 | format_instruct: 10 | text: | 11 | Provide your output in the following text format: 12 | Answer: . The final answer is 13 | json: | 14 | Provide your output in the following valid JSON format: 15 | ```json 16 | { 17 | "reason": ..., 18 | "answer": ... 19 | } 20 | ``` 21 | yaml: | 22 | Provide your output in the following valid YAML format: 23 | ```yaml 24 | reasoning: | 25 | ... 26 | answer: ... 27 | ``` 28 | xml: | 29 | Provide your output in the following valid XML format: 30 | ```xml 31 | 32 | ... 33 | ... 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 40 | answer: "nk" 41 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 44 | answer: "ye" 45 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 46 | response: 47 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 48 | answer: "yn" 49 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 50 | response: 51 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 52 | answer: "ls" 53 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 54 | response: 55 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 56 | answer: "ni" 57 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 
58 | response: 59 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 60 | answer: "st" 61 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 62 | response: 63 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 64 | answer: "nn" 65 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 66 | response: 67 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 68 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t2-f2-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. You are given a string of words and you need to take the last letter of each words and concate them 3 | parser_prompt: 4 | text: | 5 | Extract the following response final answer, only alphabet from a-z only. 6 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 7 | Remove any block elements like or anything that wasn't the actual lower letter answer 8 | Response: 9 | format_instruct: 10 | text: | 11 | Provide your output in the following text format: 12 | Step by step reasoning: ... 13 | Answer: The final answer is ... 14 | json: | 15 | Provide your output in the following valid JSON format: 16 | Step by step reasoning: ... 17 | ```json 18 | { 19 | "answer": ... 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid YAML format: 24 | Step by step reasoning: ... 25 | ```yaml 26 | answer: ... 27 | ``` 28 | xml: | 29 | Provide your output in the following valid XML format: 30 | Step by step reasoning: ... 31 | ```xml 32 | 33 | ... 
34 | 35 | ``` 36 | fewshots: 37 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 40 | answer: "nk" 41 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 44 | answer: "ye" 45 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 46 | response: 47 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 48 | answer: "yn" 49 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 50 | response: 51 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 52 | answer: "ls" 53 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 54 | response: 55 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 56 | answer: "ni" 57 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 58 | response: 59 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 60 | answer: "st" 61 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 62 | response: 63 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 64 | answer: "nn" 65 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 66 | response: 67 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 
68 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t2-f3-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. You are given a string of words and you need to take the last letter of each words and concate them 3 | parser_prompt: 4 | text: | 5 | Extract the following response final answer, only alphabet from a-z only. 6 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 7 | Remove any block elements like or anything that wasn't the actual lower letter answer 8 | Response: 9 | format_instruct: 10 | text: | 11 | Provide your output in the following text format: 12 | Answer: . The final answer is 13 | json: | 14 | Provide your output in the following valid JSON format: 15 | Reason : 16 | ```json 17 | { 18 | "answer": 19 | } 20 | ``` 21 | yaml: | 22 | Provide your output in the following valid YAML format: 23 | Reason : 24 | ```yaml 25 | answer: 26 | ``` 27 | xml: | 28 | Provide your output in the following valid XML format: 29 | Reason : 30 | ```xml 31 | 32 | [answer] 33 | 34 | ``` 35 | fewshots: 36 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 37 | response: 38 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 39 | answer: "nk" 40 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 41 | response: 42 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 43 | answer: "ye" 44 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 45 | response: 46 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 
47 | answer: "yn" 48 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 49 | response: 50 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 51 | answer: "ls" 52 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 53 | response: 54 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 55 | answer: "ni" 56 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 57 | response: 58 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 59 | answer: "st" 60 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 61 | response: 62 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 63 | answer: "nn" 64 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 65 | response: 66 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 67 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. You are given a string of words and you need to take the last letter of each words and concate them 3 | parser_prompt: 4 | text: | 5 | Extract the following response final answer, only alphabet from a-z only. 6 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 
7 | Remove any block elements like or anything that wasn't the actual lower letter answer 8 | Response: 9 | format_instruct: 10 | json: | 11 | You must use the tool 12 | fewshots: 13 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 14 | response: 15 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 16 | answer: "nk" 17 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 18 | response: 19 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 20 | answer: "ye" 21 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 22 | response: 23 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 24 | answer: "yn" 25 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 26 | response: 27 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 28 | answer: "ls" 29 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 30 | response: 31 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 32 | answer: "ni" 33 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 34 | response: 35 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 36 | answer: "st" 37 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 40 | answer: "nn" 41 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 
42 | response: 43 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 44 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t3-f1-hybrid.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | String manipulation task: 3 | • Given: A sequence of words 4 | • Required: A new string made from the last letter of each word 5 | • Process: Think step by step to solve this challenge 6 | Note: Ensure you've read the question thoroughly before beginning. 7 | parser_prompt: 8 | text: | 9 | Extract the following response final answer, only alphabet from a-z only. 10 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 11 | Remove any block elements like or anything that wasn't the actual lower letter answer 12 | Response: 13 | format_instruct: 14 | json: | 15 | Provide your output in the following valid text and JSON format pair: 16 | Reason : ... 17 | ```json 18 | { 19 | "answer": ... 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid text and YAML format: 24 | Reason: ... 25 | ```yaml 26 | answer: ... 27 | ``` 28 | xml: | 29 | Provide your output in the following valid text and XML format: 30 | Reason: ... 31 | ```xml 32 | 33 | ... 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 40 | answer: "nk" 41 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 44 | answer: "ye" 45 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 
46 | response: 47 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 48 | answer: "yn" 49 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 50 | response: 51 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 52 | answer: "ls" 53 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 54 | response: 55 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 56 | answer: "ni" 57 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 58 | response: 59 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 60 | answer: "st" 61 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 62 | response: 63 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 64 | answer: "nn" 65 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 66 | response: 67 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 68 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | String manipulation task: 3 | • Given: A sequence of words 4 | • Required: A new string made from the last letter of each word 5 | • Process: Think step by step to solve this challenge 6 | Note: Ensure you've read the question thoroughly before beginning. 7 | parser_prompt: 8 | text: | 9 | Extract the following response final answer, only alphabet from a-z only. 
10 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 11 | Remove any block elements like or anything that wasn't the actual lower letter answer 12 | Response: 13 | format_instruct: 14 | json: | 15 | You must use the tool 16 | fewshots: 17 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 18 | response: 19 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 20 | answer: "nk" 21 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 22 | response: 23 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 24 | answer: "ye" 25 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 26 | response: 27 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 28 | answer: "yn" 29 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 30 | response: 31 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 32 | answer: "ls" 33 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 34 | response: 35 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 36 | answer: "ni" 37 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 38 | response: 39 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 40 | answer: "st" 41 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 42 | response: 43 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 
44 | answer: "nn" 45 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 46 | response: 47 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 48 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter-v2-7.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them 3 | Read the last question carefully and MUST think step by step before answering. 4 | parser_prompt: 5 | text: | 6 | Extract the following response final answer, only alphabet from a-z only. 7 | DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL ANSWER! 8 | Remove any elements anything that wasn't the actual lower letter answer such as , :, ',' 9 | Response: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Answer: [reasoning here]... The final answer is ... 14 | json: | 15 | Provide your output in the following valid JSON format: 16 | ```json 17 | { 18 | "reason": ..., 19 | "answer": ... 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid YAML format: 24 | ```yaml 25 | reasoning: | 26 | ... 27 | answer: ... 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format: 31 | ```xml 32 | 33 | ... 34 | ... 35 | 36 | ``` 37 | fewshots: 38 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 39 | response: 40 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 41 | answer: "nk" 42 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 43 | response: 44 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 
45 | answer: "ye" 46 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 47 | response: 48 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 49 | answer: "yn" 50 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 51 | response: 52 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 53 | answer: "ls" 54 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 55 | response: 56 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 57 | answer: "ni" 58 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 59 | response: 60 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 61 | answer: "st" 62 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 63 | response: 64 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 65 | answer: "nn" 66 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 67 | response: 68 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 69 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/lastletter.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a string of words and you need to take the last letter of each words and concate them 3 | Read carefully for each of the last question and think step by step before answering. 4 | 5 | format_instruct: 6 | text: | 7 | Provide your output in the following text format: 8 | Answer: . 
The final answer is 9 | json: | 10 | Provide your output in the following valid JSON format: 11 | ```json 12 | { 13 | "reason": ..., 14 | "answer": ... 15 | } 16 | ``` 17 | yaml: | 18 | Provide your output in the following valid YAML format: 19 | ```yaml 20 | reasoning: ... 21 | answer: ... 22 | ``` 23 | xml: | 24 | Provide your output in the following valid XML format: 25 | ```xml 26 | 27 | ... 28 | ... 29 | 30 | ``` 31 | fewshots: 32 | - question: 'Take the last letters of the words in "Elon Musk" and concatenate them.' 33 | response: 34 | reason: 'The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".' 35 | answer: "nk" 36 | - question: 'Take the last letters of the words in "Larry Page" and concatenate them.' 37 | response: 38 | reason: 'The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".' 39 | answer: "ye" 40 | - question: 'Take the last letters of the words in "Sergey Brin" and concatenate them.' 41 | response: 42 | reason: 'The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".' 43 | answer: "yn" 44 | - question: 'Take the last letters of the words in "Bill Gates" and concatenate them.' 45 | response: 46 | reason: 'The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".' 47 | answer: "ls" 48 | - question: 'Take the last letters of the words in "Jason Wei" and concatenate them.' 49 | response: 50 | reason: 'The last letter of "Jason" is "n". The last letter of "Wei" is "i". Concatenating them is "ni".' 51 | answer: "ni" 52 | - question: 'Take the last letters of the words in "François Chollet" and concatenate them.' 53 | response: 54 | reason: 'The last letter of "François" is "s". The last letter of "Chollet" is "t". Concatenating them is "st".' 55 | answer: "st" 56 | - question: 'Take the last letters of the words in "Yann LeCun" and concatenate them.' 
57 | response: 58 | reason: 'The last letter of "Yann" is "n". The last letter of "LeCun" is "n". Concatenating them is "nn".' 59 | answer: "nn" 60 | - question: 'Take the last letters of the words in "Eliezer Yudkowsky" and concatenate them.' 61 | response: 62 | reason: 'The last letter of "Eliezer" is "r". The last letter of "Yudkowsky" is "y". Concatenating them is "ry".' 63 | answer: "ry" -------------------------------------------------------------------------------- /tasks/templates/multifin-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | - Finance 5 | - Technology 6 | - Tax and Accounting 7 | - Business and Management 8 | - Government and Controls 9 | - Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: ... 27 | json: | 28 | Derive the most likely category to answer key. 29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "..." 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: ... 
40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | ... 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 
72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t1-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | - Finance 5 | - Technology 6 | - Tax and Accounting 7 | - Business and Management 8 | - Government and Controls 9 | - Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Provide your answer in following text format 25 | Answer: ... 26 | json: | 27 | Provide your answer in following JSON format 28 | ```json 29 | { 30 | "answer": "..." 31 | } 32 | ``` 33 | yaml: | 34 | Provide your answer in following YAML format 35 | ```yaml 36 | answer: ... 37 | ``` 38 | xml: | 39 | Provide your answer in following XML format 40 | ```xml 41 | 42 | ... 43 | 44 | ``` 45 | fewshots: 46 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 47 | response: 48 | answer: Technology 49 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 
50 | response: 51 | answer: Business and Management 52 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 53 | response: 54 | answer: Technology 55 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 56 | response: 57 | answer: Business and Management 58 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 59 | response: 60 | answer: Tax and Accounting 61 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 62 | response: 63 | answer: Government and Controls 64 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 65 | response: 66 | answer: Industry 67 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 68 | response: 69 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t1-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | * Finance 5 | * Technology 6 | * Tax and Accounting 7 | * Business and Management 8 | * Government and Controls 9 | * Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output 
one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: 27 | json: | 28 | Derive the most likely category to answer key. 29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "" 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | [valid category] 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 
69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | * Finance 5 | * Technology 6 | * Tax and Accounting 7 | * Business and Management 8 | * Government and Controls 9 | * Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | json: | 24 | You must use the tool 25 | fewshots: 26 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 27 | response: 28 | answer: Technology 29 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 30 | response: 31 | answer: Business and Management 32 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 33 | response: 34 | answer: Technology 35 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 
36 | response: 37 | answer: Business and Management 38 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 39 | response: 40 | answer: Tax and Accounting 41 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 42 | response: 43 | answer: Government and Controls 44 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 45 | response: 46 | answer: Industry 47 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 48 | response: 49 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t2-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | Your answer MUST based on the above options, do not answer Insufficient information 4 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 5 | - Finance 6 | - Technology 7 | - Tax and Accounting 8 | - Business and Management 9 | - Government and Controls 10 | - Industry 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: ... 27 | json: | 28 | Derive the most likely category to answer key. 
29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "..." 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: ... 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | ... 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 
72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t2-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | Your answer MUST based on the above options, do not answer Insufficient information 4 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 5 | - Finance 6 | - Technology 7 | - Tax and Accounting 8 | - Business and Management 9 | - Government and Controls 10 | - Industry 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Provide your answer in following text format 25 | Answer: ... 26 | json: | 27 | Provide your answer in following JSON format 28 | ```json 29 | { 30 | "answer": "..." 31 | } 32 | ``` 33 | yaml: | 34 | Provide your answer in following YAML format 35 | ```yaml 36 | answer: ... 37 | ``` 38 | xml: | 39 | Provide your answer in following XML format 40 | ```xml 41 | 42 | ... 43 | 44 | ``` 45 | fewshots: 46 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 47 | response: 48 | answer: Technology 49 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 
50 | response: 51 | answer: Business and Management 52 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 53 | response: 54 | answer: Technology 55 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 56 | response: 57 | answer: Business and Management 58 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 59 | response: 60 | answer: Tax and Accounting 61 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 62 | response: 63 | answer: Government and Controls 64 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 65 | response: 66 | answer: Industry 67 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 68 | response: 69 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t2-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | Your answer MUST based on the above options, do not answer Insufficient information 4 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 5 | - Finance 6 | - Technology 7 | - Tax and Accounting 8 | - Business and Management 9 | - Government and Controls 10 | - Industry 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output 
one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: 27 | json: | 28 | Derive the most likely category to answer key. 29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "" 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | [valid category] 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 
69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | Your answer MUST based on the above options, do not answer Insufficient information 4 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 5 | - Finance 6 | - Technology 7 | - Tax and Accounting 8 | - Business and Management 9 | - Government and Controls 10 | - Industry 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | json: | 24 | You must use the tool 25 | fewshots: 26 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 27 | response: 28 | answer: Technology 29 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 30 | response: 31 | answer: Business and Management 32 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 33 | response: 34 | answer: Technology 35 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 
36 | response: 37 | answer: Business and Management 38 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 39 | response: 40 | answer: Tax and Accounting 41 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 42 | response: 43 | answer: Government and Controls 44 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 45 | response: 46 | answer: Industry 47 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 48 | response: 49 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t3-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | * Finance 5 | * Technology 6 | * Tax and Accounting 7 | * Business and Management 8 | * Government and Controls 9 | * Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: ... 27 | json: | 28 | Derive the most likely category to answer key. 
29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "..." 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: ... 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | ... 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 
72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t3-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | * Finance 5 | * Technology 6 | * Tax and Accounting 7 | * Business and Management 8 | * Government and Controls 9 | * Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Provide your answer in following text format 25 | Answer: ... 26 | json: | 27 | Provide your answer in following JSON format 28 | ```json 29 | { 30 | "answer": "..." 31 | } 32 | ``` 33 | yaml: | 34 | Provide your answer in following YAML format 35 | ```yaml 36 | answer: ... 37 | ``` 38 | xml: | 39 | Provide your answer in following XML format 40 | ```xml 41 | 42 | ... 43 | 44 | ``` 45 | fewshots: 46 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 47 | response: 48 | answer: Technology 49 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 
50 | response: 51 | answer: Business and Management 52 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 53 | response: 54 | answer: Technology 55 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 56 | response: 57 | answer: Business and Management 58 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 59 | response: 60 | answer: Tax and Accounting 61 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 62 | response: 63 | answer: Government and Controls 64 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 65 | response: 66 | answer: Industry 67 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 68 | response: 69 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t3-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | * Finance 5 | * Technology 6 | * Tax and Accounting 7 | * Business and Management 8 | * Government and Controls 9 | * Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output 
one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: 27 | json: | 28 | Derive the most likely category to answer key. 29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "" 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | [valid category] 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 
69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | - Finance 5 | - Technology 6 | - Tax and Accounting 7 | - Business and Management 8 | - Government and Controls 9 | - Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | json: | 24 | You must use the tool 25 | fewshots: 26 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 27 | response: 28 | answer: Technology 29 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 30 | response: 31 | answer: Business and Management 32 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 33 | response: 34 | answer: Technology 35 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 
36 | response: 37 | answer: Business and Management 38 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 39 | response: 40 | answer: Tax and Accounting 41 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 42 | response: 43 | answer: Government and Controls 44 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 45 | response: 46 | answer: Industry 47 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 48 | response: 49 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/multifin.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Act as a finance expert and assign the content based to the valid category 3 | All possible valid category for you to choose from are as follows (one category per line, in the format of ): 4 | - Finance 5 | - Technology 6 | - Tax and Accounting 7 | - Business and Management 8 | - Government and Controls 9 | - Industry 10 | Your answer MUST based on the above options, do not answer Insufficient information 11 | parser_prompt: 12 | text: | 13 | Extract the following RESPONSE final answer, your answer should be the one which match any of these valid category: 14 | - Finance 15 | - Technology 16 | - Tax and Accounting 17 | - Business and Management 18 | - Government and Controls 19 | - Industry 20 | DO not output anything than the above valid category, just output one that match the answer, remove bracket "< >" symbol if exist 21 | RESPONSE: 22 | format_instruct: 23 | text: | 24 | Derive the most likely category to answer key. 25 | Provide your output in the following valid text format: 26 | Answer: 27 | json: | 28 | Derive the most likely category to answer key. 
29 | Provide your output in the following valid JSON format: 30 | ```json 31 | { 32 | "answer": "" 33 | } 34 | ``` 35 | yaml: | 36 | Derive the most likely category to answer key. 37 | Provide your output in the following valid YAML format: 38 | ```yaml 39 | answer: 40 | ``` 41 | xml: | 42 | Derive the most likely category to answer block 43 | Provide your output in the following valid XML format: 44 | ```xml 45 | 46 | [valid category] 47 | 48 | ``` 49 | fewshots: 50 | - question: Recognized as a Leader in the Gartner Magic Quadrant for Data and Analytics Service Providers 2021. 51 | response: 52 | answer: Technology 53 | - question: Recent report highlights a sharp increase in corporate boards prioritizing sustainability in their strategic goals. 54 | response: 55 | answer: Business and Management 56 | - question: The Cybersecurity practice awarded Best Consulting Firm by SC Media Awards 2020. 57 | response: 58 | answer: Technology 59 | - question: Survey reveals a growing trend of companies investing in leadership development programs to drive innovation. 60 | response: 61 | answer: Business and Management 62 | - question: Tax team named as the Best Transfer Pricing Firm in Asia by International Tax Review 2022. 63 | response: 64 | answer: Tax and Accounting 65 | - question: A new policy mandates stricter controls on data privacy and security for public sector organizations. 66 | response: 67 | answer: Government and Controls 68 | - question: Honored with the Diversity Impact Award for its commitment to fostering an inclusive workplace culture. 69 | response: 70 | answer: Industry 71 | - question: Named Leading Firm in Forensic Accounting by the Financial Forensics Association. 
72 | response: 73 | answer: Tax and Accounting -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these seven choices : A, B, C, D, E, F, G 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 21 | Provide your output in the following valid text format: 22 | Answer: ...reasoning here... The answer is ... 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "reasoning": "...", 29 | "answer": "..." 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | reasoning: ..., 37 | answer: ... 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 41 | Provide your output in the following valid XML format: 42 | ```xml 43 | 44 | ... 45 | ... 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 
53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t1-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these seven choices : A, B, C, D, E, F, G 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Provide your output in the following text format: 21 | Step by step reasoning: ... 22 | Answer: The final answer is ... 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "step_by_step": "...", 29 | "answer": "..." 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | step_by_step: ..., 37 | answer: ... 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 41 | Provide your output in the following valid XML format: 42 | ```xml 43 | 44 | ... 45 | ... 
46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t1-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these seven choices : A, B, C, D, E, F, G 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 21 | Provide your output in the following valid text format: 22 | Answer: [think step by step] The answer is [answer here] 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "reasoning": , 29 | "answer": 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 
34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | reasoning: , 37 | answer: 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 41 | Provide your output in the following valid YAML format: 42 | ```xml 43 | 44 | [think step by step] 45 | [answer] 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are tasked to answer the following commonsense knowledge task. 3 | Read carefully for each of the last question and think step by step before answering. 4 | Make sure the answer only contain one of these four choice : A, B, C, D, E, F, G 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | json: | 20 | You must answer in JSON 21 | fewshots: 22 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 23 | response: 24 | answer: "race" 25 | - question: 'The norwegian man was boring.' 
26 | response: 27 | answer: "race" 28 | - question: 'A tailor is sloppy in what he sews.' 29 | response: 30 | answer: "profession" 31 | - question: "Judy's male friend is very weak." 32 | response: 33 | answer: "gender" 34 | - question: 'The woman believed the bible was full of truth.' 35 | response: 36 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t2-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. 3 | Make sure the answer only contain one of these four choice : A, B, C, D, E, F, G 4 | In this task, you are tasked to answer the following commonsense knowledge task. 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 21 | Provide your output in the following valid text format: 22 | Answer: ...reasoning here... The answer is ... 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "reasoning": "...", 29 | "answer": "..." 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | reasoning: ..., 37 | answer: ... 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 
41 | Provide your output in the following valid YAML format: 42 | ```xml 43 | 44 | ... 45 | ... 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t2-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. 3 | Make sure the answer only contain one of these four choice : A, B, C, D, E, F, G 4 | In this task, you are tasked to answer the following commonsense knowledge task. 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Provide your output in the following text format: 21 | Step by step reasoning: ... 22 | Answer: The final answer is ... 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "step_by_step": "...", 29 | "answer": "..." 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 
34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | step_by_step: ..., 37 | answer: ... 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 41 | Provide your output in the following valid YAML format: 42 | ```xml 43 | 44 | ... 45 | ... 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t2-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. 3 | Make sure the answer only contain one of these four choice : A, B, C, D, E, F, G 4 | In this task, you are tasked to answer the following commonsense knowledge task. 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | text: | 20 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 
21 | Provide your output in the following valid text format: 22 | Answer: [think step by step] The answer is [answer here] 23 | json: | 24 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 25 | Provide your output in the following valid JSON format: 26 | ```json 27 | { 28 | "reasoning": , 29 | "answer": 30 | } 31 | ``` 32 | yaml: | 33 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 34 | Provide your output in the following valid YAML format: 35 | ```yaml 36 | reasoning: , 37 | answer: 38 | ``` 39 | xml: | 40 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 41 | Provide your output in the following valid YAML format: 42 | ```xml 43 | 44 | [think step by step] 45 | [answer] 46 | 47 | ``` 48 | fewshots: 49 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 50 | response: 51 | answer: "race" 52 | - question: 'The norwegian man was boring.' 53 | response: 54 | answer: "race" 55 | - question: 'A tailor is sloppy in what he sews.' 56 | response: 57 | answer: "profession" 58 | - question: "Judy's male friend is very weak." 59 | response: 60 | answer: "gender" 61 | - question: 'The woman believed the bible was full of truth.' 62 | response: 63 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Read carefully for each of the last question and think step by step before answering. 3 | Make sure the answer only contain one of these four choice : A, B, C, D, E, F, G 4 | In this task, you are tasked to answer the following commonsense knowledge task. 
5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer one the following choice 8 | - A 9 | - B 10 | - C 11 | - D 12 | - E 13 | - F 14 | - G 15 | NOTE: You can only output one of these : A, B, C, D, E, F, G 16 | DO NOT DEVIATE FROM THIS INSTRUCTION 17 | RESPONSE: 18 | format_instruct: 19 | json: | 20 | You must answer in JSON 21 | fewshots: 22 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 23 | response: 24 | answer: "race" 25 | - question: 'The norwegian man was boring.' 26 | response: 27 | answer: "race" 28 | - question: 'A tailor is sloppy in what he sews.' 29 | response: 30 | answer: "profession" 31 | - question: "Judy's male friend is very weak." 32 | response: 33 | answer: "gender" 34 | - question: 'The woman believed the bible was full of truth.' 35 | response: 36 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t3-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Context understanding assessment: 3 | • Given: A story related to many person in the same place 4 | • Required: Determine if the person who is in the end of the story 5 | • Process: Think step by step to analyze the context 6 | • Output: Answer the correct answer and only contain one of these seven choice : A, B, C, D, E, F, G 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer one the following choice 10 | - A 11 | - B 12 | - C 13 | - D 14 | - E 15 | - F 16 | - G 17 | NOTE: You can only output one of these : A, B, C, D, E, F, G 18 | DO NOT DEVIATE FROM THIS INSTRUCTION 19 | RESPONSE: 20 | format_instruct: 21 | text: | 22 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 23 | Provide your output in the following valid text format: 24 | Answer: ...reasoning here... The answer is ... 
25 | json: | 26 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 27 | Provide your output in the following valid JSON format: 28 | ```json 29 | { 30 | "reasoning": "...", 31 | "answer": "..." 32 | } 33 | ``` 34 | yaml: | 35 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 36 | Provide your output in the following valid YAML format: 37 | ```yaml 38 | reasoning: ..., 39 | answer: ... 40 | ``` 41 | xml: | 42 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 43 | Provide your output in the following valid YAML format: 44 | ```xml 45 | 46 | ... 47 | ... 48 | 49 | ``` 50 | fewshots: 51 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 52 | response: 53 | answer: "race" 54 | - question: 'The norwegian man was boring.' 55 | response: 56 | answer: "race" 57 | - question: 'A tailor is sloppy in what he sews.' 58 | response: 59 | answer: "profession" 60 | - question: "Judy's male friend is very weak." 61 | response: 62 | answer: "gender" 63 | - question: 'The woman believed the bible was full of truth.' 
64 | response: 65 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t3-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Context understanding assessment: 3 | • Given: A story related to many person in the same place 4 | • Required: Determine if the person who is in the end of the story 5 | • Process: Think step by step to analyze the context 6 | • Output: Answer the correct answer and only contain one of these seven choice : A, B, C, D, E, F, G 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer one the following choice 10 | - A 11 | - B 12 | - C 13 | - D 14 | - E 15 | - F 16 | - G 17 | NOTE: You can only output one of these : A, B, C, D, E, F, G 18 | DO NOT DEVIATE FROM THIS INSTRUCTION 19 | RESPONSE: 20 | format_instruct: 21 | text: | 22 | Provide your output in the following text format: 23 | Step by step reasoning: ... 24 | Answer: The final answer is ... 25 | json: | 26 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 27 | Provide your output in the following valid JSON format: 28 | ```json 29 | { 30 | "step_by_step": "...", 31 | "answer": "..." 32 | } 33 | ``` 34 | yaml: | 35 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 36 | Provide your output in the following valid YAML format: 37 | ```yaml 38 | step_by_step: ..., 39 | answer: ... 40 | ``` 41 | xml: | 42 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 43 | Provide your output in the following valid YAML format: 44 | ```xml 45 | 46 | ... 47 | ... 48 | 49 | ``` 50 | fewshots: 51 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 52 | response: 53 | answer: "race" 54 | - question: 'The norwegian man was boring.' 
55 | response: 56 | answer: "race" 57 | - question: 'A tailor is sloppy in what he sews.' 58 | response: 59 | answer: "profession" 60 | - question: "Judy's male friend is very weak." 61 | response: 62 | answer: "gender" 63 | - question: 'The woman believed the bible was full of truth.' 64 | response: 65 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t3-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Context understanding assessment: 3 | • Given: A story related to many person in the same place 4 | • Required: Determine if the person who is in the end of the story 5 | • Process: Think step by step to analyze the context 6 | • Output: Answer the correct answer and only contain one of these seven choice : A, B, C, D, E, F, G 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer one the following choice 10 | - A 11 | - B 12 | - C 13 | - D 14 | - E 15 | - F 16 | - G 17 | NOTE: You can only output one of these : A, B, C, D, E, F, G 18 | DO NOT DEVIATE FROM THIS INSTRUCTION 19 | RESPONSE: 20 | format_instruct: 21 | text: | 22 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 23 | Provide your output in the following valid text format: 24 | Answer: [think step by step] The answer is [answer here] 25 | json: | 26 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 27 | Provide your output in the following valid JSON format: 28 | ```json 29 | { 30 | "reasoning": , 31 | "answer": 32 | } 33 | ``` 34 | yaml: | 35 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 
36 | Provide your output in the following valid YAML format: 37 | ```yaml 38 | reasoning: , 39 | answer: 40 | ``` 41 | xml: | 42 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 43 | Provide your output in the following valid YAML format: 44 | ```xml 45 | 46 | [think step by step] 47 | [answer] 48 | 49 | ``` 50 | fewshots: 51 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 52 | response: 53 | answer: "race" 54 | - question: 'The norwegian man was boring.' 55 | response: 56 | answer: "race" 57 | - question: 'A tailor is sloppy in what he sews.' 58 | response: 59 | answer: "profession" 60 | - question: "Judy's male friend is very weak." 61 | response: 62 | answer: "gender" 63 | - question: 'The woman believed the bible was full of truth.' 64 | response: 65 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Context understanding assessment: 3 | • Given: A story related to many person in the same place 4 | • Required: Determine if the person who is in the end of the story 5 | • Process: Think step by step to analyze the context 6 | • Output: Answer the correct answer and only contain one of these seven choice : A, B, C, D, E, F, G 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer one the following choice 10 | - A 11 | - B 12 | - C 13 | - D 14 | - E 15 | - F 16 | - G 17 | NOTE: You can only output one of these : A, B, C, D, E, F, G 18 | DO NOT DEVIATE FROM THIS INSTRUCTION 19 | RESPONSE: 20 | format_instruct: 21 | json: | 22 | You must answer in JSON 23 | fewshots: 24 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 
25 | response: 26 | answer: "race" 27 | - question: 'The norwegian man was boring.' 28 | response: 29 | answer: "race" 30 | - question: 'A tailor is sloppy in what he sews.' 31 | response: 32 | answer: "profession" 33 | - question: "Judy's male friend is very weak." 34 | response: 35 | answer: "gender" 36 | - question: 'The woman believed the bible was full of truth.' 37 | response: 38 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t4-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are an expert in performing common sense tasks involving the ordering of a sequence of events. 3 | Each question will present you with a sequence of events that involve moving an object among 5 people. 4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following choice 7 | - A 8 | - B 9 | - C 10 | - D 11 | - E 12 | - F 13 | - G 14 | NOTE: You can only output one of these : A, B, C, D, E, F, G 15 | DO NOT DEVIATE FROM THIS INSTRUCTION 16 | RESPONSE: 17 | format_instruct: 18 | text: | 19 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 20 | Provide your output in the following valid text format: 21 | Answer: ...reasoning here... The answer is ... 22 | json: | 23 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 24 | Provide your output in the following valid JSON format: 25 | ```json 26 | { 27 | "reasoning": "...", 28 | "answer": "..." 29 | } 30 | ``` 31 | yaml: | 32 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 33 | Provide your output in the following valid YAML format: 34 | ```yaml 35 | reasoning: ..., 36 | answer: ... 37 | ``` 38 | xml: | 39 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 
40 | Provide your output in the following valid YAML format: 41 | ```xml 42 | 43 | ... 44 | ... 45 | 46 | ``` 47 | fewshots: 48 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 49 | response: 50 | answer: "race" 51 | - question: 'The norwegian man was boring.' 52 | response: 53 | answer: "race" 54 | - question: 'A tailor is sloppy in what he sews.' 55 | response: 56 | answer: "profession" 57 | - question: "Judy's male friend is very weak." 58 | response: 59 | answer: "gender" 60 | - question: 'The woman believed the bible was full of truth.' 61 | response: 62 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t4-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are an expert in performing common sense tasks involving the ordering of a sequence of events. 3 | Each question will present you with a sequence of events that involve moving an object among 5 people. 4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following choice 7 | - A 8 | - B 9 | - C 10 | - D 11 | - E 12 | - F 13 | - G 14 | NOTE: You can only output one of these : A, B, C, D, E, F, G 15 | DO NOT DEVIATE FROM THIS INSTRUCTION 16 | RESPONSE: 17 | format_instruct: 18 | text: | 19 | Provide your output in the following text format: 20 | Step by step reasoning: ... 21 | Answer: The final answer is ... 22 | json: | 23 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 24 | Provide your output in the following valid JSON format: 25 | ```json 26 | { 27 | "step_by_step": "...", 28 | "answer": "..." 29 | } 30 | ``` 31 | yaml: | 32 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 
33 | Provide your output in the following valid YAML format: 34 | ```yaml 35 | step_by_step: ..., 36 | answer: ... 37 | ``` 38 | xml: | 39 | Now, take a deep breath and work on this problem step-by-step to derive the most likely choice. 40 | Provide your output in the following valid YAML format: 41 | ```xml 42 | 43 | ... 44 | ... 45 | 46 | ``` 47 | fewshots: 48 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 49 | response: 50 | answer: "race" 51 | - question: 'The norwegian man was boring.' 52 | response: 53 | answer: "race" 54 | - question: 'A tailor is sloppy in what he sews.' 55 | response: 56 | answer: "profession" 57 | - question: "Judy's male friend is very weak." 58 | response: 59 | answer: "gender" 60 | - question: 'The woman believed the bible was full of truth.' 61 | response: 62 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/shuffleobj-t4-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are an expert in performing common sense tasks involving the ordering of a sequence of events. 3 | Each question will present you with a sequence of events that involve moving an object among 5 people. 4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following choice 7 | - A 8 | - B 9 | - C 10 | - D 11 | - E 12 | - F 13 | - G 14 | NOTE: You can only output one of these : A, B, C, D, E, F, G 15 | DO NOT DEVIATE FROM THIS INSTRUCTION 16 | RESPONSE: 17 | format_instruct: 18 | text: | 19 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 20 | Provide your output in the following valid text format: 21 | Answer: [think step by step] The answer is [answer here] 22 | json: | 23 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 
24 | Provide your output in the following valid JSON format: 25 | ```json 26 | { 27 | "reasoning": , 28 | "answer": 29 | } 30 | ``` 31 | yaml: | 32 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 33 | Provide your output in the following valid YAML format: 34 | ```yaml 35 | reasoning: , 36 | answer: 37 | ``` 38 | xml: | 39 | Now, take a deep breath and work on this problem step-by-step to derive the most likely answer. 40 | Provide your output in the following valid YAML format: 41 | ```xml 42 | 43 | [think step by step] 44 | [answer] 45 | 46 | ``` 47 | fewshots: 48 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 49 | response: 50 | answer: "race" 51 | - question: 'The norwegian man was boring.' 52 | response: 53 | answer: "race" 54 | - question: 'A tailor is sloppy in what he sews.' 55 | response: 56 | answer: "profession" 57 | - question: "Judy's male friend is very weak." 58 | response: 59 | answer: "gender" 60 | - question: 'The woman believed the bible was full of truth.' 61 | response: 62 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/sports-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible 3 | Read carefully for each of the last question and think step by step before answering. 4 | Answer yes if its plausible, no if implausible 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer yes/no 8 | yes if its plausible, no if implausible 9 | NOTE: You can only output yes or no string, DO NOT DEVIATE FROM THIS INSTRUCTION 10 | ONLY yes / no 11 | RESPONSE: 12 | format_instruct: 13 | text: | 14 | Provide your output in the following text format: 15 | Answer: ... 
So the answer is ... 16 | json: | 17 | Provide your output in the following valid JSON format: 18 | ```json 19 | { 20 | "reason": ..., 21 | "answer": ... 22 | } 23 | ``` 24 | yaml: | 25 | Provide your output in the following valid YAML format: 26 | ```yaml 27 | reasoning: ..., 28 | answer: ... 29 | ``` 30 | xml: | 31 | Provide your output in the following valid XML format: 32 | ```xml 33 | 34 | ... 35 | ... 36 | 37 | ``` 38 | fewshots: 39 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 40 | response: 41 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 42 | answer: "yes" 43 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 44 | response: 45 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 46 | answer: "no" 47 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 48 | response: 49 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 50 | answer: "no" 51 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 52 | response: 53 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 54 | answer: "yes" 55 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 56 | response: 57 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 58 | answer: "yes" 59 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 60 | response: 61 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 62 | answer: "no" 63 | - question: 'Is the following sentence plausible? 
“Draymond Green threw a touchdown.”' 64 | response: 65 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 66 | answer: "no" 67 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 68 | response: 69 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 70 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t1-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible 3 | Read carefully for each of the last question and think step by step before answering. 4 | Answer yes if its plausible, no if implausible 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer yes/no 8 | You can only output yes or no string, no other symbols allowed 9 | RESPONSE: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Step by step reasoning: ... 14 | Answer: The final answer is ... 15 | json: | 16 | Provide your output in the following valid JSON format: 17 | ```json 18 | { 19 | "step_by_step_reasoning": ..., 20 | "answer": ... 21 | } 22 | ``` 23 | yaml: | 24 | Provide your output in the following valid YAML format: 25 | ```yaml 26 | step_by_step_reasoning: ..., 27 | answer: ... 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format: 31 | ```xml 32 | 33 | ... 34 | ... 35 | 36 | ``` 37 | fewshots: 38 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 39 | response: 40 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 41 | answer: "yes" 42 | - question: 'Is the following sentence plausible? 
“Joao Moutinho caught the screen pass in the NFC championship.”' 43 | response: 44 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 45 | answer: "no" 46 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 47 | response: 48 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 49 | answer: "no" 50 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 51 | response: 52 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 53 | answer: "yes" 54 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 55 | response: 56 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 57 | answer: "yes" 58 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 59 | response: 60 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 61 | answer: "no" 62 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 63 | response: 64 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 65 | answer: "no" 66 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 67 | response: 68 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 
69 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t1-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible 3 | Read carefully for each of the last question and think step by step before answering. 4 | Answer yes if its plausible, no if implausible 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer yes/no 8 | You can only output yes or no string, no other symbols allowed 9 | RESPONSE: 10 | format_instruct: 11 | text: | 12 | Provide your output in the following text format: 13 | Answer: . So the answer is 14 | json: | 15 | Provide your output in the following valid JSON format: 16 | ```json 17 | { 18 | "reason": , 19 | "answer": 20 | } 21 | ``` 22 | yaml: | 23 | Provide your output in the following valid YAML format: 24 | ```yaml 25 | reasoning: , 26 | answer: 27 | ``` 28 | xml: | 29 | Provide your output in the following valid XML format: 30 | ```xml 31 | 32 | [reasoning first] 33 | [answer] 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 38 | response: 39 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 40 | answer: "yes" 41 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 42 | response: 43 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 44 | answer: "no" 45 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 46 | response: 47 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. 
' 48 | answer: "no" 49 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 50 | response: 51 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 52 | answer: "yes" 53 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 54 | response: 55 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 56 | answer: "yes" 57 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 58 | response: 59 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 60 | answer: "no" 61 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 62 | response: 63 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 64 | answer: "no" 65 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 66 | response: 67 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 68 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible 3 | Read carefully for each of the last question and think step by step before answering. 
4 | Answer yes if its plausible, no if implausible 5 | parser_prompt: 6 | text: | 7 | Parse the following response whether it answer yes/no 8 | yes if its plausible, no if implausible 9 | NOTE: You can only output yes or no string, DO NOT DEVIATE FROM THIS INSTRUCTION 10 | ONLY yes / no 11 | RESPONSE: 12 | format_instruct: 13 | json: | 14 | You must use tool 15 | fewshots: 16 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 17 | response: 18 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 19 | answer: "yes" 20 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 21 | response: 22 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 23 | answer: "no" 24 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 25 | response: 26 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 27 | answer: "no" 28 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 29 | response: 30 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 31 | answer: "yes" 32 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 33 | response: 34 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 35 | answer: "yes" 36 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 37 | response: 38 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 39 | answer: "no" 40 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 41 | response: 42 | reason: 'Draymond Green is a basketball player. 
Throwing a touchdown is part of football, not basketball. ' 43 | answer: "no" 44 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 45 | response: 46 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 47 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t2-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible. Read carefully for each of the last question and think step by step before answering. Answer yes if its plausible, no if implausible 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer yes/no 6 | You can only output yes or no string, no other symbols allowed 7 | RESPONSE: 8 | format_instruct: 9 | text: | 10 | Provide your output in the following text format: 11 | Answer: ... So the answer is ... 12 | json: | 13 | Provide your output in the following valid JSON format: 14 | ```json 15 | { 16 | "reason": ..., 17 | "answer": ... 18 | } 19 | ``` 20 | yaml: | 21 | Provide your output in the following valid YAML format: 22 | ```yaml 23 | reasoning: ..., 24 | answer: ... 25 | ``` 26 | xml: | 27 | Provide your output in the following valid XML format: 28 | ```xml 29 | 30 | ... 31 | ... 32 | 33 | ``` 34 | fewshots: 35 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 36 | response: 37 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 38 | answer: "yes" 39 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 40 | response: 41 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. 
' 42 | answer: "no" 43 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 44 | response: 45 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 46 | answer: "no" 47 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 48 | response: 49 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 50 | answer: "yes" 51 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 52 | response: 53 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 54 | answer: "yes" 55 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 56 | response: 57 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 58 | answer: "no" 59 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 60 | response: 61 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 62 | answer: "no" 63 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 64 | response: 65 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 66 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t2-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible. Read carefully for each of the last question and think step by step before answering. 
Answer yes if its plausible, no if implausible 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer yes/no 6 | You can only output yes or no string, no other symbols allowed 7 | RESPONSE: 8 | format_instruct: 9 | text: | 10 | Provide your output in the following text format: 11 | Step by step reasoning: ... 12 | Answer: The final answer is ... 13 | json: | 14 | Provide your output in the following valid JSON format: 15 | ```json 16 | { 17 | "step_by_step_reasoning": ..., 18 | "answer": ... 19 | } 20 | ``` 21 | yaml: | 22 | Provide your output in the following valid YAML format: 23 | ```yaml 24 | step_by_step_reasoning: ..., 25 | answer: ... 26 | ``` 27 | xml: | 28 | Provide your output in the following valid XML format: 29 | ```xml 30 | 31 | ... 32 | ... 33 | 34 | ``` 35 | fewshots: 36 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 37 | response: 38 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 39 | answer: "yes" 40 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 41 | response: 42 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 43 | answer: "no" 44 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 45 | response: 46 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 47 | answer: "no" 48 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 49 | response: 50 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 51 | answer: "yes" 52 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 53 | response: 54 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. 
' 55 | answer: "yes" 56 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 57 | response: 58 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 59 | answer: "no" 60 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 61 | response: 62 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 63 | answer: "no" 64 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 65 | response: 66 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 67 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t2-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible. Read carefully for each of the last question and think step by step before answering. Answer yes if its plausible, no if implausible 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer yes/no 6 | You can only output yes or no string, no other symbols allowed 7 | RESPONSE: 8 | format_instruct: 9 | text: | 10 | Provide your output in the following text format: 11 | Answer: . So the answer is 12 | json: | 13 | Provide your output in the following valid JSON format: 14 | ```json 15 | { 16 | "reason": , 17 | "answer": 18 | } 19 | ``` 20 | yaml: | 21 | Provide your output in the following valid YAML format: 22 | ```yaml 23 | reasoning: , 24 | answer: 25 | ``` 26 | xml: | 27 | Provide your output in the following valid XML format: 28 | ```xml 29 | 30 | [reasoning first] 31 | [answer] 32 | 33 | ``` 34 | fewshots: 35 | - question: 'Is the following sentence plausible? 
“Kyle Palmieri was called for slashing.”' 36 | response: 37 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 38 | answer: "yes" 39 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 40 | response: 41 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 42 | answer: "no" 43 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 44 | response: 45 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 46 | answer: "no" 47 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 48 | response: 49 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 50 | answer: "yes" 51 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 52 | response: 53 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 54 | answer: "yes" 55 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 56 | response: 57 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 58 | answer: "no" 59 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 60 | response: 61 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 62 | answer: "no" 63 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 64 | response: 65 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 
66 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are given a sentence and your task is to determine whether a sentence relating to sports is plausible or implausible. Read carefully for each of the last question and think step by step before answering. Answer yes if its plausible, no if implausible 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer yes/no 6 | yes if its plausible, no if implausible 7 | NOTE: You can only output yes or no string, DO NOT DEVIATE FROM THIS INSTRUCTION 8 | ONLY yes / no 9 | RESPONSE: 10 | format_instruct: 11 | json: | 12 | You must use tool 13 | fewshots: 14 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 15 | response: 16 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 17 | answer: "yes" 18 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 19 | response: 20 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 21 | answer: "no" 22 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 23 | response: 24 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 25 | answer: "no" 26 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 27 | response: 28 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 29 | answer: "yes" 30 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 31 | response: 32 | reason: 'Jamal Murray is a basketball player. 
Being perfect from the line is part of basketball. ' 33 | answer: "yes" 34 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 35 | response: 36 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 37 | answer: "no" 38 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 39 | response: 40 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 41 | answer: "no" 42 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 43 | response: 44 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 45 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t3-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence plausibility assessment: 3 | • Given: A sentence related to sports 4 | • Required: Determine if the sentence is plausible or implausible 5 | • Process: Think step by step to analyze the sentence 6 | • Output: Answer "yes" if plausible, "no" if implausible 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer yes/no 10 | You can only output yes or no string, no other symbols allowed 11 | RESPONSE: 12 | format_instruct: 13 | text: | 14 | Provide your output in the following text format: 15 | Answer: ... So the answer is ... 16 | json: | 17 | Provide your output in the following valid JSON format: 18 | ```json 19 | { 20 | "reason": ..., 21 | "answer": ... 22 | } 23 | ``` 24 | yaml: | 25 | Provide your output in the following valid YAML format: 26 | ```yaml 27 | reasoning: ..., 28 | answer: ... 29 | ``` 30 | xml: | 31 | Provide your output in the following valid XML format: 32 | ```xml 33 | 34 | ... 35 | ... 
36 | 37 | ``` 38 | fewshots: 39 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 40 | response: 41 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 42 | answer: "yes" 43 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 44 | response: 45 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 46 | answer: "no" 47 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 48 | response: 49 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 50 | answer: "no" 51 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 52 | response: 53 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 54 | answer: "yes" 55 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 56 | response: 57 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 58 | answer: "yes" 59 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 60 | response: 61 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 62 | answer: "no" 63 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 64 | response: 65 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 66 | answer: "no" 67 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 68 | response: 69 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 
70 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t3-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence plausibility assessment: 3 | • Given: A sentence related to sports 4 | • Required: Determine if the sentence is plausible or implausible 5 | • Process: Think step by step to analyze the sentence 6 | • Output: Answer "yes" if plausible, "no" if implausible 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer yes/no 10 | You can only output yes or no string, no other symbols allowed 11 | RESPONSE: 12 | format_instruct: 13 | text: | 14 | Provide your output in the following text format: 15 | Step by step reasoning: ... 16 | Answer: The final answer is ... 17 | json: | 18 | Provide your output in the following valid JSON format: 19 | ```json 20 | { 21 | "step_by_step_reasoning": ..., 22 | "answer": ... 23 | } 24 | ``` 25 | yaml: | 26 | Provide your output in the following valid YAML format: 27 | ```yaml 28 | step_by_step_reasoning: ..., 29 | answer: ... 30 | ``` 31 | xml: | 32 | Provide your output in the following valid XML format: 33 | ```xml 34 | 35 | ... 36 | ... 37 | 38 | ``` 39 | fewshots: 40 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 41 | response: 42 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 43 | answer: "yes" 44 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 45 | response: 46 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 47 | answer: "no" 48 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 49 | response: 50 | reason: 'Carson Wentz is an American football player. 
Pick and roll is part of basketball, not football. ' 51 | answer: "no" 52 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 53 | response: 54 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 55 | answer: "yes" 56 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 57 | response: 58 | reason: 'Jamal Murray is a basketball player. Being perfect from the line is part of basketball. ' 59 | answer: "yes" 60 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 61 | response: 62 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 63 | answer: "no" 64 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 65 | response: 66 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 67 | answer: "no" 68 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 69 | response: 70 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 
71 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/sports-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence plausibility assessment: 3 | • Given: A sentence related to sports 4 | • Required: Determine if the sentence is plausible or implausible 5 | • Process: Think step by step to analyze the sentence 6 | • Output: Answer "yes" if plausible, "no" if implausible 7 | parser_prompt: 8 | text: | 9 | Parse the following response whether it answer yes/no 10 | yes if its plausible, no if implausible 11 | NOTE: You can only output yes or no string, DO NOT DEVIATE FROM THIS INSTRUCTION 12 | ONLY yes / no 13 | RESPONSE: 14 | format_instruct: 15 | json: | 16 | You must use tool 17 | fewshots: 18 | - question: 'Is the following sentence plausible? “Kyle Palmieri was called for slashing.”' 19 | response: 20 | reason: 'Kyle Palmieri is a hockey player. Being called for slashing is part of hockey.' 21 | answer: "yes" 22 | - question: 'Is the following sentence plausible? “Joao Moutinho caught the screen pass in the NFC championship.”' 23 | response: 24 | reason: 'Joao Moutinho is a soccer player. The NFC championship is part of American football, not soccer. ' 25 | answer: "no" 26 | - question: 'Is the following sentence plausible? “Carson Wentz set the pick and roll.”' 27 | response: 28 | reason: 'Carson Wentz is an American football player. Pick and roll is part of basketball, not football. ' 29 | answer: "no" 30 | - question: 'Is the following sentence plausible? “Jonas Valanciunas beat the buzzer.”' 31 | response: 32 | reason: 'Jonas Valanciunas is a basketball player. Beating the buzzer is part of basketball.' 33 | answer: "yes" 34 | - question: 'Is the following sentence plausible? “Jamal Murray was perfect from the line.”' 35 | response: 36 | reason: 'Jamal Murray is a basketball player. 
Being perfect from the line is part of basketball. ' 37 | answer: "yes" 38 | - question: 'Is the following sentence plausible? “Sam Darnold passed the puck.”' 39 | response: 40 | reason: 'Sam Darnold is an American football player. Passing the puck is part of hockey, not American football. ' 41 | answer: "no" 42 | - question: 'Is the following sentence plausible? “Draymond Green threw a touchdown.”' 43 | response: 44 | reason: 'Draymond Green is a basketball player. Throwing a touchdown is part of football, not basketball. ' 45 | answer: "no" 46 | - question: 'Is the following sentence plausible? “Malcolm Brogdon banked the shot in.”' 47 | response: 48 | reason: 'Malcolm Brogdon is a basketball player. Banking the shot in is part of basketball.' 49 | answer: "yes" -------------------------------------------------------------------------------- /tasks/templates/task280-t1-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer one the following label: 6 | - gender 7 | - profession 8 | - race 9 | - religion 10 | NOTE: You can only output one of these : gender, profession, race, religion 11 | DO NOT DEVIATE FROM THIS INSTRUCTION 12 | RESPONSE: 13 | format_instruct: 14 | text: | 15 | Provide your output in the following text format, answer in only valid category in string: 16 | Answer: ... 17 | json: | 18 | Provide your output in the following valid JSON format, answer in only valid category in string: 19 | ```json 20 | { 21 | "answer": ... 
22 | } 23 | ``` 24 | yaml: | 25 | Provide your output in the following valid YAML format, answer in only valid category in string: 26 | ```yaml 27 | answer: ... 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format, answer in only valid category in string: 31 | ```xml 32 | 33 | ... 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 38 | response: 39 | answer: "race" 40 | - question: 'The norwegian man was boring.' 41 | response: 42 | answer: "race" 43 | - question: 'A tailor is sloppy in what he sews.' 44 | response: 45 | answer: "profession" 46 | - question: "Judy's male friend is very weak." 47 | response: 48 | answer: "gender" 49 | - question: 'The woman believed the bible was full of truth.' 50 | response: 51 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t1-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer one the following label: 6 | - gender 7 | - profession 8 | - race 9 | - religion 10 | NOTE: You can only output one of these : gender, profession, race, religion 11 | DO NOT DEVIATE FROM THIS INSTRUCTION 12 | RESPONSE: 13 | format_instruct: 14 | text: | 15 | Provide your output in the following text format: 16 | Step by step reasoning: ... 17 | Answer: The final answer is ... 
18 | json: | 19 | Provide your output in the following valid JSON format: 20 | ```json 21 | { 22 | "step_by_step_reasoning": ..., 23 | "answer": ... 24 | } 25 | ``` 26 | yaml: | 27 | Provide your output in the following valid YAML format: 28 | ```yaml 29 | step_by_step_reasoning: ... 30 | answer: ... 31 | ``` 32 | xml: | 33 | Provide your output in the following valid XML format: 34 | ```xml 35 | 36 | ... 37 | ... 38 | 39 | ``` 40 | fewshots: 41 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 42 | response: 43 | answer: "race" 44 | - question: 'The norwegian man was boring.' 45 | response: 46 | answer: "race" 47 | - question: 'A tailor is sloppy in what he sews.' 48 | response: 49 | answer: "profession" 50 | - question: "Judy's male friend is very weak." 51 | response: 52 | answer: "gender" 53 | - question: 'The woman believed the bible was full of truth.' 54 | response: 55 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t1-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion.
3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer one the following label: 6 | - gender 7 | - profession 8 | - race 9 | - religion 10 | NOTE: You can only output one of these : gender, profession, race, religion 11 | DO NOT DEVIATE FROM THIS INSTRUCTION 12 | RESPONSE: 13 | format_instruct: 14 | text: | 15 | Provide your output in the following text format: 16 | Answer: 17 | json: | 18 | Provide your output in the following valid JSON format: 19 | ```json 20 | { 21 | "answer": 22 | } 23 | ``` 24 | yaml: | 25 | Provide your output in the following valid YAML format: 26 | ```yaml 27 | answer: 28 | ``` 29 | xml: | 30 | Provide your output in the following valid XML format: 31 | ```xml 32 | 33 | [valid label] 34 | 35 | ``` 36 | fewshots: 37 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 38 | response: 39 | answer: "race" 40 | - question: 'The norwegian man was boring.' 41 | response: 42 | answer: "race" 43 | - question: 'A tailor is sloppy in what he sews.' 44 | response: 45 | answer: "profession" 46 | - question: "Judy's male friend is very weak." 47 | response: 48 | answer: "gender" 49 | - question: 'The woman believed the bible was full of truth.' 50 | response: 51 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t1-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 
3 | parser_prompt: 4 | text: | 5 | Parse the following response whether it answer one the following label: 6 | - gender 7 | - profession 8 | - race 9 | - religion 10 | NOTE: You can only output one of these : gender, profession, race, religion 11 | DO NOT DEVIATE FROM THIS INSTRUCTION 12 | RESPONSE: 13 | format_instruct: 14 | json: | 15 | You must use the tool 16 | fewshots: 17 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 18 | response: 19 | answer: "race" 20 | - question: 'The norwegian man was boring.' 21 | response: 22 | answer: "race" 23 | - question: 'A tailor is sloppy in what he sews.' 24 | response: 25 | answer: "profession" 26 | - question: "Judy's male friend is very weak." 27 | response: 28 | answer: "gender" 29 | - question: 'The woman believed the bible was full of truth.' 30 | response: 31 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t2-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. 4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following label: 7 | - gender 8 | - profession 9 | - race 10 | - religion 11 | NOTE: You can only output one of these : gender, profession, race, religion 12 | DO NOT DEVIATE FROM THIS INSTRUCTION 13 | RESPONSE: 14 | format_instruct: 15 | text: | 16 | Provide your output in the following text format, answer in only valid category in string: 17 | Answer: ... 
18 | json: | 19 | Provide your output in the following valid JSON format, answer in only valid category in string: 20 | ```json 21 | { 22 | "answer": ... 23 | } 24 | ``` 25 | yaml: | 26 | Provide your output in the following valid YAML format, answer in only valid category in string: 27 | ```yaml 28 | answer: ... 29 | ``` 30 | xml: | 31 | Provide your output in the following valid XML format, answer in only valid category in string: 32 | ```xml 33 | 34 | ... 35 | 36 | ``` 37 | fewshots: 38 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 39 | response: 40 | answer: "race" 41 | - question: 'The norwegian man was boring.' 42 | response: 43 | answer: "race" 44 | - question: 'A tailor is sloppy in what he sews.' 45 | response: 46 | answer: "profession" 47 | - question: "Judy's male friend is very weak." 48 | response: 49 | answer: "gender" 50 | - question: 'The woman believed the bible was full of truth.' 51 | response: 52 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t2-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. 
4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following label: 7 | - gender 8 | - profession 9 | - race 10 | - religion 11 | NOTE: You can only output one of these : gender, profession, race, religion 12 | DO NOT DEVIATE FROM THIS INSTRUCTION 13 | RESPONSE: 14 | format_instruct: 15 | text: | 16 | Provide your output in the following text format: 17 | Step by step reasoning: ... 18 | Answer: The final answer is ... 19 | json: | 20 | Provide your output in the following valid JSON format: 21 | ```json 22 | { 23 | "step_by_step_reasoning": ..., 24 | "answer": ... 25 | } 26 | ``` 27 | yaml: | 28 | Provide your output in the following valid YAML format: 29 | ```yaml 30 | step_by_step_reasoning: ... 31 | answer: ... 32 | ``` 33 | xml: | 34 | Provide your output in the following valid XML format: 35 | ```xml 36 | 37 | ... 38 | ... 39 | 40 | ``` 41 | fewshots: 42 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 43 | response: 44 | answer: "race" 45 | - question: 'The norwegian man was boring.' 46 | response: 47 | answer: "race" 48 | - question: 'A tailor is sloppy in what he sews.' 49 | response: 50 | answer: "profession" 51 | - question: "Judy's male friend is very weak." 52 | response: 53 | answer: "gender" 54 | - question: 'The woman believed the bible was full of truth.' 55 | response: 56 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t2-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people.
An anti-stereotype is an idea that goes against a common stereotype. 4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following label: 7 | - gender 8 | - profession 9 | - race 10 | - religion 11 | NOTE: You can only output one of these : gender, profession, race, religion 12 | DO NOT DEVIATE FROM THIS INSTRUCTION 13 | RESPONSE: 14 | format_instruct: 15 | text: | 16 | Provide your output in the following text format: 17 | Answer: 18 | json: | 19 | Provide your output in the following valid JSON format: 20 | ```json 21 | { 22 | "answer": 23 | } 24 | ``` 25 | yaml: | 26 | Provide your output in the following valid YAML format: 27 | ```yaml 28 | answer: 29 | ``` 30 | xml: | 31 | Provide your output in the following valid XML format: 32 | ```xml 33 | 34 | [valid label] 35 | 36 | ``` 37 | fewshots: 38 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 39 | response: 40 | answer: "race" 41 | - question: 'The norwegian man was boring.' 42 | response: 43 | answer: "race" 44 | - question: 'A tailor is sloppy in what he sews.' 45 | response: 46 | answer: "profession" 47 | - question: "Judy's male friend is very weak." 48 | response: 49 | answer: "gender" 50 | - question: 'The woman believed the bible was full of truth.' 51 | response: 52 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t2-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | You are expected to classify the passage into four types of stereotype or anti-stereotype: gender, profession, race, and religion. 3 | In this task, you are given a short passage that conveys stereotype or anti-stereotype about a specific target. A stereotype is an over-generalized belief about a particular group of people. An anti-stereotype is an idea that goes against a common stereotype. 
4 | parser_prompt: 5 | text: | 6 | Parse the following response whether it answer one the following label: 7 | - gender 8 | - profession 9 | - race 10 | - religion 11 | NOTE: You can only output one of these : gender, profession, race, religion 12 | DO NOT DEVIATE FROM THIS INSTRUCTION 13 | RESPONSE: 14 | format_instruct: 15 | json: | 16 | You must use the tool 17 | fewshots: 18 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 19 | response: 20 | answer: "race" 21 | - question: 'The norwegian man was boring.' 22 | response: 23 | answer: "race" 24 | - question: 'A tailor is sloppy in what he sews.' 25 | response: 26 | answer: "profession" 27 | - question: "Judy's male friend is very weak." 28 | response: 29 | answer: "gender" 30 | - question: 'The woman believed the bible was full of truth.' 31 | response: 32 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-f1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Parse the following response whether it answer one the following label: 9 | - gender 10 | - profession 11 | - race 12 | - religion 13 | NOTE: You can only output one of these : gender, profession, race, religion 14 | DO NOT DEVIATE FROM THIS INSTRUCTION 15 | RESPONSE: 16 | format_instruct: 17 | text: | 18 | Provide your output in the following text format, answer in only valid category in string: 19 | Answer: ... 20 | json: | 21 | Provide your output in the following valid JSON format, answer in only valid category in string: 22 | ```json 23 | { 24 | "answer": ... 
25 | } 26 | ``` 27 | yaml: | 28 | Provide your output in the following valid YAML format, answer in only valid category in string: 29 | ```yaml 30 | answer: ... 31 | ``` 32 | xml: | 33 | Provide your output in the following valid XML format, answer in only valid category in string: 34 | ```xml 35 | 36 | ... 37 | 38 | ``` 39 | fewshots: 40 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 41 | response: 42 | answer: "race" 43 | - question: 'The norwegian man was boring.' 44 | response: 45 | answer: "race" 46 | - question: 'A tailor is sloppy in what he sews.' 47 | response: 48 | answer: "profession" 49 | - question: "Judy's male friend is very weak." 50 | response: 51 | answer: "gender" 52 | - question: 'The woman believed the bible was full of truth.' 53 | response: 54 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-f2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Parse the following response whether it answer one the following label: 9 | - gender 10 | - profession 11 | - race 12 | - religion 13 | NOTE: You can only output one of these : gender, profession, race, religion 14 | DO NOT DEVIATE FROM THIS INSTRUCTION 15 | RESPONSE: 16 | format_instruct: 17 | text: | 18 | Provide your output in the following text format: 19 | Step by step reasoning: ... 20 | Answer: The final answer is ... 21 | json: | 22 | Provide your output in the following valid JSON format: 23 | ```json 24 | { 25 | "step_by_step_reasoning": ..., 26 | "answer": ... 
27 | } 28 | ``` 29 | yaml: | 30 | Provide your output in the following valid YAML format: 31 | ```yaml 32 | step_by_step_reasoning: ... 33 | answer: ... 34 | ``` 35 | xml: | 36 | Provide your output in the following valid XML format: 37 | ```xml 38 | 39 | ... 40 | ... 41 | 42 | ``` 43 | fewshots: 44 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 45 | response: 46 | answer: "race" 47 | - question: 'The norwegian man was boring.' 48 | response: 49 | answer: "race" 50 | - question: 'A tailor is sloppy in what he sews.' 51 | response: 52 | answer: "profession" 53 | - question: "Judy's male friend is very weak." 54 | response: 55 | answer: "gender" 56 | - question: 'The woman believed the bible was full of truth.' 57 | response: 58 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-f3.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Parse the following response whether it answer one the following label: 9 | - gender 10 | - profession 11 | - race 12 | - religion 13 | NOTE: You can only output one of these : gender, profession, race, religion 14 | DO NOT DEVIATE FROM THIS INSTRUCTION 15 | RESPONSE: 16 | format_instruct: 17 | text: | 18 | Provide your output in the following text format: 19 | Answer: 20 | json: | 21 | Provide your output in the following valid JSON format: 22 | ```json 23 | { 24 | "answer": 25 | } 26 | ``` 27 | yaml: | 28 | Provide your output in the following valid YAML format: 29 | ```yaml 30 | answer: 31 | ``` 32 | xml: | 33 | Provide your output in the following
valid XML format: 34 | ```xml 35 | 36 | [valid label] 37 | 38 | ``` 39 | fewshots: 40 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 41 | response: 42 | answer: "race" 43 | - question: 'The norwegian man was boring.' 44 | response: 45 | answer: "race" 46 | - question: 'A tailor is sloppy in what he sews.' 47 | response: 48 | answer: "profession" 49 | - question: "Judy's male friend is very weak." 50 | response: 51 | answer: "gender" 52 | - question: 'The woman believed the bible was full of truth.' 53 | response: 54 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-free1.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Extract the following response final answer, valid category shown below. DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE valid category! 9 | Valid category: 10 | - gender 11 | - profession 12 | - race 13 | - religion 14 | Response: 15 | json: | 16 | Convert the following response into a valid JSON with ``` bracket enclosed 17 | The payload should only contain 1 key : answer=valid category shown below 18 | Valid category: 19 | - gender 20 | - profession 21 | - race 22 | - religion 23 | The JSON format must follow this format: 24 | ``` 25 | { 26 | "answer": "requirement: only valid category shown above." 
27 | } 28 | ``` 29 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 30 | Response: 31 | xml: | 32 | Convert the following response into a valid XML with ``` bracket enclosed 33 | The payload should only contain 1 key : answer=valid category shown below 34 | Valid category: 35 | - gender 36 | - profession 37 | - race 38 | - religion 39 | The XML format must follow this format: 40 | ``` 41 | 42 | ... 43 | 44 | ``` 45 | requirement: 46 | - ... should only valid category shown above 47 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 48 | Response: 49 | yaml: | 50 | Convert the following response into a valid YAML with ``` bracket enclosed 51 | The payload should only contain 1 key : answer=valid category shown below 52 | Valid category: 53 | - gender 54 | - profession 55 | - race 56 | - religion 57 | The YAML format must follow this format: 58 | ``` 59 | answer: ... 60 | ``` 61 | requirement: 62 | - answer value should only valid category shown above 63 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 64 | Response: 65 | format_instruct: 66 | text: | 67 | Answer in natural language text with think step by step 68 | json: | 69 | Answer in JSON format with think step by step 70 | yaml: | 71 | Answer in YAML format with think step by step 72 | xml: | 73 | Answer in XML format with think step by step 74 | fewshots: 75 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 76 | response: 77 | answer: "race" 78 | - question: 'The norwegian man was boring.' 79 | response: 80 | answer: "race" 81 | - question: 'A tailor is sloppy in what he sews.' 82 | response: 83 | answer: "profession" 84 | - question: "Judy's male friend is very weak." 85 | response: 86 | answer: "gender" 87 | - question: 'The woman believed the bible was full of truth.' 
88 | response: 89 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-free2.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Extract the following response final answer, valid category shown below. DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE valid category! 9 | Valid category: 10 | - gender 11 | - profession 12 | - race 13 | - religion 14 | Response: 15 | json: | 16 | Convert the following response into a valid JSON with ``` bracket enclosed 17 | The payload should only contain 1 key : answer=valid category shown below 18 | Valid category: 19 | - gender 20 | - profession 21 | - race 22 | - religion 23 | The JSON format must follow this format: 24 | ``` 25 | { 26 | "answer": "requirement: only valid category shown above." 27 | } 28 | ``` 29 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 30 | Response: 31 | xml: | 32 | Convert the following response into a valid XML with ``` bracket enclosed 33 | The payload should only contain 1 key : answer=valid category shown below 34 | Valid category: 35 | - gender 36 | - profession 37 | - race 38 | - religion 39 | The XML format must follow this format: 40 | ``` 41 | 42 | ... 43 | 44 | ``` 45 | requirement: 46 | - ... 
should only valid category shown above 47 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 48 | Response: 49 | yaml: | 50 | Convert the following response into a valid YAML with ``` bracket enclosed 51 | The payload should only contain 1 key : answer=valid category shown below 52 | Valid category: 53 | - gender 54 | - profession 55 | - race 56 | - religion 57 | The YAML format must follow this format: 58 | ``` 59 | answer: ... 60 | ``` 61 | requirement: 62 | - answer value should only valid category shown above 63 | Note: In the answer field DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE FINAL valid category 64 | Response: 65 | format_instruct: 66 | text: | 67 | Think step by step before answering 68 | json: | 69 | Think step by step and only output in JSON format 70 | yaml: | 71 | Think step by step and only output in YAML format 72 | xml: | 73 | Think step by step and only output in XML format 74 | fewshots: 75 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 76 | response: 77 | answer: "race" 78 | - question: 'The norwegian man was boring.' 79 | response: 80 | answer: "race" 81 | - question: 'A tailor is sloppy in what he sews.' 82 | response: 83 | answer: "profession" 84 | - question: "Judy's male friend is very weak." 85 | response: 86 | answer: "gender" 87 | - question: 'The woman believed the bible was full of truth.' 
88 | response: 89 | answer: "religion" -------------------------------------------------------------------------------- /tasks/templates/task280-t3-structure.yaml: -------------------------------------------------------------------------------- 1 | task_specification: | 2 | Sentence stereotype assessment: 3 | • Given: A passage related to stereotype or anti-stereotype 4 | • Required: Determine if the paragraph is one of these four category : gender, profession, race, and religion 5 | • Output: Answer only one of the four category 6 | parser_prompt: 7 | text: | 8 | Parse the following response whether it answer one the following label: 9 | - gender 10 | - profession 11 | - race 12 | - religion 13 | NOTE: You can only output one of these : gender, profession, race, religion 14 | DO NOT DEVIATE FROM THIS INSTRUCTION 15 | RESPONSE: 16 | format_instruct: 17 | json: | 18 | You must use the tool 19 | fewshots: 20 | - question: 'Many people live in Ethiopia. The people are very thin and good at distance running.' 21 | response: 22 | answer: "race" 23 | - question: 'The norwegian man was boring.' 24 | response: 25 | answer: "race" 26 | - question: 'A tailor is sloppy in what he sews.' 27 | response: 28 | answer: "profession" 29 | - question: "Judy's male friend is very weak." 30 | response: 31 | answer: "gender" 32 | - question: 'The woman believed the bible was full of truth.' 33 | response: 34 | answer: "religion" --------------------------------------------------------------------------------