├── LICENSE.md ├── README.md ├── data.zip ├── dataset_stats ├── error_categories_table.txt ├── stats.json └── stats_table.txt ├── environment.yml ├── error_detection_outputs ├── advanced_prompt_baseline │ ├── answerability_classification │ │ ├── Llama-2-70b-chat-hf │ │ │ ├── Llama-2-13b-chat-hf │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Llama-2-70b-chat-hf │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-14B-Chat │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-72B-Chat │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── claude-3-opus-20240229 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemini-1.0-pro-001 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemma-7b-it │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-4-0125-preview │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ └── gpt-4-0613 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ └── gpt-4-0613 │ │ │ ├── Llama-2-13b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Llama-2-70b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-14B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-72B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── claude-3-opus-20240229 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemini-1.0-pro-001 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemma-7b-it │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-4-0125-preview │ │ │ └── cot_instruction_prompt.jsonl │ │ │ └── gpt-4-0613 │ │ │ └── cot_instruction_prompt.jsonl │ ├── finegrained_fact_verification │ │ ├── Llama-2-70b-chat-hf │ │ │ ├── Llama-2-13b-chat-hf │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Llama-2-70b-chat-hf │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-14B-Chat │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-72B-Chat │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── claude-3-opus-20240229 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemini-1.0-pro-001 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemma-7b-it │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-4-0125-preview │ │ │ │ └── cot_instruction_prompt.jsonl │ │ │ └── gpt-4-0613 │ │ │ │ └── cot_instruction_prompt.jsonl │ │ └── gpt-4-0613 │ │ │ ├── Llama-2-13b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Llama-2-70b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-14B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── Qwen1.5-72B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── claude-3-opus-20240229 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemini-1.0-pro-001 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gemma-7b-it │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ └── cot_instruction_prompt.jsonl │ │ │ ├── gpt-4-0125-preview │ │ │ └── cot_instruction_prompt.jsonl │ │ │ └── gpt-4-0613 │ │ │ └── cot_instruction_prompt.jsonl │ └── math_word_problem_generation │ │ ├── Llama-2-70b-chat-hf │ │ ├── Llama-2-13b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── claude-3-opus-20240229 │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── gemini-1.0-pro-001 │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── gemma-7b-it │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ │ └── cot_instruction_prompt.jsonl │ │ ├── gpt-4-0125-preview │ │ │ └── cot_instruction_prompt.jsonl │ │ └── gpt-4-0613 │ │ │ └── cot_instruction_prompt.jsonl │ │ └── gpt-4-0613 │ │ ├── Llama-2-13b-chat-hf │ │ └── cot_instruction_prompt.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ └── cot_instruction_prompt.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ └── cot_instruction_prompt.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ └── cot_instruction_prompt.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ └── cot_instruction_prompt.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ └── cot_instruction_prompt.jsonl │ │ ├── claude-3-opus-20240229 │ │ └── cot_instruction_prompt.jsonl │ │ ├── gemini-1.0-pro-001 │ │ └── cot_instruction_prompt.jsonl │ │ ├── gemma-7b-it │ │ └── cot_instruction_prompt.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ └── cot_instruction_prompt.jsonl │ │ ├── gpt-4-0125-preview │ │ └── cot_instruction_prompt.jsonl │ │ └── gpt-4-0613 │ │ └── cot_instruction_prompt.jsonl ├── majority_vote │ ├── answerability_classification │ │ ├── Llama-2-70b-chat-hf │ │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ │ │ └── majority_vote.jsonl │ │ └── gpt-4-0613 │ │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ │ └── majority_vote.jsonl │ ├── finegrained_fact_verification │ │ ├── Llama-2-70b-chat-hf │ │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ │ │ └── majority_vote.jsonl │ │ └── gpt-4-0613 │ │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ │ └── majority_vote.jsonl │ └── math_word_problem_generation │ │ ├── Llama-2-70b-chat-hf │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ │ └── majority_vote.jsonl │ │ └── gpt-4-0613 │ │ └── Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat │ │ └── majority_vote.jsonl ├── self_consistency │ ├── answerability_classification │ │ └── gpt-4-0613 │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ └── baseline_errordetection_prompt_1.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ └── baseline_errordetection_prompt_1.jsonl │ │ │ └── gpt-4-0125-preview │ │ │ └── baseline_errordetection_prompt_1.jsonl │ ├── finegrained_fact_verification │ │ └── gpt-4-0613 │ │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ └── baseline_errordetection_prompt_1.jsonl │ │ │ ├── gpt-3.5-turbo-0125 │ │ │ └── baseline_errordetection_prompt_1.jsonl │ │ │ └── gpt-4-0125-preview │ │ │ └── baseline_errordetection_prompt_1.jsonl │ └── math_word_problem_generation │ │ └── gpt-4-0613 │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ └── baseline_errordetection_prompt_1.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ └── baseline_errordetection_prompt_1.jsonl │ │ └── gpt-4-0125-preview │ │ └── baseline_errordetection_prompt_1.jsonl └── simple_prompt_baseline │ ├── answerability_classification │ ├── Llama-2-70b-chat-hf │ │ ├── Llama-2-13b-chat-hf │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── claude-3-opus-20240229 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemini-1.0-pro-001 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemma-7b-it │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-4-0125-preview │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ └── gpt-4-0613 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ └── gpt-4-0613 │ │ ├── Llama-2-13b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── claude-3-opus-20240229 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemini-1.0-pro-001 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemma-7b-it │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-4-0125-preview │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ └── gpt-4-0613 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── finegrained_fact_verification │ ├── Llama-2-70b-chat-hf │ │ ├── Llama-2-13b-chat-hf │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── claude-3-opus-20240229 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemini-1.0-pro-001 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemma-7b-it │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-4-0125-preview │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ │ └── gpt-4-0613 │ │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ │ └── baseline_errordetection_prompt_4.jsonl │ └── gpt-4-0613 │ │ ├── Llama-2-13b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Llama-2-70b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mistral-7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-14B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── Qwen1.5-72B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── claude-3-opus-20240229 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemini-1.0-pro-001 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gemma-7b-it │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-3.5-turbo-0125 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ ├── gpt-4-0125-preview │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ │ └── gpt-4-0613 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ └── math_word_problem_generation │ ├── Llama-2-70b-chat-hf │ ├── Llama-2-13b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── Llama-2-70b-chat-hf │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── Mistral-7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── Mixtral-8x7B-Instruct-v0.1 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── Qwen1.5-14B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── Qwen1.5-72B-Chat │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── claude-3-opus-20240229 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── gemini-1.0-pro-001 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── gemma-7b-it │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── gpt-3.5-turbo-0125 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ ├── gpt-4-0125-preview │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ └── gpt-4-0613 │ │ ├── baseline_errordetection_prompt_1.jsonl │ │ ├── baseline_errordetection_prompt_2.jsonl │ │ ├── baseline_errordetection_prompt_3.jsonl │ │ └── baseline_errordetection_prompt_4.jsonl │ └── gpt-4-0613 │ ├── Llama-2-13b-chat-hf │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── Llama-2-70b-chat-hf │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── Mistral-7B-Instruct-v0.1 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── Mixtral-8x7B-Instruct-v0.1 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── Qwen1.5-14B-Chat │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── Qwen1.5-72B-Chat │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── claude-3-opus-20240229 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── gemini-1.0-pro-001 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── gemma-7b-it │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── gpt-3.5-turbo-0125 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ ├── gpt-4-0125-preview │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl │ └── gpt-4-0613 │ ├── baseline_errordetection_prompt_1.jsonl │ ├── baseline_errordetection_prompt_2.jsonl │ ├── baseline_errordetection_prompt_3.jsonl │ └── baseline_errordetection_prompt_4.jsonl ├── error_detection_performance ├── performance │ ├── advanced_prompt_baseline │ │ ├── category_performance.json │ │ └── performance.json │ ├── easy_baseline │ │ └── performance.json │ ├── human_performance.json │ ├── majority_vote │ │ ├── category_performance.json │ │ └── performance.json │ ├── self_consistency │ │ ├── category_performance.json │ │ └── performance.json │ └── simple_prompt_baseline │ │ ├── category_performance.json │ │ └── performance.json └── table │ ├── category_results_tables │ └── simple_prompt_baseline │ │ └── recall │ │ ├── recall_Llama-2-13b-chat-hf.txt │ │ ├── recall_Llama-2-13b-chat-hf_single_space.txt │ │ ├── recall_Llama-2-70b-chat-hf.txt │ │ ├── recall_Llama-2-70b-chat-hf_single_space.txt │ │ ├── recall_Mistral-7B-Instruct-v0.1.txt │ │ ├── recall_Mistral-7B-Instruct-v0.1_single_space.txt │ │ ├── recall_Mixtral-8x7B-Instruct-v0.1.txt │ │ ├── recall_Mixtral-8x7B-Instruct-v0.1_single_space.txt │ │ ├── recall_Qwen1.5-14B-Chat.txt │ │ ├── recall_Qwen1.5-14B-Chat_single_space.txt │ │ ├── recall_Qwen1.5-72B-Chat.txt │ │ ├── recall_Qwen1.5-72B-Chat_single_space.txt │ │ ├── recall_average.txt │ │ ├── recall_average_single_space.txt │ │ ├── recall_claude-3-opus-20240229.txt │ │ ├── recall_claude-3-opus-20240229_single_space.txt │ │ ├── recall_gemini-1.0-pro-001.txt │ │ ├── recall_gemini-1.0-pro-001_single_space.txt │ │ ├── recall_gemma-7b-it.txt │ │ ├── recall_gemma-7b-it_single_space.txt │ │ ├── recall_gpt-3.5-turbo-0125.txt │ │ ├── recall_gpt-3.5-turbo-0125_single_space.txt │ │ ├── recall_gpt-4-0125-preview.txt │ │ ├── recall_gpt-4-0125-preview_single_space.txt │ │ ├── recall_gpt-4-0613.txt │ │ └── recall_gpt-4-0613_single_space.txt │ ├── improvement_tables │ └── advanced_prompt_baseline │ │ ├── accuracy │ │ ├── accuracy_average_table.txt │ │ └── accuracy_average_table_single_space.txt │ │ ├── f1 │ │ ├── f1_average_table.txt │ │ └── f1_average_table_single_space.txt │ │ ├── precision │ │ ├── precision_average_table.txt │ │ └── precision_average_table_single_space.txt │ │ └── recall │ │ ├── recall_average_table.txt │ │ └── recall_average_table_single_space.txt │ ├── majority_vote_tables │ ├── accuracy │ │ ├── accuracy_majority_vote_table.txt │ │ └── accuracy_majority_vote_table_single_space.txt │ ├── f1 │ │ ├── f1_majority_vote_table.txt │ │ └── f1_majority_vote_table_single_space.txt │ ├── precision │ │ ├── precision_majority_vote_table.txt │ │ └── precision_majority_vote_table_single_space.txt │ └── recall │ │ ├── recall_majority_vote_table.txt │ │ └── recall_majority_vote_table_single_space.txt │ └── simple_prompt_baseline │ ├── accuracy │ ├── accuracy_average_table.txt │ ├── accuracy_average_table_single_space.txt │ ├── accuracy_baseline_errordetection_prompt_1_table.txt │ ├── accuracy_baseline_errordetection_prompt_1_table_single_space.txt │ ├── accuracy_baseline_errordetection_prompt_2_table.txt │ ├── accuracy_baseline_errordetection_prompt_2_table_single_space.txt │ ├── accuracy_baseline_errordetection_prompt_3_table.txt │ ├── accuracy_baseline_errordetection_prompt_3_table_single_space.txt │ ├── accuracy_baseline_errordetection_prompt_4_table.txt │ └── accuracy_baseline_errordetection_prompt_4_table_single_space.txt │ ├── f1 │ ├── f1_average_table.txt │ ├── f1_average_table_single_space.txt │ ├── f1_baseline_errordetection_prompt_1_table.txt │ ├── f1_baseline_errordetection_prompt_1_table_single_space.txt │ ├── f1_baseline_errordetection_prompt_2_table.txt │ ├── f1_baseline_errordetection_prompt_2_table_single_space.txt │ ├── f1_baseline_errordetection_prompt_3_table.txt │ ├── f1_baseline_errordetection_prompt_3_table_single_space.txt │ ├── f1_baseline_errordetection_prompt_4_table.txt │ └── f1_baseline_errordetection_prompt_4_table_single_space.txt │ ├── precision │ ├── precision_average_table.txt │ ├── precision_average_table_single_space.txt │ ├── precision_baseline_errordetection_prompt_1_table.txt │ ├── precision_baseline_errordetection_prompt_1_table_single_space.txt │ ├── precision_baseline_errordetection_prompt_2_table.txt │ ├── precision_baseline_errordetection_prompt_2_table_single_space.txt │ ├── precision_baseline_errordetection_prompt_3_table.txt │ ├── precision_baseline_errordetection_prompt_3_table_single_space.txt │ ├── precision_baseline_errordetection_prompt_4_table.txt │ └── precision_baseline_errordetection_prompt_4_table_single_space.txt │ └── recall │ ├── recall_average_table.txt │ ├── recall_average_table_single_space.txt │ ├── recall_baseline_errordetection_prompt_1_table.txt │ ├── recall_baseline_errordetection_prompt_1_table_single_space.txt │ ├── recall_baseline_errordetection_prompt_2_table.txt │ ├── recall_baseline_errordetection_prompt_2_table_single_space.txt │ ├── recall_baseline_errordetection_prompt_3_table.txt │ ├── recall_baseline_errordetection_prompt_3_table_single_space.txt │ ├── recall_baseline_errordetection_prompt_4_table.txt │ └── recall_baseline_errordetection_prompt_4_table_single_space.txt ├── readme_figures ├── realmistake_dataexample.png └── realmistake_stats.png ├── sh ├── get_dataset_stat.sh └── get_performance.sh └── src ├── baseline └── prompt.py ├── config.py ├── error_detection_analysis ├── analyze_self_consistency.py ├── bias_analysis.py ├── calculate_performance.py ├── compare_to_other_tasks.py ├── generate_improvement_tables.py ├── generate_majority_vote_tables.py ├── generate_manual_analysis_figure.py └── generate_performance_tables.py ├── get_dataset_stats ├── easy_baseline.py └── get_dataset_stats.py └── path.py /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/README.md -------------------------------------------------------------------------------- /data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/data.zip -------------------------------------------------------------------------------- /dataset_stats/error_categories_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/dataset_stats/error_categories_table.txt -------------------------------------------------------------------------------- /dataset_stats/stats.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/dataset_stats/stats.json -------------------------------------------------------------------------------- /dataset_stats/stats_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/dataset_stats/stats_table.txt -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/environment.yml -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/advanced_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/cot_instruction_prompt.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/majority_vote/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/majority_vote/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf__Mixtral-8x7B-Instruct-v0.1__Qwen1.5-72B-Chat/majority_vote.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/self_consistency/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/answerability_classification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/finegrained_fact_verification/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/Llama-2-70b-chat-hf/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-13b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Llama-2-70b-chat-hf/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mistral-7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Mixtral-8x7B-Instruct-v0.1/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-14B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/Qwen1.5-72B-Chat/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/claude-3-opus-20240229/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemini-1.0-pro-001/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gemma-7b-it/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-3.5-turbo-0125/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0125-preview/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_1.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_2.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_3.jsonl -------------------------------------------------------------------------------- /error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_outputs/simple_prompt_baseline/math_word_problem_generation/gpt-4-0613/gpt-4-0613/baseline_errordetection_prompt_4.jsonl -------------------------------------------------------------------------------- /error_detection_performance/performance/advanced_prompt_baseline/category_performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/advanced_prompt_baseline/category_performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/advanced_prompt_baseline/performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/advanced_prompt_baseline/performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/easy_baseline/performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/easy_baseline/performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/human_performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/human_performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/majority_vote/category_performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/majority_vote/category_performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/majority_vote/performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/majority_vote/performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/self_consistency/category_performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/self_consistency/category_performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/self_consistency/performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/self_consistency/performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/simple_prompt_baseline/category_performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/simple_prompt_baseline/category_performance.json -------------------------------------------------------------------------------- /error_detection_performance/performance/simple_prompt_baseline/performance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/performance/simple_prompt_baseline/performance.json -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-13b-chat-hf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-13b-chat-hf.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-13b-chat-hf_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-13b-chat-hf_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-70b-chat-hf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-70b-chat-hf.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-70b-chat-hf_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Llama-2-70b-chat-hf_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mistral-7B-Instruct-v0.1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mistral-7B-Instruct-v0.1.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mistral-7B-Instruct-v0.1_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mistral-7B-Instruct-v0.1_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mixtral-8x7B-Instruct-v0.1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mixtral-8x7B-Instruct-v0.1.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mixtral-8x7B-Instruct-v0.1_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Mixtral-8x7B-Instruct-v0.1_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-14B-Chat.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-14B-Chat.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-14B-Chat_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-14B-Chat_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-72B-Chat.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-72B-Chat.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-72B-Chat_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_Qwen1.5-72B-Chat_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_average.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_average.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_average_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_average_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_claude-3-opus-20240229.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_claude-3-opus-20240229.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_claude-3-opus-20240229_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_claude-3-opus-20240229_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemini-1.0-pro-001.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemini-1.0-pro-001.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemini-1.0-pro-001_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemini-1.0-pro-001_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemma-7b-it.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemma-7b-it.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemma-7b-it_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gemma-7b-it_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-3.5-turbo-0125.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-3.5-turbo-0125.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-3.5-turbo-0125_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-3.5-turbo-0125_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0125-preview.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0125-preview.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0125-preview_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0125-preview_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0613.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0613.txt -------------------------------------------------------------------------------- /error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0613_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/category_results_tables/simple_prompt_baseline/recall/recall_gpt-4-0613_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/accuracy/accuracy_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/accuracy/accuracy_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/accuracy/accuracy_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/accuracy/accuracy_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/f1/f1_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/f1/f1_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/f1/f1_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/f1/f1_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/precision/precision_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/precision/precision_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/precision/precision_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/precision/precision_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/recall/recall_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/recall/recall_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/improvement_tables/advanced_prompt_baseline/recall/recall_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/improvement_tables/advanced_prompt_baseline/recall/recall_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/accuracy/accuracy_majority_vote_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/accuracy/accuracy_majority_vote_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/accuracy/accuracy_majority_vote_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/accuracy/accuracy_majority_vote_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/f1/f1_majority_vote_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/f1/f1_majority_vote_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/f1/f1_majority_vote_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/f1/f1_majority_vote_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/precision/precision_majority_vote_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/precision/precision_majority_vote_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/precision/precision_majority_vote_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/precision/precision_majority_vote_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/recall/recall_majority_vote_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/recall/recall_majority_vote_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/majority_vote_tables/recall/recall_majority_vote_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/majority_vote_tables/recall/recall_majority_vote_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_1_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_1_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_1_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_1_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_2_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_2_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_2_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_2_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_3_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_3_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_3_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_3_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_4_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_4_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_4_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/accuracy/accuracy_baseline_errordetection_prompt_4_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_1_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_1_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_1_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_1_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_2_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_2_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_2_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_2_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_3_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_3_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_3_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_3_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_4_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_4_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_4_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/f1/f1_baseline_errordetection_prompt_4_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_1_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_1_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_1_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_1_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_2_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_2_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_2_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_2_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_3_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_3_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_3_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_3_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_4_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_4_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_4_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/precision/precision_baseline_errordetection_prompt_4_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_average_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_average_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_average_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_average_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_1_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_1_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_1_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_1_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_2_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_2_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_2_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_2_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_3_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_3_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_3_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_3_table_single_space.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_4_table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_4_table.txt -------------------------------------------------------------------------------- /error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_4_table_single_space.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/error_detection_performance/table/simple_prompt_baseline/recall/recall_baseline_errordetection_prompt_4_table_single_space.txt -------------------------------------------------------------------------------- /readme_figures/realmistake_dataexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/readme_figures/realmistake_dataexample.png -------------------------------------------------------------------------------- /readme_figures/realmistake_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/readme_figures/realmistake_stats.png -------------------------------------------------------------------------------- /sh/get_dataset_stat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/sh/get_dataset_stat.sh -------------------------------------------------------------------------------- /sh/get_performance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/sh/get_performance.sh -------------------------------------------------------------------------------- /src/baseline/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/baseline/prompt.py -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/config.py -------------------------------------------------------------------------------- /src/error_detection_analysis/analyze_self_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/analyze_self_consistency.py -------------------------------------------------------------------------------- /src/error_detection_analysis/bias_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/bias_analysis.py -------------------------------------------------------------------------------- /src/error_detection_analysis/calculate_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/calculate_performance.py -------------------------------------------------------------------------------- /src/error_detection_analysis/compare_to_other_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/compare_to_other_tasks.py -------------------------------------------------------------------------------- /src/error_detection_analysis/generate_improvement_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/generate_improvement_tables.py -------------------------------------------------------------------------------- /src/error_detection_analysis/generate_majority_vote_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/generate_majority_vote_tables.py -------------------------------------------------------------------------------- /src/error_detection_analysis/generate_manual_analysis_figure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/generate_manual_analysis_figure.py -------------------------------------------------------------------------------- /src/error_detection_analysis/generate_performance_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/error_detection_analysis/generate_performance_tables.py -------------------------------------------------------------------------------- /src/get_dataset_stats/easy_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/get_dataset_stats/easy_baseline.py -------------------------------------------------------------------------------- /src/get_dataset_stats/get_dataset_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/get_dataset_stats/get_dataset_stats.py -------------------------------------------------------------------------------- /src/path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psunlpgroup/ReaLMistake/HEAD/src/path.py --------------------------------------------------------------------------------