├── .amltconfig ├── .amltignore ├── .gitignore ├── __init__.py ├── docs └── overview.png ├── experiments ├── 00_download_dataset.py ├── 01_calc_resp_decomp.py ├── 02_fit_encoding.py ├── 03_finetune.py └── 04_fit_single_question.py ├── mteb ├── .gitattributes ├── .gitignore ├── fit_mteb.py ├── fit_mteb_pytorch.ipynb ├── fit_mteb_pytorch.py ├── pip-freeze ├── question_embedder.py ├── question_embedder_oai.py ├── questions │ └── msmarco │ │ ├── convert_query_text.sh │ │ ├── gen_questions.py │ │ ├── msmarco-beir.py │ │ ├── msmarco-hf.py │ │ ├── process_output.py │ │ └── tmp │ │ ├── tmp.py │ │ └── tmp2.py └── scripts │ ├── encode │ ├── correlation-question-filter.py │ ├── embed_dev.sh │ ├── encode.py │ ├── encode_QAembedder.py │ ├── greedy_search.py │ ├── greedy_search_cleaned.py │ └── score.py │ ├── extract_metadata.py │ ├── merge_cqadupstack.py │ ├── mteb_meta.py │ ├── run_mteb_english.py │ └── run_mteb_english_tfidf_bow.py ├── neuro1 ├── __init__.py ├── config.py ├── data │ ├── __init__.py │ ├── data_sequence.py │ ├── interp_data.py │ ├── npp.py │ ├── response_utils.py │ ├── semantic_model.py │ ├── story_names.py │ ├── textgrid.py │ ├── utils.py │ └── utils_ds.py ├── encoding │ ├── __init__.py │ ├── eval.py │ ├── mlp.py │ └── ridge.py └── features │ ├── __init__.py │ ├── feature_spaces.py │ ├── feature_utils.py │ ├── qa_embedder.py │ ├── qa_questions.py │ ├── questions │ ├── __init__.py │ ├── all_questions.json │ ├── base_questions.json │ ├── qa_questions_base.py │ ├── qa_questions_data_boost.py │ ├── qa_questions_llama_boost.py │ └── v3_boostexamples.json │ └── stim_utils.py ├── notebooks ├── 01_encoding.ipynb ├── 02_flatmaps_diffs.py ├── 02_flatmaps_per_question.py ├── 03_error_examples.ipynb ├── 04_explore_qs.ipynb ├── 05_explore_downsampling.ipynb ├── 06_emb.ipynb ├── 07_sparsity.ipynb ├── analyze_helper.py └── viz.py ├── readme.md ├── requirements.txt ├── results └── results_aggregated_mini.pkl ├── scripts ├── .amltconfig ├── 01_sub_encoding.py ├── 02_sub_linear.py ├── d3 │ ├── 2024-05-21 D3 Averages.ipynb │ ├── 2024-05-22 D3 Averages.ipynb │ ├── d3.py │ ├── d3_processed │ │ ├── d3_0.csv │ │ ├── d3_1.csv │ │ ├── d3_10.csv │ │ ├── d3_11.csv │ │ ├── d3_12.csv │ │ ├── d3_13.csv │ │ ├── d3_14.csv │ │ ├── d3_15.csv │ │ ├── d3_16.csv │ │ ├── d3_17.csv │ │ ├── d3_18.csv │ │ ├── d3_19.csv │ │ ├── d3_2.csv │ │ ├── d3_20.csv │ │ ├── d3_21.csv │ │ ├── d3_22.csv │ │ ├── d3_23.csv │ │ ├── d3_24.csv │ │ ├── d3_25.csv │ │ ├── d3_26.csv │ │ ├── d3_27.csv │ │ ├── d3_28.csv │ │ ├── d3_29.csv │ │ ├── d3_3.csv │ │ ├── d3_30.csv │ │ ├── d3_31.csv │ │ ├── d3_32.csv │ │ ├── d3_33.csv │ │ ├── d3_34.csv │ │ ├── d3_35.csv │ │ ├── d3_36.csv │ │ ├── d3_37.csv │ │ ├── d3_38.csv │ │ ├── d3_39.csv │ │ ├── d3_4.csv │ │ ├── d3_40.csv │ │ ├── d3_41.csv │ │ ├── d3_42.csv │ │ ├── d3_43.csv │ │ ├── d3_44.csv │ │ ├── d3_45.csv │ │ ├── d3_46.csv │ │ ├── d3_47.csv │ │ ├── d3_48.csv │ │ ├── d3_49.csv │ │ ├── d3_5.csv │ │ ├── d3_50.csv │ │ ├── d3_51.csv │ │ ├── d3_52.csv │ │ ├── d3_53.csv │ │ ├── d3_6.csv │ │ ├── d3_7.csv │ │ ├── d3_8.csv │ │ ├── d3_9.csv │ │ └── task_defs.json │ ├── out │ │ ├── d3_0_irony.csv │ │ ├── d3_10_infrastructure.csv │ │ ├── d3_13_water.csv │ │ ├── d3_14_search.csv │ │ ├── d3_15_utility.csv │ │ ├── d3_16_hillary.csv │ │ ├── d3_17_hillary.csv │ │ ├── d3_18_offensive.csv │ │ ├── d3_19_offensive.csv │ │ ├── d3_1_objective.csv │ │ ├── d3_20_pro-life.csv │ │ ├── d3_21_pro-choice.csv │ │ ├── d3_25_math.csv │ │ ├── d3_27_grammar.csv │ │ ├── d3_28_sexis.csv │ │ ├── d3_29_sexis.csv │ │ ├── d3_2_subjective.csv │ 
│ ├── d3_30_news.csv │ │ ├── d3_31_sports.csv │ │ ├── d3_32_business.csv │ │ ├── d3_33_tech.csv │ │ ├── d3_34_bad.csv │ │ ├── d3_35_good.csv │ │ ├── d3_36_quantity.csv │ │ ├── d3_37_location.csv │ │ ├── d3_38_person.csv │ │ ├── d3_39_entity.csv │ │ ├── d3_3_god.csv │ │ ├── d3_40_abbrevation.csv │ │ ├── d3_42_environment.csv │ │ ├── d3_43_environment.csv │ │ ├── d3_44_spam.csv │ │ ├── d3_45_fact.csv │ │ ├── d3_46_opinion.csv │ │ ├── d3_47_math.csv │ │ ├── d3_49_computer.csv │ │ ├── d3_4_atheism.csv │ │ ├── d3_50_sport.csv │ │ ├── d3_51_entertainment.csv │ │ ├── d3_52_family.csv │ │ ├── d3_53_politic.csv │ │ ├── d3_5_evacuate.csv │ │ ├── d3_6_terorrism.csv │ │ ├── d3_7_crime.csv │ │ ├── d3_8_shelter.csv │ │ └── d3_9_food.csv │ ├── out__one_shot │ │ ├── d3_0_irony.csv │ │ ├── d3_10_infrastructure.csv │ │ ├── d3_12_medical.csv │ │ ├── d3_13_water.csv │ │ ├── d3_14_search.csv │ │ ├── d3_15_utility.csv │ │ ├── d3_16_hillary.csv │ │ ├── d3_17_hillary.csv │ │ ├── d3_18_offensive.csv │ │ ├── d3_19_offensive.csv │ │ ├── d3_1_objective.csv │ │ ├── d3_20_pro-life.csv │ │ ├── d3_21_pro-choice.csv │ │ ├── d3_22_physics.csv │ │ ├── d3_24_statistics.csv │ │ ├── d3_25_math.csv │ │ ├── d3_26_grammar.csv │ │ ├── d3_27_grammar.csv │ │ ├── d3_28_sexis.csv │ │ ├── d3_29_sexis.csv │ │ ├── d3_2_subjective.csv │ │ ├── d3_30_news.csv │ │ ├── d3_31_sports.csv │ │ ├── d3_32_business.csv │ │ ├── d3_33_tech.csv │ │ ├── d3_34_bad.csv │ │ ├── d3_35_good.csv │ │ ├── d3_36_quantity.csv │ │ ├── d3_37_location.csv │ │ ├── d3_38_person.csv │ │ ├── d3_39_entity.csv │ │ ├── d3_3_god.csv │ │ ├── d3_40_abbrevation.csv │ │ ├── d3_41_defin.csv │ │ ├── d3_42_environment.csv │ │ ├── d3_43_environment.csv │ │ ├── d3_44_spam.csv │ │ ├── d3_45_fact.csv │ │ ├── d3_46_opinion.csv │ │ ├── d3_47_math.csv │ │ ├── d3_48_health.csv │ │ ├── d3_49_computer.csv │ │ ├── d3_4_atheism.csv │ │ ├── d3_50_sport.csv │ │ ├── d3_51_entertainment.csv │ │ ├── d3_52_family.csv │ │ ├── d3_53_politic.csv │ │ ├── d3_5_evacuate.csv │ │ ├── d3_6_terorrism.csv │ │ ├── d3_7_crime.csv │ │ ├── d3_8_shelter.csv │ │ └── d3_9_food.csv │ ├── run_d3.py │ └── run_eval.py ├── launch.yaml └── launch_cpu.yaml └── setup.py /.amltconfig: -------------------------------------------------------------------------------- 1 | {"project_name": "fmri", "storage_account_name": "chansingh", "container_name": "amulet", "blob_storage_account_name": "chansingh", "registry_name": "projects", "local_path": "/home/chansingh/fmri/test", "default_output_dir": "/home/chansingh/fmri/test/amlt", "project_uuid": "7288079910.75442-d062492b-5700-4558-99a4-0bc1fc812dad"} -------------------------------------------------------------------------------- /.amltignore: -------------------------------------------------------------------------------- 1 | **.png 2 | **.ipynb 3 | **.pdf 4 | **results* 5 | **logs* 6 | **.pyc 7 | **pycache__ 8 | **.pkl 9 | **.npz 10 | dist 11 | figs 12 | ref 13 | processed 14 | **.jbl 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **.ipynb_checkpoints 2 | data 3 | **cache* 4 | **.pdf 5 | **.zip 6 | **.npy 7 | .coverage 8 | **slurm* 9 | **logs* 10 | env 11 | **.pyc 12 | **.gif 13 | **.DS_Store 14 | **.idea 15 | *.pth 16 | **Icon* 17 | venv 18 | imodels_env 19 | .vscode/** 20 | # Compiled python modules. 21 | *.pyc 22 | 23 | # Setuptools distribution folder. 24 | /dist/ 25 | 26 | # Python egg metadata, regenerated from source files by setuptools. 
27 | /*.egg-info 28 | 29 | build 30 | .gitmodules 31 | build 32 | dist 33 | 34 | **AutogluonModels* 35 | **.gslides 36 | **.xml 37 | 38 | experiments/data/ 39 | !experiments/data/00_get_datasets.ipynb 40 | .hypothesis 41 | experiments/results 42 | experiments/*.txt 43 | **_site* 44 | .nfs* 45 | results 46 | scratch 47 | .embgrams 48 | figs 49 | **.png 50 | **.jbl 51 | results* 52 | **pycache* 53 | **logs* 54 | **.pt 55 | qa_results/finetune* 56 | **.pkl 57 | **.txt 58 | **.npz 59 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/__init__.py -------------------------------------------------------------------------------- /docs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/docs/overview.png -------------------------------------------------------------------------------- /experiments/00_download_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get data from openneuro. 3 | This will make a `data` directory in the main directory of the repo called data if it doesn't already exist. 4 | """ 5 | import os 6 | import pathlib 7 | import argparse 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | args = parser.parse_args() 12 | 13 | current_path = pathlib.Path(__file__).parent.resolve() 14 | main_dir = pathlib.Path(__file__).parent.parent.resolve() 15 | data_dir = os.path.join(main_dir, "data") 16 | 17 | os.chdir(main_dir) 18 | if not os.path.isdir(data_dir): 19 | os.system("mkdir data") 20 | data_dir = os.path.join(main_dir, "data") 21 | os.chdir(data_dir) 22 | os.system("datalad clone https://github.com/OpenNeuroDatasets/ds003020") 23 | os.chdir("ds003020") 24 | os.system("datalad get derivative") 25 | -------------------------------------------------------------------------------- /mteb/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/mteb/.gitattributes -------------------------------------------------------------------------------- /mteb/.gitignore: -------------------------------------------------------------------------------- 1 | questions/msmarco/msmarco-dataset 2 | **/data_hash.txt 3 | **/*.pkl 4 | *.json 5 | 6 | -------------------------------------------------------------------------------- /mteb/questions/msmarco/convert_query_text.sh: -------------------------------------------------------------------------------- 1 | python gen_questions.py \ 2 | --input_file="questions_query_2k.json" \ 3 | --prompt_file="query_text_prompt.txt" \ 4 | --output_file="questions_corpus_2k.json" \ 5 | --model="gpt-4-1106-preview" \ 6 | --sort_key="question_id" \ 7 | --batch_size=20 \ 8 | --mode="local" \ 9 | # --tq=10 -------------------------------------------------------------------------------- /mteb/questions/msmarco/msmarco-beir.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import pandas as pd 4 | import os 5 | from random import sample 6 | 7 | def extract_queries_from_dataset(dataset_path, key, split, 
num_queries=None): 8 | """ 9 | Extracts queries from the msmarco dataset. 10 | 11 | Args: 12 | dataset_path (str): The path to the msmarco dataset. 13 | split (str): The split of the dataset to extract queries from. Default is 'test'. 14 | num_queries (int, optional): The number of queries to extract. If None, all queries are extracted. Default is None. 15 | key: the key to extract. 16 | 17 | Returns: 18 | list: A list of dictionaries, each containing the query_id and query. 19 | """ 20 | qrels_path = os.path.join(dataset_path, "qrels", f"{split}.tsv") 21 | qrels_df = pd.read_csv(qrels_path, sep='\t', header=None, names=["query_id", "corpus_id", "score"], skiprows=1) 22 | unique_queries = list(dict.fromkeys(qrels_df[key].values)) 23 | 24 | if num_queries is not None and len(unique_queries) < num_queries: 25 | num_queries = len(unique_queries) 26 | 27 | top_queries = unique_queries if num_queries is None else unique_queries[:num_queries] 28 | top_queries = [int(query) for query in top_queries] # Convert top_queries to int 29 | 30 | if key == "query_id": 31 | queries_path = os.path.join(dataset_path, "queries.jsonl") 32 | queries_df = pd.read_json(queries_path, lines=True) 33 | queries_df.rename(columns={"_id": key, "text": "query"}, inplace=True) 34 | elif key == "corpus_id": 35 | queries_path = os.path.join(dataset_path, "corpus.jsonl") 36 | queries_df = pd.read_json(queries_path, lines=True) 37 | queries_df.rename(columns={"_id": key, "text": "corpus"}, inplace=True) 38 | else: 39 | raise ValueError("Invalid key. Key should be either 'query_id' or 'corpus_id'") 40 | 41 | queries_df[key] = queries_df[key].astype(int) # Convert key to int to match datatype of top_queries 42 | 43 | extracted_queries = queries_df[queries_df[key].isin(top_queries)].to_dict('records') 44 | return extracted_queries, len(top_queries) 45 | 46 | def save_data_to_json_file(data, num_queries, split, key, filename=None): 47 | """ 48 | Writes data to a JSON file. 49 | 50 | Args: 51 | data (list): The data to write to the file. 52 | num_queries (int): The number of queries in the data. 53 | split (str): The split of the dataset from which the queries were extracted. 54 | filename (str, optional): The name of the file to write to. If None, a default name is generated. 55 | """ 56 | if filename is None: 57 | filename = f"sample_{split}_{key}_{num_queries if num_queries is not None else 'all'}_beir.json" 58 | 59 | with open(filename, "w") as file: 60 | if data: 61 | json.dump(data, file, indent=4) 62 | 63 | if __name__ == "__main__": 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument("--dataset_path", default="./msmarco-dataset", help="Path to the msmarco dataset.") 66 | parser.add_argument("--split", default="test", choices=["test", "dev", "train"], help="The split of the dataset to extract queries from.") 67 | parser.add_argument("--num_queries", type=int, default=None, help="The number of queries to extract. 
If None, all queries are extracted.") 68 | parser.add_argument("--key", default="query_id", help="The key to extract.") 69 | parser.add_argument("--output_file", default=None, help="The name of the output file.") 70 | args = parser.parse_args() 71 | 72 | queries, num_queries = extract_queries_from_dataset(args.dataset_path, args.key, args.split, args.num_queries) 73 | if queries: 74 | save_data_to_json_file(queries, num_queries, args.split, args.key, args.output_file) 75 | 76 | # python msmarco-beir.py --split="dev" --key="corpus_id" -------------------------------------------------------------------------------- /mteb/questions/msmarco/msmarco-hf.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datasets import load_dataset 3 | 4 | def extract_queries_from_dataset(split="test", num_queries=100, params=["query_id", "query"]): 5 | """ 6 | Extracts queries from the msmarco dataset. 7 | 8 | Args: 9 | split (str): The split of the dataset to extract queries from. Default is 'test'. 10 | num_queries (int): The number of queries to extract. Default is 100. 11 | params (list): The parameters to extract for each query. Default is ['query_id', 'query']. 12 | 13 | Returns: 14 | list: A list of dictionaries, each containing the specified parameters for a query. 15 | """ 16 | dataset = load_dataset("ms_marco", "v2.1") 17 | extracted_queries = [ 18 | {param: dataset[split][i][param] for param in params} 19 | for i in range(num_queries) 20 | ] 21 | return extracted_queries 22 | 23 | def save_data_to_json_file(data, filename="sample_queries_100_hf.json"): 24 | """ 25 | Writes data to a JSON file. 26 | 27 | Args: 28 | data (list): The data to write to the file. 29 | filename (str): The name of the file to write to. 
30 | """ 31 | with open(filename, "w") as file: 32 | json.dump(data, file, indent=4) 33 | 34 | if __name__ == "__main__": 35 | queries = extract_queries_from_dataset() 36 | save_data_to_json_file(queries) 37 | -------------------------------------------------------------------------------- /mteb/questions/msmarco/process_output.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import os 4 | 5 | 6 | # Create the parser 7 | parser = argparse.ArgumentParser(description="Process and sort JSON data.") 8 | 9 | # Add the arguments 10 | parser.add_argument('--in', dest='InputFilePath', metavar='input_file_path', type=str, help='the path of the input file to process', default='output_beir_400_train.json') 11 | parser.add_argument('--out', dest='OutputFilePath', metavar='output_file_path', type=str, help='the path of the output file to write to', default='questions.json') 12 | 13 | # Parse the arguments 14 | args = parser.parse_args() 15 | 16 | if os.path.exists(args.OutputFilePath): 17 | os.remove(args.OutputFilePath) 18 | 19 | # Load the data 20 | with open(args.InputFilePath, 'r') as f: 21 | data = json.load(f) 22 | 23 | # Sort the data 24 | # data.sort(key=lambda x: x['query_id']) 25 | 26 | # Create new data structure 27 | new_data = [{'question_id': i, 'question': item['question']} for i, item in enumerate(data)] 28 | 29 | # Write to a new file 30 | with open(args.OutputFilePath, 'w') as f: 31 | json.dump(new_data, f, indent=4) 32 | -------------------------------------------------------------------------------- /mteb/questions/msmarco/tmp/tmp.py: -------------------------------------------------------------------------------- 1 | import json 2 | from sentence_transformers import SentenceTransformer 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | 5 | def read_json_file(filepath): 6 | with open(filepath, 'r') as file: 7 | return json.load(file) 8 | 9 | def write_json_file(data, filepath): 10 | with open(filepath, 'w') as file: 11 | json.dump(data, file, indent=4) 12 | 13 | 14 | # def find_similar_questions(questions, model_name='all-MiniLM-L6-v2', similarity_threshold=0.95): 15 | def find_similar_questions(questions, model_name='sentence-t5-xl', similarity_threshold=0.98): 16 | # Initialize the model 17 | model = SentenceTransformer(model_name) 18 | 19 | # Extract question texts 20 | question_texts = [question['question'] for question in questions] 21 | 22 | # Generate embeddings 23 | embeddings = model.encode(question_texts, convert_to_tensor=True) 24 | 25 | # Move embeddings to CPU for cosine_similarity calculation 26 | embeddings = embeddings.cpu() 27 | 28 | similar_questions = [] 29 | 30 | # Calculate cosine similarity 31 | cos_sim = cosine_similarity(embeddings, embeddings) 32 | 33 | for i in range(len(questions)): 34 | for j in range(i+1, len(questions)): 35 | if cos_sim[i, j] >= similarity_threshold: 36 | similar_questions.append({ 37 | 'question_id_1': questions[i]['question_id'], 38 | 'question_1': questions[i]['question'], 39 | 'question_id_2': questions[j]['question_id'], 40 | 'question_2': questions[j]['question'], 41 | 'similarity': float(cos_sim[i, j]) 42 | }) 43 | 44 | return similar_questions 45 | 46 | input_file = 'questions.json' # Replace with your JSON file path 47 | questions = read_json_file(input_file) 48 | 49 | similar_questions = find_similar_questions(questions) 50 | 51 | output_file = 'output_similar_questions.json' # Replace with your desired output file name 52 | 
write_json_file(similar_questions, output_file) 53 | 54 | print(f"Similar questions with nuanced scoring written to {output_file}.") -------------------------------------------------------------------------------- /mteb/questions/msmarco/tmp/tmp2.py: -------------------------------------------------------------------------------- 1 | import json 2 | from sentence_transformers import SentenceTransformer 3 | from sklearn.cluster import DBSCAN 4 | 5 | def read_json_file(filepath): 6 | """Reads a JSON file and returns its content.""" 7 | with open(filepath, 'r') as file: 8 | return json.load(file) 9 | 10 | def write_json_file(data, filepath): 11 | """Writes data to a JSON file.""" 12 | with open(filepath, 'w') as file: 13 | json.dump(data, file, indent=4) 14 | 15 | def cluster_and_filter_questions(questions, model_name='all-MiniLM-L6-v2', eps=0.05, min_samples=2): 16 | """Clusters questions, selects unique ones per cluster, and tracks removed questions.""" 17 | model = SentenceTransformer(model_name) 18 | question_texts = [question['question'] for question in questions] 19 | embeddings = model.encode(question_texts, convert_to_tensor=False) 20 | clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine").fit(embeddings) 21 | 22 | unique_questions = [] 23 | removed_questions = [] 24 | added_clusters = set() 25 | 26 | for question_idx, cluster_label in enumerate(clustering.labels_): 27 | if cluster_label == -1: # Treat outliers as unique 28 | unique_questions.append(questions[question_idx]) 29 | continue 30 | cluster_label_str = str(cluster_label) 31 | if cluster_label_str not in added_clusters: 32 | added_clusters.add(cluster_label_str) 33 | unique_questions.append(questions[question_idx]) 34 | else: 35 | removed_questions.append(questions[question_idx]) 36 | 37 | return unique_questions, removed_questions 38 | 39 | def renumber_question_ids(questions): 40 | """Renumbers question IDs from 1 to n, maintaining the original question content.""" 41 | return [{'question_id': i + 1, 'question': question['question']} for i, question in enumerate(questions)] 42 | 43 | # Specify the file paths 44 | input_file = 'questions.json' # Path to your input JSON file 45 | output_file_unique = 'unique_questions.json' # File to write unique questions 46 | output_file_removed = 'removed_questions.json' # File to write removed questions 47 | output_file_renumbered = 'renumbered_questions.json' # File to write renumbered unique questions 48 | 49 | # Processing 50 | questions = read_json_file(input_file) 51 | unique_questions, removed_questions = cluster_and_filter_questions(questions) 52 | write_json_file(unique_questions, output_file_unique) 53 | write_json_file(removed_questions, output_file_removed) 54 | 55 | # Renumber and write the renumbered unique questions 56 | renumbered_questions = renumber_question_ids(unique_questions) 57 | write_json_file(renumbered_questions, output_file_renumbered) -------------------------------------------------------------------------------- /mteb/scripts/encode/embed_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python encode.py \ 4 | --input_file="../../questions/msmarco/sample_dev_queries_100_beir.json" \ 5 | --questions_file="../../questions/msmarco/questions_query.json" \ 6 | --prompt_file="prompts/QAembedder_system_prompt_queries.txt" \ 7 | --key="query_id" \ 8 | --output_file="dev_queries_out.json" \ 9 | --tq 50 10 | 11 | python encode.py \ 12 | 
--input_file="../../questions/msmarco/sample_dev_corpus_120_beir.json" \ 13 | --questions_file="../../questions/msmarco/questions_corpus.json" \ 14 | --prompt_file="prompts/QAembedder_system_prompt_corpus.txt" \ 15 | --key="corpus_id" \ 16 | --output_file="dev_corpus_out.json" \ 17 | --tq 60 18 | -------------------------------------------------------------------------------- /mteb/scripts/extract_metadata.py: -------------------------------------------------------------------------------- 1 | # Use: python extract.py mteb/tasks 2 | # Extracts the MTEB task and dataset metadata 3 | 4 | import os 5 | import json 6 | import ast 7 | 8 | def find_python_files(directory): 9 | for root, _, files in os.walk(directory): 10 | for file in files: 11 | if file.endswith('.py'): 12 | yield os.path.join(root, file) 13 | 14 | def parse_globals(file_content): 15 | globals_dict = {} 16 | try: 17 | tree = ast.parse(file_content) 18 | for node in ast.walk(tree): 19 | if isinstance(node, ast.Assign): 20 | for target in node.targets: 21 | if isinstance(target, ast.Name): 22 | try: 23 | globals_dict[target.id] = eval(compile(ast.Expression(node.value), '', 'eval'), {}) 24 | except Exception as e: 25 | pass # Skip if unable to evaluate 26 | except SyntaxError as e: 27 | pass # Ignore files with syntax errors 28 | return globals_dict 29 | 30 | def process_class_node(class_node, globals_dict): 31 | for node in ast.walk(class_node): 32 | if isinstance(node, ast.FunctionDef) and node.name == 'description': 33 | for return_node in ast.walk(node): 34 | if isinstance(return_node, ast.Return): 35 | try: 36 | description_dict = eval(compile(ast.Expression(return_node.value), '', 'eval'), globals_dict) 37 | eval_langs = description_dict.get("eval_langs") 38 | if "en" in eval_langs: 39 | return description_dict 40 | except Exception as e: 41 | pass # Handle errors silently 42 | return None 43 | 44 | def append_to_json(output_file, data): 45 | with open(output_file, 'a') as f: 46 | json.dump(data, f) 47 | f.write('\n') 48 | 49 | def main(directory, output_file): 50 | folder_stats = {} # Dictionary to hold the count of instances per folder 51 | 52 | for file_path in find_python_files(directory): 53 | parent_folder = os.path.basename(os.path.dirname(file_path)) 54 | with open(file_path, 'r', encoding='utf-8') as file: 55 | file_content = file.read() 56 | globals_dict = parse_globals(file_content) 57 | tree = ast.parse(file_content) 58 | for node in ast.walk(tree): 59 | if isinstance(node, ast.ClassDef): 60 | description = process_class_node(node, globals_dict) 61 | if description: 62 | # Update stats 63 | folder_stats[parent_folder] = folder_stats.get(parent_folder, 0) + 1 64 | append_to_json(output_file, { 65 | "parent_folder": parent_folder, 66 | "class_name": node.name, 67 | "description": description 68 | }) 69 | 70 | # Print the stats after processing all files 71 | for folder, count in folder_stats.items(): 72 | print(f"{folder}: {count} instances") 73 | 74 | if __name__ == "__main__": 75 | import sys 76 | if len(sys.argv) != 3: 77 | print("Usage: python script.py ") 78 | else: 79 | directory = sys.argv[1] 80 | output_file = sys.argv[2] 81 | main(directory, output_file) 82 | -------------------------------------------------------------------------------- /mteb/scripts/merge_cqadupstack.py: -------------------------------------------------------------------------------- 1 | """ 2 | Merges CQADupstack subset results 3 | Usage: python merge_cqadupstack.py path_to_results_folder 4 | """ 5 | import glob 6 | import json 7 | 
import logging 8 | import os 9 | import shutil 10 | import sys 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | TASK_LIST_CQA = [ 16 | "CQADupstackAndroidRetrieval", 17 | "CQADupstackEnglishRetrieval", 18 | "CQADupstackGamingRetrieval", 19 | "CQADupstackGisRetrieval", 20 | "CQADupstackMathematicaRetrieval", 21 | "CQADupstackPhysicsRetrieval", 22 | "CQADupstackProgrammersRetrieval", 23 | "CQADupstackStatsRetrieval", 24 | "CQADupstackTexRetrieval", 25 | "CQADupstackUnixRetrieval", 26 | "CQADupstackWebmastersRetrieval", 27 | "CQADupstackWordpressRetrieval", 28 | ] 29 | 30 | NOAVG_KEYS = [ 31 | "evaluation_time", 32 | "mteb_version", 33 | "mteb_dataset_name", 34 | "dataset_revision", 35 | ] 36 | 37 | results_folder = sys.argv[1] 38 | # Ensure at least 1 character btw CQADupstack & Retrieval 39 | files = glob.glob(f'{results_folder.rstrip("/")}/CQADupstack*?*Retrieval.json') 40 | 41 | logger.info(f"Found CQADupstack files: {files}") 42 | 43 | if len(files) == len(TASK_LIST_CQA): 44 | all_results = {} 45 | for file_name in files: 46 | with open(file_name, "r", encoding="utf-8") as f: 47 | results = json.load(f) 48 | for split, split_results in results.items(): 49 | if split not in ("train", "validation", "dev", "test"): 50 | all_results[split] = split_results 51 | continue 52 | all_results.setdefault(split, {}) 53 | for metric, score in split_results.items(): 54 | all_results[split].setdefault(metric, 0) 55 | if metric == "evaluation_time": 56 | score = all_results[split][metric] + score 57 | elif metric not in NOAVG_KEYS: 58 | score = all_results[split][metric] + score * 1 / len(TASK_LIST_CQA) 59 | all_results[split][metric] = score 60 | all_results["mteb_dataset_name"] = "CQADupstackRetrieval" 61 | 62 | logger.info("Saving merged results") 63 | with open(os.path.join(results_folder, "CQADupstackRetrieval.json"), "w", encoding="utf-8") as f: 64 | json.dump(all_results, f, indent=4) 65 | 66 | # Move TASK_LIST_CQA files to a separate folder 67 | processed_folder = os.path.join(results_folder, "processed") 68 | if not os.path.exists(processed_folder): 69 | os.makedirs(processed_folder) 70 | for task in TASK_LIST_CQA: 71 | src_file_path = os.path.join(results_folder, f"{task}.json") 72 | dst_file_path = os.path.join(processed_folder, f"{task}.json") 73 | shutil.move(src_file_path, dst_file_path) 74 | logger.info(f"Moved TASK_LIST_CQA files to {processed_folder}") 75 | else: 76 | logger.warning( 77 | f"Got {len(files)}, but expected {len(TASK_LIST_CQA)} files. 
Missing: {set(TASK_LIST_CQA) - set([x.split('/')[-1].split('.')[0] for x in files])}; Too much: {set([x.split('/')[-1].split('.')[0] for x in files]) - set(TASK_LIST_CQA)}" 78 | ) 79 | -------------------------------------------------------------------------------- /mteb/scripts/run_mteb_english.py: -------------------------------------------------------------------------------- 1 | """Example script for benchmarking all datasets constituting the MTEB English leaderboard & average scores""" 2 | 3 | import logging 4 | 5 | from mteb import MTEB 6 | from sentence_transformers import SentenceTransformer 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | logger = logging.getLogger("main") 11 | 12 | TASK_LIST_CLASSIFICATION = [ 13 | "AmazonCounterfactualClassification", 14 | "AmazonPolarityClassification", 15 | "AmazonReviewsClassification", 16 | "Banking77Classification", 17 | "EmotionClassification", 18 | "ImdbClassification", 19 | "MassiveIntentClassification", 20 | "MassiveScenarioClassification", 21 | "MTOPDomainClassification", 22 | "MTOPIntentClassification", 23 | "ToxicConversationsClassification", 24 | "TweetSentimentExtractionClassification", 25 | ] 26 | 27 | TASK_LIST_CLUSTERING = [ 28 | "ArxivClusteringP2P", 29 | "ArxivClusteringS2S", 30 | "BiorxivClusteringP2P", 31 | "BiorxivClusteringS2S", 32 | "MedrxivClusteringP2P", 33 | "MedrxivClusteringS2S", 34 | "RedditClustering", 35 | "RedditClusteringP2P", 36 | "StackExchangeClustering", 37 | "StackExchangeClusteringP2P", 38 | "TwentyNewsgroupsClustering", 39 | ] 40 | 41 | TASK_LIST_PAIR_CLASSIFICATION = [ 42 | "SprintDuplicateQuestions", 43 | "TwitterSemEval2015", 44 | "TwitterURLCorpus", 45 | ] 46 | 47 | TASK_LIST_RERANKING = [ 48 | "AskUbuntuDupQuestions", 49 | "MindSmallReranking", 50 | "SciDocsRR", 51 | "StackOverflowDupQuestions", 52 | ] 53 | 54 | TASK_LIST_RETRIEVAL = [ 55 | "ArguAna", 56 | "ClimateFEVER", 57 | "CQADupstackAndroidRetrieval", 58 | "CQADupstackEnglishRetrieval", 59 | "CQADupstackGamingRetrieval", 60 | "CQADupstackGisRetrieval", 61 | "CQADupstackMathematicaRetrieval", 62 | "CQADupstackPhysicsRetrieval", 63 | "CQADupstackProgrammersRetrieval", 64 | "CQADupstackStatsRetrieval", 65 | "CQADupstackTexRetrieval", 66 | "CQADupstackUnixRetrieval", 67 | "CQADupstackWebmastersRetrieval", 68 | "CQADupstackWordpressRetrieval", 69 | "DBPedia", 70 | "FEVER", 71 | "FiQA2018", 72 | "HotpotQA", 73 | "MSMARCO", 74 | "NFCorpus", 75 | "NQ", 76 | "QuoraRetrieval", 77 | "SCIDOCS", 78 | "SciFact", 79 | "Touche2020", 80 | "TRECCOVID", 81 | ] 82 | 83 | TASK_LIST_STS = [ 84 | "BIOSSES", 85 | "SICK-R", 86 | "STS12", 87 | "STS13", 88 | "STS14", 89 | "STS15", 90 | "STS16", 91 | "STS17", 92 | "STS22", 93 | "STSBenchmark", 94 | "SummEval", 95 | ] 96 | 97 | TASK_LIST = ( 98 | TASK_LIST_CLASSIFICATION 99 | + TASK_LIST_CLUSTERING 100 | + TASK_LIST_PAIR_CLASSIFICATION 101 | + TASK_LIST_RERANKING 102 | + TASK_LIST_RETRIEVAL 103 | + TASK_LIST_STS 104 | ) 105 | 106 | model_names = [ 107 | "hkunlp/instructor-base" 108 | ] 109 | 110 | # model_names = [ 111 | # "princeton-nlp/sup-simcse-bert-base-uncased", 112 | # "Luyu/co-condenser-marco", 113 | # "facebook/contriever-msmarco", 114 | # "Luyu/co-condenser-marco-retriever" 115 | # "Luyu/co-condenser-marco-retriever", 116 | # "sentence-transformers/gtr-t5-large", 117 | # "sentence-transformers/average_word_embeddings_glove.6B.300d" 118 | # ] 119 | 120 | for model_name in model_names: 121 | model = SentenceTransformer(model_name) 122 | for task in TASK_LIST: 123 | logger.info(f"Running task: {task} with 
model: {model_name}") 124 | eval_splits = ["dev"] if task == "MSMARCO" else ["test"] 125 | evaluation = MTEB(tasks=[task], task_langs=["en"]) # Remove "en" for running all languages 126 | evaluation.run(model, output_folder=f"results/{model_name}", eval_splits=eval_splits) -------------------------------------------------------------------------------- /neuro1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/neuro1/__init__.py -------------------------------------------------------------------------------- /neuro1/config.py: -------------------------------------------------------------------------------- 1 | from os.path import join, expanduser 2 | # join(dirname(dirname(os.path.abspath(__file__)))) 3 | 4 | if 'chansingh' in expanduser('~'): 5 | mnt_dir = '/home/chansingh/mntv1' 6 | else: 7 | mnt_dir = '/mntv1' 8 | 9 | root_dir = join(mnt_dir, 'deep-fMRI') 10 | cache_embs_dir = join(root_dir, 'qa', 'cache_embs') 11 | resp_processing_dir = join(root_dir, 'qa', 'resp_processing') 12 | 13 | # eng1000 data, download from [here](https://github.com/HuthLab/deep-fMRI-dataset) 14 | em_data_dir = join(root_dir, 'eng1000') 15 | nlp_utils_dir = join(root_dir, 'nlp_utils') 16 | -------------------------------------------------------------------------------- /neuro1/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/neuro1/data/__init__.py -------------------------------------------------------------------------------- /neuro1/data/npp.py: -------------------------------------------------------------------------------- 1 | """This module contains one line functions that should, by all rights, be in numpy. 2 | """ 3 | import numpy as np 4 | 5 | # Demean -- remove the mean from each column 6 | 7 | 8 | def demean(v): return v-v.mean(0) 9 | 10 | 11 | demean.__doc__ = """Removes the mean from each column of [v].""" 12 | dm = demean 13 | 14 | # Z-score -- z-score each column 15 | 16 | 17 | def zscore(v): 18 | s = v.std(0) 19 | m = v - v.mean(0) 20 | for i in range(len(s)): 21 | if s[i] != 0.: 22 | m[:, i] /= s[i] 23 | return m 24 | 25 | 26 | # zscore = lambda v: (v-v.mean(0))/v.std(0) 27 | zscore.__doc__ = """Z-scores (standardizes) each column of [v].""" 28 | zs = zscore 29 | 30 | # Rescale -- make each column have unit variance 31 | 32 | 33 | def rescale(v): return v/v.std(0) 34 | 35 | 36 | rescale.__doc__ = """Rescales each column of [v] to have unit variance.""" 37 | rs = rescale 38 | 39 | # Matrix corr -- find correlation between each column of c1 and the corresponding column of c2 40 | 41 | 42 | def mcorr(c1, c2): return (zs(c1)*zs(c2)).mean(0) 43 | 44 | 45 | mcorr.__doc__ = """Matrix correlation. Find the correlation between each column of [c1] and the corresponding column of [c2].""" 46 | 47 | # Cross corr -- find corr. between each row of c1 and EACH row of c2 48 | 49 | 50 | def xcorr(c1, c2): return np.dot(zs(c1.T).T, zs(c2.T)) / (c1.shape[1]) 51 | 52 | 53 | xcorr.__doc__ = """Cross-column correlation. 
Finds the correlation between each row of [c1] and each row of [c2].""" 54 | -------------------------------------------------------------------------------- /neuro1/encoding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/neuro1/encoding/__init__.py -------------------------------------------------------------------------------- /neuro1/encoding/eval.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import numpy as np 4 | 5 | 6 | def nancorr(x, y): 7 | mask = ~np.isnan(x) & ~np.isnan(y) 8 | return np.corrcoef(x[mask], y[mask])[0, 1] 9 | 10 | 11 | def evaluate_pc_model_on_each_voxel( 12 | args, stim, resp, 13 | model_params_to_save, pca, scaler): 14 | if args.encoding_model == 'ridge': 15 | weights_pc = model_params_to_save['weights_pc'] 16 | preds_pc = stim @ weights_pc 17 | model_params_to_save['weights'] = weights_pc * \ 18 | scaler.scale_ @ pca.components_ 19 | model_params_to_save['bias'] = scaler.mean_ @ pca.components_ + pca.mean_ 20 | # note: prediction = stim @ weights + bias 21 | preds_voxels = pca.inverse_transform( 22 | scaler.inverse_transform(preds_pc) 23 | ) # (n_trs x n_voxels) 24 | corrs = [] 25 | for i in range(preds_voxels.shape[1]): 26 | corrs.append(nancorr(preds_voxels[:, i], resp[:, i])) 27 | corrs = np.array(corrs) 28 | corrs[np.isnan(corrs)] = 0 29 | return corrs 30 | 31 | 32 | def add_summary_stats(r, verbose=True): 33 | for key in ['corrs_test', 'corrs_tune', 'corrs_tune_pc', 'corrs_test_pc']: 34 | if key in r: 35 | r[key + '_mean'] = np.nanmean(r[key]) 36 | r[key + '_median'] = np.nanmedian(r[key]) 37 | r[key + '_frac>0'] = np.nanmean(r[key] > 0) 38 | r[key + '_mean_top1_percentile'] = np.nanmean( 39 | np.sort(r[key])[-len(r[key]) // 100:]) 40 | r[key + '_mean_top5_percentile'] = np.nanmean( 41 | np.sort(r[key])[-len(r[key]) // 20:]) 42 | 43 | # add r2 stats 44 | r[key.replace('corrs', 'r2') + 45 | '_mean'] = np.nanmean(r[key] * np.abs(r[key])) 46 | r[key.replace('corrs', 'r2') + 47 | '_median'] = np.nanmedian(r[key] * np.abs(r[key])) 48 | 49 | if key == 'corrs_test' and verbose: 50 | logging.info(f"mean {key}: {r[key + '_mean']:.4f}") 51 | logging.info(f"median {key}: {r[key + '_median']:.4f}") 52 | logging.info(f"frac>0 {key}: {r[key + '_frac>0']:.4f}") 53 | logging.info( 54 | f"mean top1 percentile {key}: {r[key + '_mean_top1_percentile']:.4f}") 55 | logging.info( 56 | f"mean top5 percentile {key}: {r[key + '_mean_top5_percentile']:.4f}") 57 | 58 | return r 59 | -------------------------------------------------------------------------------- /neuro1/encoding/mlp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # def get_model(args): 4 | # if args.encoding_model == 'mlp': 5 | # return NeuralNetRegressor( 6 | # encoding_models.MLP( 7 | # dim_inputs=stim_train_delayed.shape[1], 8 | # dim_hidden=args.mlp_dim_hidden, 9 | # dim_outputs=resp_train.shape[1] 10 | # ), 11 | # max_epochs=3000, 12 | # lr=1e-5, 13 | # optimizer=torch.optim.Adam, 14 | # callbacks=[EarlyStopping(patience=30)], 15 | # iterator_train__shuffle=True, 16 | # # device='cuda', 17 | # ) 18 | 19 | # elif args.encoding_model == 'mlp': 20 | # stim_train_delayed = stim_train_delayed.astype(np.float32) 21 | # resp_train = resp_train.astype(np.float32) 22 | # stim_test_delayed = stim_test_delayed.astype(np.float32) 23 | # net = get_model(args) 24 
| # net.fit(stim_train_delayed, resp_train) 25 | # preds = net.predict(stim_test_delayed) 26 | # corrs_test = [] 27 | # for i in range(preds.shape[1]): 28 | # corrs_test.append(np.corrcoef(resp_test[:, i], preds[:, i])[0, 1]) 29 | # corrs_test = np.array(corrs_test) 30 | # r[corrs_key_test] = corrs_test 31 | # model_params_to_save = { 32 | # 'weights': net.module_.state_dict(), 33 | # } 34 | # torch.save(net.module_.state_dict(), join(save_dir, 'weights.pt')) 35 | -------------------------------------------------------------------------------- /neuro1/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/neuro1/features/__init__.py -------------------------------------------------------------------------------- /neuro1/features/questions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/neuro1/features/questions/__init__.py -------------------------------------------------------------------------------- /neuro1/features/stim_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import neuro1.config as config 3 | from neuro1.data.textgrid import TextGrid 4 | from neuro1.data.data_sequence import DataSequence 5 | from neuro1.data.utils_ds import make_word_ds 6 | import json 7 | from typing import Dict, List 8 | import os 9 | from os.path import join, dirname 10 | 11 | 12 | def load_story_wordseqs(stories) -> Dict[str, DataSequence]: 13 | # load textgrids 14 | base = join(config.root_dir, 'data', "ds003020/derivative/TextGrids") 15 | grids = {} 16 | for story in stories: 17 | grid_path = os.path.join(base, f"{story}.TextGrid") 18 | grids[story] = TextGrid(open(grid_path).read()) 19 | 20 | # make into wordseqs 21 | with open(join(config.root_dir, 'data', "ds003020/derivative/respdict.json"), "r") as f: 22 | respdict = json.load(f) 23 | trfiles = load_simulated_trfiles(respdict) 24 | wordseqs = make_word_ds(grids, trfiles) 25 | return wordseqs 26 | 27 | 28 | class TRFile(object): 29 | def __init__(self, trfilename, expectedtr=2.0045): 30 | """Loads data from [trfilename], should be output from stimulus presentation code. 31 | """ 32 | self.trtimes = [] 33 | self.soundstarttime = -1 34 | self.soundstoptime = -1 35 | self.otherlabels = [] 36 | self.expectedtr = expectedtr 37 | 38 | if trfilename is not None: 39 | self.load_from_file(trfilename) 40 | 41 | def load_from_file(self, trfilename): 42 | """Loads TR data from report with given [trfilename]. 43 | """ 44 | # Read the report file and populate the datastructure 45 | for ll in open(trfilename): 46 | timestr = ll.split()[0] 47 | label = " ".join(ll.split()[1:]) 48 | time = float(timestr) 49 | 50 | if label in ("init-trigger", "trigger"): 51 | self.trtimes.append(time) 52 | 53 | elif label == "sound-start": 54 | self.soundstarttime = time 55 | 56 | elif label == "sound-stop": 57 | self.soundstoptime = time 58 | 59 | else: 60 | self.otherlabels.append((time, label)) 61 | 62 | # Fix weird TR times 63 | itrtimes = np.diff(self.trtimes) 64 | badtrtimes = np.nonzero(itrtimes > (itrtimes.mean()*1.5))[0] 65 | newtrs = [] 66 | for btr in badtrtimes: 67 | # Insert new TR where it was missing.. 
68 | newtrtime = self.trtimes[btr]+self.expectedtr 69 | newtrs.append((newtrtime, btr)) 70 | 71 | for ntr, btr in newtrs: 72 | self.trtimes.insert(btr+1, ntr) 73 | 74 | def simulate(self, ntrs): 75 | """Simulates [ntrs] TRs that occur at the expected TR. 76 | """ 77 | self.trtimes = list(np.arange(ntrs)*self.expectedtr) 78 | 79 | def get_reltriggertimes(self): 80 | """Returns the times of all trigger events relative to the sound. 81 | """ 82 | return np.array(self.trtimes)-self.soundstarttime 83 | 84 | @property 85 | def avgtr(self): 86 | """Returns the average TR for this run. 87 | """ 88 | return np.diff(self.trtimes).mean() 89 | 90 | 91 | def load_simulated_trfiles(respdict, tr=2.0, start_time=10.0, pad=5): 92 | trdict = dict() 93 | for story, resps in respdict.items(): 94 | trf = TRFile(None, tr) 95 | trf.soundstarttime = start_time 96 | trf.simulate(resps - pad) 97 | trdict[story] = [trf] 98 | return trdict 99 | -------------------------------------------------------------------------------- /notebooks/02_flatmaps_diffs.py: -------------------------------------------------------------------------------- 1 | import cortex 2 | from tqdm import tqdm 3 | import joblib 4 | import imodelsx.process_results 5 | import numpy as np 6 | from os.path import join 7 | from matplotlib import pyplot as plt 8 | from copy import deepcopy 9 | import pandas as pd 10 | import os 11 | from os.path import dirname 12 | import seaborn as sns 13 | import dvu 14 | import analyze_helper 15 | import sys 16 | import json 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | from matplotlib.colors import Normalize 20 | from matplotlib.cm import ScalarMappable 21 | sys.path.append('..') 22 | path_to_repo = dirname(dirname(os.path.abspath(__file__))) 23 | 24 | 25 | def _save_flatmap(vals, subject, fname_save, clab): 26 | vabs = max(np.abs(vals)) 27 | cmap = 'RdBu' 28 | # cmap = sns.diverging_palette(12, 210, as_cmap=True) 29 | # cmap = sns.diverging_palette(16, 240, as_cmap=True) 30 | 31 | vol = cortex.Volume( 32 | vals, 'UT' + subject, xfmname=f'UT{subject}_auto', vmin=-vabs, vmax=vabs, cmap=cmap) 33 | 34 | cortex.quickshow(vol, 35 | with_rois=False, 36 | with_labels=False, 37 | with_colorbar=False 38 | ) 39 | plt.savefig(fname_save) 40 | plt.close() 41 | 42 | # save cbar 43 | norm = Normalize(vmin=-vabs, vmax=vabs) 44 | # need to invert this to match above 45 | sm = ScalarMappable(norm=norm, cmap=cmap) 46 | sm.set_array([]) 47 | fig, ax = plt.subplots(figsize=(5, 0.35)) 48 | cbar = plt.colorbar(sm, cax=ax, orientation='horizontal') 49 | cbar.set_label(clab, fontsize='x-large') 50 | plt.savefig(fname_save.replace('flatmap.pdf', 51 | 'cbar.pdf'), bbox_inches='tight') 52 | plt.close() 53 | 54 | 55 | if __name__ == '__main__': 56 | results_dir = analyze_helper.best_results_dir 57 | out_dir = join(path_to_repo, 'qa_results', 'diffs') 58 | os.makedirs(out_dir, exist_ok=True) 59 | 60 | # load the results in to a pandas dataframe 61 | r, cols_varied, mets = analyze_helper.load_clean_results(results_dir) 62 | r = r[r.feature_selection_alpha_index < 0] 63 | r = r[r.distill_model_path.isna()] 64 | r = r[~(r.feature_space == 'qa_embedder-25')] 65 | r = r[r.pc_components == 100] 66 | r = r[~((r.feature_space == 'qa_embedder-10') & 67 | (r.qa_embedding_model != 'ensemble1'))] 68 | 69 | for subject in ['S03', 'S02', 'S01']: 70 | args_qa = r[ 71 | (r.subject == subject) * 72 | (r.feature_space.str.contains('qa_embedder')) 73 | ].sort_values(by='corrs_tune_mean', ascending=False).iloc[0] 74 | for feature_space in 
['qa_embedder', 'bert']: # , 'llama']: 75 | corrs = [] 76 | 77 | args_baseline = r[ 78 | # (r.feature_space.str.contains('bert')) 79 | (r.feature_space.str.contains(feature_space)) * 80 | (r.subject == subject) 81 | # (r.ndelays == 8) 82 | ].sort_values(by='corrs_tune_mean', ascending=False).iloc[0] 83 | 84 | print('means', 'qa', args_qa['corrs_test'].mean( 85 | ), 'baseline', args_baseline['corrs_test'].mean()) 86 | 87 | # fname_save = join(out_dir, f'diff_bert-qa.png') 88 | 89 | lab_name_dict = { 90 | 'qa_embedder': 'QA-Emb', 91 | 'bert': 'BERT', 92 | 'llama': 'LLaMA' 93 | } 94 | clab = f'Test correlation ({lab_name_dict[feature_space]})' 95 | fname_save = join( 96 | out_dir, f'{subject}_{feature_space.replace("qa_embedder", "qa")}_flatmap.pdf') 97 | _save_flatmap(args_baseline['corrs_test'], 98 | subject, fname_save, clab=clab) 99 | 100 | if not feature_space == 'qa_embedder': 101 | fname_save = join( 102 | out_dir, f'{subject}_qa-{feature_space.replace("qa_embedder", "qa")}_flatmap.pdf') 103 | clab = f'Test correlation (QA-Emb - {lab_name_dict[feature_space]})' 104 | _save_flatmap( 105 | args_qa['corrs_test'] - args_baseline['corrs_test'], subject, fname_save, clab=clab) 106 | -------------------------------------------------------------------------------- /notebooks/06_emb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "The autoreload extension is already loaded. To reload it, use:\n", 13 | " %reload_ext autoreload\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "%load_ext autoreload\n", 19 | "%autoreload 2\n", 20 | "import sys\n", 21 | "sys.path.append('..')\n", 22 | "import imodelsx.llm\n", 23 | "from transformers import AutoModel, AutoTokenizer, pipeline\n", 24 | "from tqdm import tqdm\n", 25 | "fit_encoding = __import__('01_fit_encoding')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 10, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# Input text\n", 35 | "texts = ['this is a sample text that is longer than the other one',\n", 36 | " 'the cat chased the dog']" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "checkpoint = 'meta-llama/Meta-Llama-3-8B'\n", 46 | "llm = imodelsx.llm.LLMEmbs(checkpoint=checkpoint)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stderr", 56 | "output_type": "stream", 57 | "text": [ 58 | "100%|██████████| 1/1 [00:00<00:00, 1.33it/s]\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "embs = llm(texts, layer_idx=18, batch_size=16)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "inputs = llm.tokenizer_(\n", 73 | " texts, return_tensors='pt', padding=True).to(llm.model_.device)\n", 74 | "hidden_states = llm.model_(**inputs).hidden_states" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "(2, 4096)" 86 | ] 87 | }, 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "embs.shape" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | 
"execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3.8.10 ('.embgam')", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.11.9" 122 | }, 123 | "orig_nbformat": 4, 124 | "vscode": { 125 | "interpreter": { 126 | "hash": "559535f78d940c882783b39501b2581b5193373045707e5f8a51d046029cfd38" 127 | } 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /notebooks/analyze_helper.py: -------------------------------------------------------------------------------- 1 | from neuro1.data.response_utils import load_pca 2 | import imodelsx.process_results 3 | import viz 4 | import dvu 5 | from tqdm import tqdm 6 | import seaborn as sns 7 | import os 8 | import pandas as pd 9 | from collections import defaultdict 10 | from matplotlib import pyplot as plt 11 | from os.path import join 12 | import numpy as np 13 | import sys 14 | sys.path.append('../experiments') 15 | dvu.set_style() 16 | fit_encoding = __import__('02_fit_encoding') 17 | best_results_dir = '/home/chansingh/mntv1/deep-fMRI/qa/results/results_apr7' 18 | 19 | 20 | def load_results(save_dir): 21 | dfs = [] 22 | fnames = [ 23 | fname for fname in os.listdir(save_dir)[::-1] 24 | if not fname.startswith('coef') 25 | ] 26 | for fname in tqdm(fnames): 27 | df = pd.read_pickle(join(save_dir, fname)) 28 | # print(fname) 29 | # display(df) 30 | dfs.append(df.reset_index()) 31 | d = pd.concat(dfs) 32 | # d = d.drop(columns='coef_') 33 | # .round(2) 34 | # d.set_index(['feats', 'dset'], inplace=True) 35 | d['nonlin_suffix'] = d['nonlinearity'].fillna( 36 | '').str.replace('None', '').str.replace('tanh', '_tanh') 37 | d['model'] = d['model'] + d['nonlin_suffix'] 38 | d['model_full'] = d['model'] + '_thresh=' + \ 39 | d['perc_threshold_fmri'].astype(str) 40 | return d 41 | 42 | 43 | def load_clean_results(results_dir, experiment_filename='../experiments/02_fit_encoding.py'): 44 | # load the results in to a pandas dataframe 45 | r = imodelsx.process_results.get_results_df(results_dir) 46 | r = imodelsx.process_results.fill_missing_args_with_default( 47 | r, experiment_filename) 48 | for k in ['save_dir', 'save_dir_unique']: 49 | r[k] = r[k].map(lambda x: x if x.startswith('/home') 50 | else x.replace('/mntv1', '/home/chansingh/mntv1')) 51 | r['qa_embedding_model'] = r.apply(lambda row: { 52 | 'mistralai/Mistral-7B-Instruct-v0.2': 'mist-7B', 53 | 'mistralai/Mixtral-8x7B-Instruct-v0.1': 'mixt-moe', 54 | 'meta-llama/Meta-Llama-3-8B-Instruct': 'llama3-8B', 55 | 'meta-llama/Meta-Llama-3-8B-Instruct-fewshot': 'llama3-8B-fewshot', 56 | 'meta-llama/Meta-Llama-3-8B-Instruct-refined': 'llama3-8B-refined', 57 | }.get(row['qa_embedding_model'], row['qa_embedding_model']) if 'qa_emb' in row['feature_space'] else '', axis=1) 58 | r['subject'] = r['subject'].str.replace('UTS', 'S') 59 | r['qa_questions_version'] = r.apply( 60 | lambda row: row['qa_questions_version'] if 'qa_emb' in row['feature_space'] else 'eng1000', axis=1) 61 | mets = [c for c in r.columns if 'corrs' in c and ( 62 | 'mean' in c or 'frac' in c)] 63 | cols_varied = 
imodelsx.process_results.get_experiment_keys( 64 | r, experiment_filename) 65 | print('experiment varied these params:', cols_varied) 66 | r['corrs_test_mean_sem'] = r['corrs_test'].apply( 67 | lambda x: np.std(x) / np.sqrt(len(x))) 68 | mets.append('corrs_test_mean_sem') 69 | return r, cols_varied, mets 70 | 71 | 72 | def add_corrs_tune_pc_weighted(r): 73 | if not 'corrs_tune_pc_weighted_mean' in r.columns: 74 | r['corrs_tune_pc_weighted_mean'] = np.nan 75 | for subject in ['S01', 'S02', 'S03']: 76 | pca = load_pca('UT' + subject, pc_components=100) 77 | explained_var_weight = pca.explained_variance_[:100] 78 | explained_var_weight = explained_var_weight / \ 79 | explained_var_weight.sum() * len(explained_var_weight) 80 | 81 | for i, row in r[r.subject == subject].iterrows(): 82 | corrs = row['corrs_tune_pc'] 83 | corrs_weighted = corrs * explained_var_weight 84 | r.loc[i, 'corrs_tune_pc_weighted_mean'] = corrs_weighted.sum() 85 | return r 86 | -------------------------------------------------------------------------------- /notebooks/viz.py: -------------------------------------------------------------------------------- 1 | MODELS_RENAME = { 2 | 'bert-base-uncased': 'BERT (Finetuned)', 3 | 'bert-10__ndel=4fmri': 'BERT+fMRI (Finetuned)', 4 | } 5 | 6 | 7 | def feature_space_rename(x): 8 | FEATURE_SPACE_RENAME = { 9 | 'bert-10': 'BERT', 10 | 'eng1000': 'Eng1000', 11 | 'finetune_roberta-base-10': 'QA-Emb (distill, probabilistic)', 12 | 'finetune_roberta-base_binary-10': 'QA-Emb (distill, binary)', 13 | } 14 | if x in FEATURE_SPACE_RENAME: 15 | return FEATURE_SPACE_RENAME[x] 16 | x = x.replace('-10', '') 17 | x = x.replace('llama2-70B', 'LLaMA-2 (70B)') 18 | x = x.replace('llama2-7B', 'LLaMA-2 (7B)') 19 | x = x.replace('llama3-8B', 'LLaMA-3 (8B)') 20 | x = x.replace('mist-7B', 'Mistral (7B)') 21 | x = x.replace('ensemble1', 'Ensemble') 22 | if '_lay' in x: 23 | x = x.replace('_lay', ' (lay ') + ')' 24 | x = x.replace('(lay 6)', '(lay 06)') 25 | return x 26 | 27 | 28 | def version_rename(x): 29 | if x == 'v1': 30 | return 'Prompts 1-3 (376 questions)' 31 | elif x == 'v2': 32 | return 'Prompts 1-5 (518 questions)' 33 | elif x == 'v3_boostexamples': 34 | return 'Prompts 1-6 (674 questions)' 35 | else: 36 | return x 37 | 38 | 39 | DSETS_RENAME = { 40 | 'tweet_eval': 'Tweet Eval', 41 | 'sst2': 'SST2', 42 | 'rotten_tomatoes': 'Rotten tomatoes', 43 | 'moral_stories': 'Moral stories', 44 | } 45 | 46 | 47 | def dset_rename(x): 48 | if x in DSETS_RENAME: 49 | return DSETS_RENAME[x] 50 | else: 51 | x = x.replace('probing-', '') 52 | x = x.replace('_', ' ') 53 | return x.capitalize() 54 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # for download data initially 2 | # datalad # note: pip-installing didn't work, had to do sudo apt-get install datalad 3 | 4 | # actual reqs 5 | pathlib 6 | h5py 7 | tables 8 | numpy 9 | pandas 10 | torch 11 | transformers 12 | datasets 13 | scikit-learn 14 | imodels 15 | skorch 16 | nltk 17 | fire 18 | dict_hash 19 | # imodelsx should be installed from source 20 | # improved parallelization 21 | # vllm 22 | # ray 23 | # pydantic==1.10.15 24 | 25 | # basic 26 | tqdm 27 | # ridge_utils -------------------------------------------------------------------------------- /results/results_aggregated_mini.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/csinva/interpretable-embeddings/786ea8195c2236c0275528f2e0beb0fb2dc4e4d8/results/results_aggregated_mini.pkl -------------------------------------------------------------------------------- /scripts/.amltconfig: -------------------------------------------------------------------------------- 1 | { 2 | "project_name": "fmri_sweep", 3 | "storage_account_name": "chansingh", 4 | "container_name": "amulet", 5 | "blob_storage_account_name": "chansingh", 6 | "registry_name": "projects", 7 | "local_path": "/home/chansingh/fmri/scripts", 8 | "default_output_dir": "/home/chansingh/auto-prompt-engineering/scripts/amlt", 9 | "project_uuid": "7338179652.94061-f9cd80fb-8b46-471e-a59b-0e0fe0799d14", 10 | "version": "9.8.4" 11 | } -------------------------------------------------------------------------------- /scripts/02_sub_linear.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | from os.path import dirname, join, expanduser 5 | import sys 6 | from imodelsx import submit_utils 7 | path_to_file = os.path.dirname(os.path.abspath(__file__)) 8 | repo_dir = dirname(dirname(os.path.abspath(__file__))) 9 | sys.path.append(repo_dir) 10 | # python /home/chansingh/fmri/01_fit_encoding.py 11 | MIST7B = 'mistralai/Mistral-7B-Instruct-v0.2' 12 | MIXTMOE = 'mistralai/Mixtral-8x7B-Instruct-v0.1' 13 | LLAMA8B = 'meta-llama/Meta-Llama-3-8B-Instruct' 14 | LLAMA8B_fewshot = 'meta-llama/Meta-Llama-3-8B-Instruct-fewshot' 15 | LLAMA70B_fewshot = 'meta-llama/Meta-Llama-3-70B-Instruct-fewshot2' 16 | # (llama2-70B_lay24-10, 4 delays) 17 | 18 | params_shared_dict = { 19 | # things to average over 20 | 'use_cache': [1], 21 | 'nboots': [5], 22 | 'use_test_setup': [0], 23 | 'encoding_model': ['ridge'], 24 | 'subject': ['UTS03'], 25 | # 'subject': ['UTS02'], 26 | # 'subject': ['UTS01', 'UTS02', 'UTS03'], 27 | # 'distill_model_path': [BEST_RUN], 28 | 'save_dir': ['/home/chansingh/mntv1/deep-fMRI/encoding/results_apr7'], 29 | # 'ndelays': [4, 8, 12], 30 | 'ndelays': [8], 31 | 'pc_components': [100], 32 | 33 | # feature selection... 34 | 'num_stories': [0], # this is used to get shared stories, only u 35 | 'feature_selection_alpha_index': [1], 36 | # 'feature_selection_alpha_index': range(2, 10), 37 | # 'feature_selection_alpha_index': range(3, 11), 38 | 39 | # local 40 | # 'seed': [1], 41 | # 'pc_components': [1000, 100, -1], 42 | 'use_extract_only': [0], 43 | } 44 | 45 | params_coupled_dict = { 46 | ('feature_space', 'qa_questions_version', 'qa_embedding_model'): [ 47 | # new 48 | ('bert-10', 'v1', MIST7B), 49 | ('eng1000', 'v1', MIST7B), # need to rerun sparsity for this... 
50 | # run this with num_stories not 0 for old 51 | ('qa_embedder-10', 'v3_boostexamples', 'ensemble1'), 52 | ], 53 | } 54 | # Args list is a list of dictionaries 55 | # If you want to do something special to remove some of these runs, can remove them before calling run_args_list 56 | args_list = submit_utils.get_args_list( 57 | params_shared_dict=params_shared_dict, 58 | params_coupled_dict=params_coupled_dict, 59 | ) 60 | script_name = join(repo_dir, 'experiments', '02_fit_encoding.py') 61 | # amlt_kwargs = { 62 | # # 'amlt_file': join(repo_dir, 'scripts', 'launch_cpu.yaml'), 63 | # # 'sku': 'E4ads_v5', 64 | # # 'mnt_rename': ('/home/chansingh/mntv1', '/mntv1'), 65 | # 'amlt_file': join(repo_dir, 'launch.yaml'), # change this to run a cpu job 66 | # 'sku': '64G2-MI200-xGMI', 67 | # 'mnt_rename': ('/home/chansingh/mntv1', '/mntv1'), 68 | # } 69 | amlt_kwargs = { 70 | 'amlt_file': join(repo_dir, 'scripts', 'launch_cpu.yaml'), 71 | # E4ads_v5 (30 GB), E8ads_v5 (56 GB), E16ads_v5 (120GB), E32ads_v5 (240GB), E64ads_v5 (480 GB) 72 | 'sku': 'E64ads_v5', 73 | # 'sku': 'E32ads_v5', 74 | 'mnt_rename': ('/home/chansingh/mntv1', '/mntv1'), 75 | } 76 | submit_utils.run_args_list( 77 | args_list, 78 | script_name=script_name, 79 | # unique_seeds='seed_stories', 80 | amlt_kwargs=amlt_kwargs, 81 | # n_cpus=9, 82 | # n_cpus=2, 83 | # gpu_ids=[0, 1], 84 | # gpu_ids=[0, 1, 2, 3], 85 | # gpu_ids=[[0, 1], [2, 3]], 86 | # gpu_ids=[[0, 1, 2, 3]], 87 | # actually_run=False, 88 | repeat_failed_jobs=True, 89 | shuffle=True, 90 | cmd_python=f'export HF_TOKEN={open(expanduser("~/.HF_TOKEN"), "r").read().strip()}; python', 91 | ) 92 | -------------------------------------------------------------------------------- /scripts/d3/d3_processed/task_defs.json: -------------------------------------------------------------------------------- 1 | { 2 | "d3_0": "contains irony", 3 | "d3_1": "is a more objective description of what happened", 4 | "d3_2": "contains subjective opinion", 5 | "d3_3": "believes in god", 6 | "d3_4": "is against religion", 7 | "d3_5": "involves a need for people to evacuate", 8 | "d3_6": "describes a situation that involves terrorism", 9 | "d3_7": "involves crime", 10 | "d3_8": "describes a situation where people need shelter", 11 | "d3_9": "is related to food security", 12 | "d3_10": "is related to infrastructure", 13 | "d3_11": "describes a regime change", 14 | "d3_12": "is related to a medical situation", 15 | "d3_13": "involves a situation where people need clean water", 16 | "d3_14": "involves a search/rescue situation", 17 | "d3_15": "expresses need for utility, energy or sanitation", 18 | "d3_16": "is against Hillary", 19 | "d3_17": "supports hillary", 20 | "d3_18": "contains offensive content", 21 | "d3_19": "insult women or immigrants", 22 | "d3_20": "is pro-life", 23 | "d3_21": "supports abortion", 24 | "d3_22": "is about physics", 25 | "d3_23": "is related to computer science", 26 | "d3_24": "is about statistics", 27 | "d3_25": "is about math research", 28 | "d3_26": "is ungrammatical", 29 | "d3_27": "is grammatical", 30 | "d3_28": "is offensive to women", 31 | "d3_29": "supports feminism", 32 | "d3_30": "is about world news", 33 | "d3_31": "is about sports news", 34 | "d3_32": "is related to business", 35 | "d3_33": "is related to technology", 36 | "d3_34": "contains a bad movie review", 37 | "d3_35": "thinks the movie is good", 38 | "d3_36": "asks for a quantity", 39 | "d3_37": "asks about a location", 40 | "d3_38": "asks about a person", 41 | "d3_39": "asks about an entity", 42 | 
"d3_40": "asks about an abbreviation", 43 | "d3_41": "contains a definition", 44 | "d3_42": "is against environmentalist", 45 | "d3_43": "is environmentalist", 46 | "d3_44": "is a spam", 47 | "d3_45": "asks for factual information", 48 | "d3_46": "asks for an opinion", 49 | "d3_47": "is related to math and science", 50 | "d3_48": "is related to health", 51 | "d3_49": "related to computer or internet", 52 | "d3_50": "is related to sports", 53 | "d3_51": "is about entertainment", 54 | "d3_52": "is about family and relationships", 55 | "d3_53": "is related to politics or government" 56 | } -------------------------------------------------------------------------------- /scripts/d3/out/d3_0_irony.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_0_irony', 377, 0, 0, 'no', 'openai']" 9 | 7,"['d3_0_irony', 377, 0, 0, 'no', 'meta']" 10 | 8,"['d3_0_irony', 377, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_0_irony', 377, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_0_irony', 14, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_0_irony', 14, 1, 0, 'no', 'openai']" 14 | 12,"['d3_0_irony', 14, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_0_irony', 14, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_0_irony', 162, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_0_irony', 162, 1, 1, 'yes', 'mistral']" 18 | 16,"['d3_0_irony', 162, 1, 1, 'yes', 'meta']" 19 | 17,"['d3_0_irony', 162, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_0_irony', 131, 1, 1, 'yes', 'mistral']" 21 | 19,"['d3_0_irony', 131, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_0_irony', 131, 1, 1, 'yes', 'openai']" 23 | 21,"['d3_0_irony', 131, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_0_irony', 434, 0, 1, 'yes', 'openai']" 25 | 23,"['d3_0_irony', 434, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_0_irony', 434, 0, 1, 'yes', 'mistral']" 27 | 25,"['d3_0_irony', 434, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_0_irony', 399, 0, 0, 'no', 'openai']" 29 | 27,"['d3_0_irony', 399, 0, 0, 'no', 'meta']" 30 | 28,"['d3_0_irony', 399, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_0_irony', 399, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_0_irony', 526, 0, 1, 'yes', 'meta']" 33 | 31,"['d3_0_irony', 526, 0, 0, 'no', 'openai']" 34 | 32,"['d3_0_irony', 526, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_0_irony', 526, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_0_irony', 51, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_0_irony', 51, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_0_irony', 51, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_0_irony', 51, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_0_irony', 248, 0, 1, 'yes', 'openai']" 41 | 39,"['d3_0_irony', 248, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_0_irony', 248, 0, 1, 'yes', 'mistral']" 43 | 41,"['d3_0_irony', 248, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_0_irony', 17, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_0_irony', 17, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_0_irony', 17, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_0_irony', 17, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_0_irony', 128, 1, 1, 'yes', 'mistral']" 49 | 47,"['d3_0_irony', 128, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_0_irony', 128, 1, 1, 'yes', 'openai']" 51 | 49,"['d3_0_irony', 128, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_0_irony', 298, 0, 0, 'no', 'openai']" 53 | 51,"['d3_0_irony', 298, 0, 0, 'no', 'mistral']" 54 | 52,"['d3_0_irony', 298, 0, 0, 'no', 'meta']" 55 | 53,"['d3_0_irony', 298, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_0_irony', 15, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_0_irony', 15, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_0_irony', 15, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_0_irony', 15, 1, 1, 'yes', 'gpt4']" 
60 | 58,"['d3_0_irony', 117, 1, 1, 'yes', 'mistral']" 61 | 59,"['d3_0_irony', 117, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_0_irony', 117, 1, 1, 'yes', 'openai']" 63 | 61,"['d3_0_irony', 117, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_0_irony', 383, 0, 1, 'yes', 'meta']" 65 | 63,"['d3_0_irony', 383, 0, 0, 'no', 'openai']" 66 | 64,"['d3_0_irony', 383, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_0_irony', 383, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_0_irony', 321, 0, 0, 'no', 'openai']" 69 | 67,"['d3_0_irony', 321, 0, 0, 'no', 'meta']" 70 | 68,"['d3_0_irony', 321, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_0_irony', 321, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_0_irony', 130, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_0_irony', 130, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_0_irony', 130, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_0_irony', 130, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_0_irony', 347, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_0_irony', 347, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_0_irony', 347, 0, 0, 'no', 'openai']" 79 | 77,"['d3_0_irony', 347, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_0_irony', 477, 0, 0, 'no', 'openai']" 81 | 79,"['d3_0_irony', 477, 0, 0, 'no', 'meta']" 82 | 80,"['d3_0_irony', 477, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_0_irony', 477, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_0_irony', 3, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_0_irony', 3, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_0_irony', 3, 1, 0, 'no', 'openai']" 87 | 85,"['d3_0_irony', 3, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_13_water.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_13_water', 1674, 0, 0, 'no', 'openai']" 9 | 7,"['d3_13_water', 1674, 0, 0, 'no', 'meta']" 10 | 8,"['d3_13_water', 1674, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_13_water', 1674, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_13_water', 65, 1, 0, 'no', 'openai']" 13 | 11,"['d3_13_water', 65, 1, 0, 'no', 'mistral']" 14 | 12,"['d3_13_water', 65, 1, 0, 'no', 'meta']" 15 | 13,"['d3_13_water', 65, 1, 0, 'no', 'gpt4']" 16 | 14,"['d3_13_water', 720, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_13_water', 720, 0, 0, 'no', 'meta']" 18 | 16,"['d3_13_water', 720, 0, 0, 'no', 'openai']" 19 | 17,"['d3_13_water', 720, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_13_water', 584, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_13_water', 584, 0, 0, 'no', 'meta']" 22 | 20,"['d3_13_water', 584, 0, 0, 'no', 'openai']" 23 | 21,"['d3_13_water', 584, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_13_water', 1928, 0, 0, 'no', 'openai']" 25 | 23,"['d3_13_water', 1928, 0, 0, 'no', 'mistral']" 26 | 24,"['d3_13_water', 1928, 0, 0, 'no', 'meta']" 27 | 25,"['d3_13_water', 1928, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_13_water', 1772, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_13_water', 1772, 0, 0, 'no', 'meta']" 30 | 28,"['d3_13_water', 1772, 0, 0, 'no', 'openai']" 31 | 29,"['d3_13_water', 1772, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_13_water', 2336, 0, 0, 'no', 'openai']" 33 | 31,"['d3_13_water', 2336, 0, 0, 'no', 'meta']" 34 | 32,"['d3_13_water', 2336, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_13_water', 2336, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_13_water', 227, 0, 0, 'no', 'mistral']" 37 | 35,"['d3_13_water', 227, 0, 0, 'no', 'meta']" 38 | 36,"['d3_13_water', 227, 0, 0, 'no', 'openai']" 39 | 37,"['d3_13_water', 227, 0, 0, 'no', 'gpt4']" 40 | 38,"['d3_13_water', 1105, 0, 1, 'yes', 'meta']" 41 | 39,"['d3_13_water', 1105, 0, 1, 'yes', 'openai']" 42 | 40,"['d3_13_water', 1105, 0, 1, 
'yes', 'mistral']" 43 | 41,"['d3_13_water', 1105, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_13_water', 78, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_13_water', 78, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_13_water', 78, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_13_water', 78, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_13_water', 572, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_13_water', 572, 0, 0, 'no', 'meta']" 50 | 48,"['d3_13_water', 572, 0, 0, 'no', 'openai']" 51 | 49,"['d3_13_water', 572, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_13_water', 1323, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_13_water', 1323, 0, 0, 'no', 'meta']" 54 | 52,"['d3_13_water', 1323, 0, 0, 'no', 'openai']" 55 | 53,"['d3_13_water', 1323, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_13_water', 69, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_13_water', 69, 1, 0, 'no', 'openai']" 58 | 56,"['d3_13_water', 69, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_13_water', 69, 1, 0, 'no', 'gpt4']" 60 | 58,"['d3_13_water', 520, 0, 0, 'no', 'openai']" 61 | 59,"['d3_13_water', 520, 0, 0, 'no', 'mistral']" 62 | 60,"['d3_13_water', 520, 0, 0, 'no', 'meta']" 63 | 61,"['d3_13_water', 520, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_13_water', 1702, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_13_water', 1702, 0, 0, 'no', 'meta']" 66 | 64,"['d3_13_water', 1702, 0, 0, 'no', 'openai']" 67 | 65,"['d3_13_water', 1702, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_13_water', 1427, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_13_water', 1427, 0, 0, 'no', 'meta']" 70 | 68,"['d3_13_water', 1427, 0, 0, 'no', 'openai']" 71 | 69,"['d3_13_water', 1427, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_13_water', 577, 0, 0, 'no', 'openai']" 73 | 71,"['d3_13_water', 577, 0, 1, 'yes', 'meta']" 74 | 72,"['d3_13_water', 577, 0, 1, 'yes', 'mistral']" 75 | 73,"['d3_13_water', 577, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_13_water', 1543, 0, 0, 'no', 'meta']" 77 | 75,"['d3_13_water', 1543, 0, 0, 'no', 'openai']" 78 | 76,"['d3_13_water', 1543, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_13_water', 1543, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_13_water', 2119, 0, 0, 'no', 'openai']" 81 | 79,"['d3_13_water', 2119, 0, 0, 'no', 'meta']" 82 | 80,"['d3_13_water', 2119, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_13_water', 2119, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_13_water', 17, 1, 0, 'no', 'openai']" 85 | 83,"['d3_13_water', 17, 1, 0, 'no', 'meta']" 86 | 84,"['d3_13_water', 17, 1, 0, 'no', 'mistral']" 87 | 85,"['d3_13_water', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_14_search.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_14_search', 1680, 0, 1, 'yes', 'mistral']" 9 | 7,"['d3_14_search', 1680, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_14_search', 1680, 0, 0, 'no', 'openai']" 11 | 9,"['d3_14_search', 1680, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_14_search', 65, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_14_search', 65, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_14_search', 65, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_14_search', 65, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_14_search', 722, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_14_search', 722, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_14_search', 722, 0, 0, 'no', 'openai']" 19 | 17,"['d3_14_search', 722, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_14_search', 586, 0, 0, 'no', 'openai']" 21 | 19,"['d3_14_search', 586, 0, 0, 'no', 'meta']" 22 | 20,"['d3_14_search', 586, 0, 1, 'yes', 'mistral']" 23 | 21,"['d3_14_search', 586, 0, 1, 'yes', 'gpt4']" 24 | 
22,"['d3_14_search', 1935, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_14_search', 1935, 0, 0, 'no', 'meta']" 26 | 24,"['d3_14_search', 1935, 0, 0, 'no', 'openai']" 27 | 25,"['d3_14_search', 1935, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_14_search', 1778, 0, 0, 'no', 'openai']" 29 | 27,"['d3_14_search', 1778, 0, 0, 'no', 'meta']" 30 | 28,"['d3_14_search', 1778, 0, 1, 'yes', 'mistral']" 31 | 29,"['d3_14_search', 1778, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_14_search', 2344, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_14_search', 2344, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_14_search', 2344, 0, 1, 'yes', 'mistral']" 35 | 33,"['d3_14_search', 2344, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_14_search', 228, 0, 1, 'yes', 'openai']" 37 | 35,"['d3_14_search', 228, 0, 1, 'yes', 'mistral']" 38 | 36,"['d3_14_search', 228, 0, 1, 'yes', 'meta']" 39 | 37,"['d3_14_search', 228, 0, 1, 'yes', 'gpt4']" 40 | 38,"['d3_14_search', 1108, 0, 1, 'yes', 'mistral']" 41 | 39,"['d3_14_search', 1108, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_14_search', 1108, 0, 1, 'yes', 'openai']" 43 | 41,"['d3_14_search', 1108, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_14_search', 78, 1, 0, 'no', 'openai']" 45 | 43,"['d3_14_search', 78, 1, 0, 'no', 'mistral']" 46 | 44,"['d3_14_search', 78, 1, 0, 'no', 'meta']" 47 | 45,"['d3_14_search', 78, 1, 0, 'no', 'gpt4']" 48 | 46,"['d3_14_search', 574, 0, 0, 'no', 'openai']" 49 | 47,"['d3_14_search', 574, 0, 0, 'no', 'meta']" 50 | 48,"['d3_14_search', 574, 0, 0, 'no', 'mistral']" 51 | 49,"['d3_14_search', 574, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_14_search', 1328, 0, 0, 'no', 'openai']" 53 | 51,"['d3_14_search', 1328, 0, 0, 'no', 'meta']" 54 | 52,"['d3_14_search', 1328, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_14_search', 1328, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_14_search', 69, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_14_search', 69, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_14_search', 69, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_14_search', 69, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_14_search', 522, 0, 0, 'no', 'openai']" 61 | 59,"['d3_14_search', 522, 0, 0, 'no', 'meta']" 62 | 60,"['d3_14_search', 522, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_14_search', 522, 0, 1, 'yes', 'gpt4']" 64 | 62,"['d3_14_search', 1707, 0, 1, 'yes', 'mistral']" 65 | 63,"['d3_14_search', 1707, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_14_search', 1707, 0, 1, 'yes', 'openai']" 67 | 65,"['d3_14_search', 1707, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_14_search', 1432, 0, 1, 'yes', 'openai']" 69 | 67,"['d3_14_search', 1432, 0, 1, 'yes', 'meta']" 70 | 68,"['d3_14_search', 1432, 0, 1, 'yes', 'mistral']" 71 | 69,"['d3_14_search', 1432, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_14_search', 579, 0, 0, 'no', 'openai']" 73 | 71,"['d3_14_search', 579, 0, 1, 'yes', 'meta']" 74 | 72,"['d3_14_search', 579, 0, 1, 'yes', 'mistral']" 75 | 73,"['d3_14_search', 579, 0, 1, 'yes', 'gpt4']" 76 | 74,"['d3_14_search', 1548, 0, 0, 'no', 'openai']" 77 | 75,"['d3_14_search', 1548, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_14_search', 1548, 0, 1, 'yes', 'mistral']" 79 | 77,"['d3_14_search', 1548, 0, 1, 'yes', 'gpt4']" 80 | 78,"['d3_14_search', 2127, 0, 0, 'no', 'openai']" 81 | 79,"['d3_14_search', 2127, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_14_search', 2127, 0, 0, 'no', 'meta']" 83 | 81,"['d3_14_search', 2127, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_14_search', 17, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_14_search', 17, 1, 1, 'yes', 'mistral']" 86 | 84,"['d3_14_search', 17, 1, 1, 'yes', 'meta']" 87 | 85,"['d3_14_search', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- 
/scripts/d3/out/d3_15_utility.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_15_utility', 1688, 0, 1, 'yes', 'mistral']" 9 | 7,"['d3_15_utility', 1688, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_15_utility', 1688, 0, 1, 'yes', 'openai']" 11 | 9,"['d3_15_utility', 1688, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_15_utility', 66, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_15_utility', 66, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_15_utility', 66, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_15_utility', 66, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_15_utility', 726, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_15_utility', 726, 0, 0, 'no', 'meta']" 18 | 16,"['d3_15_utility', 726, 0, 0, 'no', 'openai']" 19 | 17,"['d3_15_utility', 726, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_15_utility', 589, 0, 0, 'no', 'openai']" 21 | 19,"['d3_15_utility', 589, 0, 0, 'no', 'mistral']" 22 | 20,"['d3_15_utility', 589, 0, 0, 'no', 'meta']" 23 | 21,"['d3_15_utility', 589, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_15_utility', 1944, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_15_utility', 1944, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_15_utility', 1944, 0, 0, 'no', 'openai']" 27 | 25,"['d3_15_utility', 1944, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_15_utility', 1786, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_15_utility', 1786, 0, 0, 'no', 'meta']" 30 | 28,"['d3_15_utility', 1786, 0, 0, 'no', 'openai']" 31 | 29,"['d3_15_utility', 1786, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_15_utility', 2355, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_15_utility', 2355, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_15_utility', 2355, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_15_utility', 2355, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_15_utility', 229, 0, 0, 'no', 'mistral']" 37 | 35,"['d3_15_utility', 229, 0, 1, 'yes', 'meta']" 38 | 36,"['d3_15_utility', 229, 0, 0, 'no', 'openai']" 39 | 37,"['d3_15_utility', 229, 0, 0, 'no', 'gpt4']" 40 | 38,"['d3_15_utility', 1113, 0, 1, 'yes', 'openai']" 41 | 39,"['d3_15_utility', 1113, 0, 0, 'no', 'meta']" 42 | 40,"['d3_15_utility', 1113, 0, 1, 'yes', 'mistral']" 43 | 41,"['d3_15_utility', 1113, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_15_utility', 78, 1, 0, 'no', 'openai']" 45 | 43,"['d3_15_utility', 78, 1, 0, 'no', 'meta']" 46 | 44,"['d3_15_utility', 78, 1, 0, 'no', 'mistral']" 47 | 45,"['d3_15_utility', 78, 1, 0, 'no', 'gpt4']" 48 | 46,"['d3_15_utility', 577, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_15_utility', 577, 0, 0, 'no', 'meta']" 50 | 48,"['d3_15_utility', 577, 0, 0, 'no', 'openai']" 51 | 49,"['d3_15_utility', 577, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_15_utility', 1334, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_15_utility', 1334, 0, 0, 'no', 'meta']" 54 | 52,"['d3_15_utility', 1334, 0, 0, 'no', 'openai']" 55 | 53,"['d3_15_utility', 1334, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_15_utility', 70, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_15_utility', 70, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_15_utility', 70, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_15_utility', 70, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_15_utility', 524, 0, 1, 'yes', 'openai']" 61 | 59,"['d3_15_utility', 524, 0, 1, 'yes', 'meta']" 62 | 60,"['d3_15_utility', 524, 0, 1, 'yes', 'mistral']" 63 | 61,"['d3_15_utility', 524, 0, 1, 'yes', 'gpt4']" 64 | 62,"['d3_15_utility', 1715, 0, 1, 'yes', 'meta']" 65 | 63,"['d3_15_utility', 1715, 0, 1, 'yes', 'openai']" 66 | 64,"['d3_15_utility', 1715, 0, 1, 'yes', 'mistral']" 67 | 65,"['d3_15_utility', 1715, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_15_utility', 1438, 0, 1, 'yes', 'openai']" 69 
| 67,"['d3_15_utility', 1438, 0, 1, 'yes', 'mistral']" 70 | 68,"['d3_15_utility', 1438, 0, 1, 'yes', 'meta']" 71 | 69,"['d3_15_utility', 1438, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_15_utility', 581, 0, 0, 'no', 'openai']" 73 | 71,"['d3_15_utility', 581, 0, 0, 'no', 'meta']" 74 | 72,"['d3_15_utility', 581, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_15_utility', 581, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_15_utility', 1555, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_15_utility', 1555, 0, 0, 'no', 'meta']" 78 | 76,"['d3_15_utility', 1555, 0, 0, 'no', 'openai']" 79 | 77,"['d3_15_utility', 1555, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_15_utility', 2136, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_15_utility', 2136, 0, 0, 'no', 'meta']" 82 | 80,"['d3_15_utility', 2136, 0, 0, 'no', 'openai']" 83 | 81,"['d3_15_utility', 2136, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_15_utility', 17, 1, 0, 'no', 'mistral']" 85 | 83,"['d3_15_utility', 17, 1, 0, 'no', 'meta']" 86 | 84,"['d3_15_utility', 17, 1, 0, 'no', 'openai']" 87 | 85,"['d3_15_utility', 17, 1, 0, 'no', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_16_hillary.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_16_hillary', 143, 0, 1, 'yes', 'openai']" 9 | 7,"['d3_16_hillary', 143, 0, 0, 'no', 'meta']" 10 | 8,"['d3_16_hillary', 143, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_16_hillary', 143, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_16_hillary', 5, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_16_hillary', 5, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_16_hillary', 5, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_16_hillary', 5, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_16_hillary', 61, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_16_hillary', 61, 1, 1, 'yes', 'meta']" 18 | 16,"['d3_16_hillary', 61, 1, 1, 'yes', 'mistral']" 19 | 17,"['d3_16_hillary', 61, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_16_hillary', 49, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_16_hillary', 49, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_16_hillary', 49, 1, 1, 'yes', 'mistral']" 23 | 21,"['d3_16_hillary', 49, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_16_hillary', 164, 0, 0, 'no', 'openai']" 25 | 23,"['d3_16_hillary', 164, 0, 0, 'no', 'meta']" 26 | 24,"['d3_16_hillary', 164, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_16_hillary', 164, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_16_hillary', 151, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_16_hillary', 151, 0, 0, 'no', 'meta']" 30 | 28,"['d3_16_hillary', 151, 0, 0, 'no', 'openai']" 31 | 29,"['d3_16_hillary', 151, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_16_hillary', 199, 0, 1, 'yes', 'mistral']" 33 | 31,"['d3_16_hillary', 199, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_16_hillary', 199, 0, 1, 'yes', 'openai']" 35 | 33,"['d3_16_hillary', 199, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_16_hillary', 19, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_16_hillary', 19, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_16_hillary', 19, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_16_hillary', 19, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_16_hillary', 94, 1, 1, 'yes', 'meta']" 41 | 39,"['d3_16_hillary', 94, 1, 0, 'no', 'openai']" 42 | 40,"['d3_16_hillary', 94, 1, 1, 'yes', 'mistral']" 43 | 41,"['d3_16_hillary', 94, 1, 0, 'no', 'gpt4']" 44 | 42,"['d3_16_hillary', 6, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_16_hillary', 6, 1, 1, 'yes', 'mistral']" 46 | 44,"['d3_16_hillary', 6, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_16_hillary', 6, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_16_hillary', 48, 1, 1, 'yes', 'openai']" 49 | 
47,"['d3_16_hillary', 48, 1, 1, 'yes', 'mistral']" 50 | 48,"['d3_16_hillary', 48, 1, 1, 'yes', 'meta']" 51 | 49,"['d3_16_hillary', 48, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_16_hillary', 113, 1, 1, 'yes', 'openai']" 53 | 51,"['d3_16_hillary', 113, 1, 0, 'no', 'meta']" 54 | 52,"['d3_16_hillary', 113, 1, 0, 'no', 'mistral']" 55 | 53,"['d3_16_hillary', 113, 1, 0, 'no', 'gpt4']" 56 | 54,"['d3_16_hillary', 5, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_16_hillary', 5, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_16_hillary', 5, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_16_hillary', 5, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_16_hillary', 44, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_16_hillary', 44, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_16_hillary', 44, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_16_hillary', 44, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_16_hillary', 145, 0, 0, 'no', 'meta']" 65 | 63,"['d3_16_hillary', 145, 0, 0, 'no', 'openai']" 66 | 64,"['d3_16_hillary', 145, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_16_hillary', 145, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_16_hillary', 122, 1, 0, 'no', 'mistral']" 69 | 67,"['d3_16_hillary', 122, 1, 0, 'no', 'meta']" 70 | 68,"['d3_16_hillary', 122, 1, 1, 'yes', 'openai']" 71 | 69,"['d3_16_hillary', 122, 1, 0, 'no', 'gpt4']" 72 | 70,"['d3_16_hillary', 49, 1, 1, 'yes', 'mistral']" 73 | 71,"['d3_16_hillary', 49, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_16_hillary', 49, 1, 1, 'yes', 'openai']" 75 | 73,"['d3_16_hillary', 49, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_16_hillary', 131, 1, 1, 'yes', 'openai']" 77 | 75,"['d3_16_hillary', 131, 1, 0, 'no', 'meta']" 78 | 76,"['d3_16_hillary', 131, 1, 0, 'no', 'mistral']" 79 | 77,"['d3_16_hillary', 131, 1, 1, 'yes', 'gpt4']" 80 | 78,"['d3_16_hillary', 181, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_16_hillary', 181, 0, 0, 'no', 'meta']" 82 | 80,"['d3_16_hillary', 181, 0, 0, 'no', 'openai']" 83 | 81,"['d3_16_hillary', 181, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_16_hillary', 1, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_16_hillary', 1, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_16_hillary', 1, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_16_hillary', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_17_hillary.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_17_hillary', 139, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_17_hillary', 139, 0, 0, 'no', 'meta']" 10 | 8,"['d3_17_hillary', 139, 0, 0, 'no', 'openai']" 11 | 9,"['d3_17_hillary', 139, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_17_hillary', 5, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_17_hillary', 5, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_17_hillary', 5, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_17_hillary', 5, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_17_hillary', 59, 0, 0, 'no', 'meta']" 17 | 15,"['d3_17_hillary', 59, 0, 0, 'no', 'openai']" 18 | 16,"['d3_17_hillary', 59, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_17_hillary', 59, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_17_hillary', 48, 0, 0, 'no', 'meta']" 21 | 19,"['d3_17_hillary', 48, 0, 0, 'no', 'openai']" 22 | 20,"['d3_17_hillary', 48, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_17_hillary', 48, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_17_hillary', 160, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_17_hillary', 160, 0, 0, 'no', 'meta']" 26 | 24,"['d3_17_hillary', 160, 0, 0, 'no', 'openai']" 27 | 25,"['d3_17_hillary', 160, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_17_hillary', 147, 0, 0, 'no', 'openai']" 29 | 27,"['d3_17_hillary', 147, 0, 0, 
'no', 'mistral']" 30 | 28,"['d3_17_hillary', 147, 0, 0, 'no', 'meta']" 31 | 29,"['d3_17_hillary', 147, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_17_hillary', 194, 0, 0, 'no', 'openai']" 33 | 31,"['d3_17_hillary', 194, 0, 0, 'no', 'meta']" 34 | 32,"['d3_17_hillary', 194, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_17_hillary', 194, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_17_hillary', 18, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_17_hillary', 18, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_17_hillary', 18, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_17_hillary', 18, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_17_hillary', 91, 0, 0, 'no', 'openai']" 41 | 39,"['d3_17_hillary', 91, 0, 0, 'no', 'meta']" 42 | 40,"['d3_17_hillary', 91, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_17_hillary', 91, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_17_hillary', 6, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_17_hillary', 6, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_17_hillary', 6, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_17_hillary', 6, 1, 0, 'unc', 'gpt4']" 48 | 46,"['d3_17_hillary', 47, 0, 0, 'no', 'openai']" 49 | 47,"['d3_17_hillary', 47, 0, 0, 'no', 'mistral']" 50 | 48,"['d3_17_hillary', 47, 0, 1, 'yes', 'meta']" 51 | 49,"['d3_17_hillary', 47, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_17_hillary', 110, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_17_hillary', 110, 0, 0, 'no', 'meta']" 54 | 52,"['d3_17_hillary', 110, 0, 0, 'no', 'openai']" 55 | 53,"['d3_17_hillary', 110, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_17_hillary', 5, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_17_hillary', 5, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_17_hillary', 5, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_17_hillary', 5, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_17_hillary', 43, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_17_hillary', 43, 0, 0, 'no', 'meta']" 62 | 60,"['d3_17_hillary', 43, 0, 0, 'no', 'openai']" 63 | 61,"['d3_17_hillary', 43, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_17_hillary', 141, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_17_hillary', 141, 0, 0, 'no', 'meta']" 66 | 64,"['d3_17_hillary', 141, 0, 0, 'no', 'openai']" 67 | 65,"['d3_17_hillary', 141, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_17_hillary', 118, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_17_hillary', 118, 0, 0, 'no', 'meta']" 70 | 68,"['d3_17_hillary', 118, 0, 0, 'no', 'openai']" 71 | 69,"['d3_17_hillary', 118, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_17_hillary', 48, 0, 0, 'no', 'openai']" 73 | 71,"['d3_17_hillary', 48, 0, 0, 'no', 'mistral']" 74 | 72,"['d3_17_hillary', 48, 0, 0, 'no', 'meta']" 75 | 73,"['d3_17_hillary', 48, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_17_hillary', 128, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_17_hillary', 128, 0, 0, 'no', 'meta']" 78 | 76,"['d3_17_hillary', 128, 0, 0, 'no', 'openai']" 79 | 77,"['d3_17_hillary', 128, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_17_hillary', 176, 0, 0, 'no', 'openai']" 81 | 79,"['d3_17_hillary', 176, 0, 0, 'no', 'meta']" 82 | 80,"['d3_17_hillary', 176, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_17_hillary', 176, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_17_hillary', 1, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_17_hillary', 1, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_17_hillary', 1, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_17_hillary', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_18_offensive.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_18_offensive', 416, 0, 0, 'no', 'openai']" 9 | 7,"['d3_18_offensive', 416, 0, 1, 'yes', 'mistral']" 10 | 
8,"['d3_18_offensive', 416, 0, 1, 'yes', 'meta']" 11 | 9,"['d3_18_offensive', 416, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_18_offensive', 16, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_18_offensive', 16, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_18_offensive', 16, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_18_offensive', 16, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_18_offensive', 179, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_18_offensive', 179, 1, 1, 'yes', 'mistral']" 18 | 16,"['d3_18_offensive', 179, 1, 1, 'yes', 'meta']" 19 | 17,"['d3_18_offensive', 179, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_18_offensive', 145, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_18_offensive', 145, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_18_offensive', 145, 1, 1, 'yes', 'mistral']" 23 | 21,"['d3_18_offensive', 145, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_18_offensive', 480, 0, 1, 'yes', 'mistral']" 25 | 23,"['d3_18_offensive', 480, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_18_offensive', 480, 0, 0, 'no', 'openai']" 27 | 25,"['d3_18_offensive', 480, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_18_offensive', 441, 0, 1, 'yes', 'mistral']" 29 | 27,"['d3_18_offensive', 441, 0, 1, 'yes', 'meta']" 30 | 28,"['d3_18_offensive', 441, 0, 0, 'no', 'openai']" 31 | 29,"['d3_18_offensive', 441, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_18_offensive', 581, 0, 1, 'yes', 'mistral']" 33 | 31,"['d3_18_offensive', 581, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_18_offensive', 581, 0, 1, 'yes', 'openai']" 35 | 33,"['d3_18_offensive', 581, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_18_offensive', 56, 1, 0, 'no', 'openai']" 37 | 35,"['d3_18_offensive', 56, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_18_offensive', 56, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_18_offensive', 56, 1, 0, 'no', 'gpt4']" 40 | 38,"['d3_18_offensive', 275, 0, 1, 'yes', 'mistral']" 41 | 39,"['d3_18_offensive', 275, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_18_offensive', 275, 0, 0, 'no', 'openai']" 43 | 41,"['d3_18_offensive', 275, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_18_offensive', 19, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_18_offensive', 19, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_18_offensive', 19, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_18_offensive', 19, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_18_offensive', 142, 1, 0, 'no', 'openai']" 49 | 47,"['d3_18_offensive', 142, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_18_offensive', 142, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_18_offensive', 142, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_18_offensive', 329, 0, 0, 'no', 'openai']" 53 | 51,"['d3_18_offensive', 329, 0, 1, 'yes', 'meta']" 54 | 52,"['d3_18_offensive', 329, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_18_offensive', 329, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_18_offensive', 17, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_18_offensive', 17, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_18_offensive', 17, 1, 0, 'no', 'openai']" 59 | 57,"['d3_18_offensive', 17, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_18_offensive', 129, 1, 0, 'no', 'openai']" 61 | 59,"['d3_18_offensive', 129, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_18_offensive', 129, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_18_offensive', 129, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_18_offensive', 423, 0, 1, 'yes', 'mistral']" 65 | 63,"['d3_18_offensive', 423, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_18_offensive', 423, 0, 0, 'no', 'openai']" 67 | 65,"['d3_18_offensive', 423, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_18_offensive', 355, 0, 0, 'no', 'openai']" 69 | 67,"['d3_18_offensive', 355, 0, 0, 'no', 'meta']" 70 | 68,"['d3_18_offensive', 355, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_18_offensive', 355, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_18_offensive', 143, 1, 0, 'no', 
'mistral']" 73 | 71,"['d3_18_offensive', 143, 1, 0, 'no', 'meta']" 74 | 72,"['d3_18_offensive', 143, 1, 0, 'no', 'openai']" 75 | 73,"['d3_18_offensive', 143, 1, 0, 'no', 'gpt4']" 76 | 74,"['d3_18_offensive', 384, 0, 0, 'no', 'openai']" 77 | 75,"['d3_18_offensive', 384, 0, 0, 'no', 'mistral']" 78 | 76,"['d3_18_offensive', 384, 0, 0, 'no', 'meta']" 79 | 77,"['d3_18_offensive', 384, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_18_offensive', 527, 0, 0, 'no', 'openai']" 81 | 79,"['d3_18_offensive', 527, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_18_offensive', 527, 0, 1, 'yes', 'mistral']" 83 | 81,"['d3_18_offensive', 527, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_18_offensive', 4, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_18_offensive', 4, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_18_offensive', 4, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_18_offensive', 4, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_1_objective.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_1_objective', 472, 0, 1, 'yes', 'openai']" 9 | 7,"['d3_1_objective', 472, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_1_objective', 472, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_1_objective', 472, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_1_objective', 18, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_1_objective', 18, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_1_objective', 18, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_1_objective', 18, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_1_objective', 203, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_1_objective', 203, 1, 0, 'no', 'mistral']" 18 | 16,"['d3_1_objective', 203, 1, 0, 'no', 'meta']" 19 | 17,"['d3_1_objective', 203, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_1_objective', 164, 1, 1, 'yes', 'meta']" 21 | 19,"['d3_1_objective', 164, 1, 1, 'yes', 'openai']" 22 | 20,"['d3_1_objective', 164, 1, 1, 'yes', 'mistral']" 23 | 21,"['d3_1_objective', 164, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_1_objective', 544, 0, 1, 'yes', 'mistral']" 25 | 23,"['d3_1_objective', 544, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_1_objective', 544, 0, 1, 'yes', 'openai']" 27 | 25,"['d3_1_objective', 544, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_1_objective', 500, 0, 0, 'no', 'openai']" 29 | 27,"['d3_1_objective', 500, 0, 0, 'no', 'meta']" 30 | 28,"['d3_1_objective', 500, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_1_objective', 500, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_1_objective', 659, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_1_objective', 659, 0, 0, 'no', 'mistral']" 34 | 32,"['d3_1_objective', 659, 0, 0, 'no', 'meta']" 35 | 33,"['d3_1_objective', 659, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_1_objective', 64, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_1_objective', 64, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_1_objective', 64, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_1_objective', 64, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_1_objective', 311, 1, 1, 'yes', 'mistral']" 41 | 39,"['d3_1_objective', 311, 1, 1, 'yes', 'meta']" 42 | 40,"['d3_1_objective', 311, 1, 1, 'yes', 'openai']" 43 | 41,"['d3_1_objective', 311, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_1_objective', 22, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_1_objective', 22, 1, 1, 'yes', 'mistral']" 46 | 44,"['d3_1_objective', 22, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_1_objective', 22, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_1_objective', 161, 1, 0, 'no', 'openai']" 49 | 47,"['d3_1_objective', 161, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_1_objective', 161, 1, 1, 'yes', 'mistral']" 51 | 
49,"['d3_1_objective', 161, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_1_objective', 373, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_1_objective', 373, 0, 0, 'no', 'meta']" 54 | 52,"['d3_1_objective', 373, 0, 0, 'no', 'openai']" 55 | 53,"['d3_1_objective', 373, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_1_objective', 19, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_1_objective', 19, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_1_objective', 19, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_1_objective', 19, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_1_objective', 146, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_1_objective', 146, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_1_objective', 146, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_1_objective', 146, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_1_objective', 480, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_1_objective', 480, 0, 0, 'no', 'meta']" 66 | 64,"['d3_1_objective', 480, 0, 1, 'yes', 'openai']" 67 | 65,"['d3_1_objective', 480, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_1_objective', 402, 0, 0, 'no', 'meta']" 69 | 67,"['d3_1_objective', 402, 0, 1, 'yes', 'openai']" 70 | 68,"['d3_1_objective', 402, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_1_objective', 402, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_1_objective', 162, 1, 0, 'no', 'meta']" 73 | 71,"['d3_1_objective', 162, 1, 1, 'yes', 'openai']" 74 | 72,"['d3_1_objective', 162, 1, 0, 'no', 'mistral']" 75 | 73,"['d3_1_objective', 162, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_1_objective', 435, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_1_objective', 435, 0, 0, 'no', 'meta']" 78 | 76,"['d3_1_objective', 435, 0, 0, 'no', 'openai']" 79 | 77,"['d3_1_objective', 435, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_1_objective', 598, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_1_objective', 598, 0, 0, 'no', 'meta']" 82 | 80,"['d3_1_objective', 598, 0, 1, 'yes', 'openai']" 83 | 81,"['d3_1_objective', 598, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_1_objective', 4, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_1_objective', 4, 1, 0, 'no', 'meta']" 86 | 84,"['d3_1_objective', 4, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_1_objective', 4, 1, 0, 'no', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_20_pro-life.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_20_pro-life', 136, 1, 1, 'yes', 'meta']" 9 | 7,"['d3_20_pro-life', 136, 1, 1, 'yes', 'openai']" 10 | 8,"['d3_20_pro-life', 136, 1, 1, 'yes', 'mistral']" 11 | 9,"['d3_20_pro-life', 136, 1, 1, 'yes', 'gpt4']" 12 | 10,"['d3_20_pro-life', 5, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_20_pro-life', 5, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_20_pro-life', 5, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_20_pro-life', 5, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_20_pro-life', 58, 1, 0, 'no', 'mistral']" 17 | 15,"['d3_20_pro-life', 58, 1, 0, 'no', 'meta']" 18 | 16,"['d3_20_pro-life', 58, 1, 0, 'no', 'openai']" 19 | 17,"['d3_20_pro-life', 58, 1, 0, 'no', 'gpt4']" 20 | 18,"['d3_20_pro-life', 47, 1, 0, 'no', 'openai']" 21 | 19,"['d3_20_pro-life', 47, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_20_pro-life', 47, 1, 0, 'no', 'mistral']" 23 | 21,"['d3_20_pro-life', 47, 1, 0, 'no', 'gpt4']" 24 | 22,"['d3_20_pro-life', 156, 0, 0, 'no', 'openai']" 25 | 23,"['d3_20_pro-life', 156, 0, 0, 'no', 'mistral']" 26 | 24,"['d3_20_pro-life', 156, 0, 0, 'no', 'meta']" 27 | 25,"['d3_20_pro-life', 156, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_20_pro-life', 144, 1, 1, 'yes', 'openai']" 29 | 27,"['d3_20_pro-life', 144, 1, 1, 'yes', 'mistral']" 30 | 
28,"['d3_20_pro-life', 144, 1, 1, 'yes', 'meta']" 31 | 29,"['d3_20_pro-life', 144, 1, 1, 'yes', 'gpt4']" 32 | 30,"['d3_20_pro-life', 190, 0, 0, 'no', 'openai']" 33 | 31,"['d3_20_pro-life', 190, 0, 0, 'no', 'mistral']" 34 | 32,"['d3_20_pro-life', 190, 0, 0, 'no', 'meta']" 35 | 33,"['d3_20_pro-life', 190, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_20_pro-life', 18, 1, 0, 'no', 'openai']" 37 | 35,"['d3_20_pro-life', 18, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_20_pro-life', 18, 1, 0, 'no', 'mistral']" 39 | 37,"['d3_20_pro-life', 18, 1, 0, 'no', 'gpt4']" 40 | 38,"['d3_20_pro-life', 89, 1, 1, 'yes', 'meta']" 41 | 39,"['d3_20_pro-life', 89, 1, 1, 'yes', 'openai']" 42 | 40,"['d3_20_pro-life', 89, 1, 1, 'yes', 'mistral']" 43 | 41,"['d3_20_pro-life', 89, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_20_pro-life', 6, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_20_pro-life', 6, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_20_pro-life', 6, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_20_pro-life', 6, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_20_pro-life', 46, 1, 1, 'yes', 'meta']" 49 | 47,"['d3_20_pro-life', 46, 1, 1, 'yes', 'openai']" 50 | 48,"['d3_20_pro-life', 46, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_20_pro-life', 46, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_20_pro-life', 107, 1, 0, 'no', 'openai']" 53 | 51,"['d3_20_pro-life', 107, 1, 1, 'yes', 'meta']" 54 | 52,"['d3_20_pro-life', 107, 1, 1, 'yes', 'mistral']" 55 | 53,"['d3_20_pro-life', 107, 1, 1, 'yes', 'gpt4']" 56 | 54,"['d3_20_pro-life', 5, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_20_pro-life', 5, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_20_pro-life', 5, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_20_pro-life', 5, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_20_pro-life', 42, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_20_pro-life', 42, 1, 0, 'no', 'mistral']" 62 | 60,"['d3_20_pro-life', 42, 1, 1, 'yes', 'meta']" 63 | 61,"['d3_20_pro-life', 42, 1, 0, 'no', 'gpt4']" 64 | 62,"['d3_20_pro-life', 138, 1, 0, 'no', 'openai']" 65 | 63,"['d3_20_pro-life', 138, 1, 0, 'no', 'mistral']" 66 | 64,"['d3_20_pro-life', 138, 1, 1, 'yes', 'meta']" 67 | 65,"['d3_20_pro-life', 138, 1, 0, 'no', 'gpt4']" 68 | 66,"['d3_20_pro-life', 116, 1, 1, 'yes', 'openai']" 69 | 67,"['d3_20_pro-life', 116, 1, 1, 'yes', 'meta']" 70 | 68,"['d3_20_pro-life', 116, 1, 1, 'yes', 'mistral']" 71 | 69,"['d3_20_pro-life', 116, 1, 1, 'yes', 'gpt4']" 72 | 70,"['d3_20_pro-life', 46, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_20_pro-life', 46, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_20_pro-life', 46, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_20_pro-life', 46, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_20_pro-life', 125, 1, 1, 'yes', 'meta']" 77 | 75,"['d3_20_pro-life', 125, 1, 0, 'no', 'openai']" 78 | 76,"['d3_20_pro-life', 125, 1, 1, 'yes', 'mistral']" 79 | 77,"['d3_20_pro-life', 125, 1, 0, 'no', 'gpt4']" 80 | 78,"['d3_20_pro-life', 172, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_20_pro-life', 172, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_20_pro-life', 172, 0, 0, 'no', 'openai']" 83 | 81,"['d3_20_pro-life', 172, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_20_pro-life', 1, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_20_pro-life', 1, 1, 1, 'yes', 'mistral']" 86 | 84,"['d3_20_pro-life', 1, 1, 1, 'yes', 'meta']" 87 | 85,"['d3_20_pro-life', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_25_math.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_25_math', 5689, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_25_math', 
5689, 0, 0, 'no', 'meta']" 10 | 8,"['d3_25_math', 5689, 0, 0, 'no', 'openai']" 11 | 9,"['d3_25_math', 5689, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_25_math', 222, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_25_math', 222, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_25_math', 222, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_25_math', 222, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_25_math', 2447, 0, 1, 'yes', 'mistral']" 17 | 15,"['d3_25_math', 2447, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_25_math', 2447, 0, 1, 'yes', 'openai']" 19 | 17,"['d3_25_math', 2447, 0, 1, 'yes', 'gpt4']" 20 | 18,"['d3_25_math', 1986, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_25_math', 1986, 0, 0, 'no', 'meta']" 22 | 20,"['d3_25_math', 1986, 0, 0, 'no', 'openai']" 23 | 21,"['d3_25_math', 1986, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_25_math', 6553, 0, 0, 'no', 'openai']" 25 | 23,"['d3_25_math', 6553, 0, 0, 'no', 'meta']" 26 | 24,"['d3_25_math', 6553, 0, 1, 'yes', 'mistral']" 27 | 25,"['d3_25_math', 6553, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_25_math', 6021, 0, 0, 'no', 'meta']" 29 | 27,"['d3_25_math', 6021, 0, 0, 'no', 'openai']" 30 | 28,"['d3_25_math', 6021, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_25_math', 6021, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_25_math', 7938, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_25_math', 7938, 0, 0, 'no', 'meta']" 34 | 32,"['d3_25_math', 7938, 0, 1, 'yes', 'openai']" 35 | 33,"['d3_25_math', 7938, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_25_math', 773, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_25_math', 773, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_25_math', 773, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_25_math', 773, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_25_math', 3754, 0, 0, 'no', 'meta']" 41 | 39,"['d3_25_math', 3754, 0, 1, 'yes', 'openai']" 42 | 40,"['d3_25_math', 3754, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_25_math', 3754, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_25_math', 265, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_25_math', 265, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_25_math', 265, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_25_math', 265, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_25_math', 1945, 0, 1, 'yes', 'openai']" 49 | 47,"['d3_25_math', 1945, 0, 1, 'yes', 'mistral']" 50 | 48,"['d3_25_math', 1945, 0, 1, 'yes', 'meta']" 51 | 49,"['d3_25_math', 1945, 0, 1, 'yes', 'gpt4']" 52 | 50,"['d3_25_math', 4496, 0, 0, 'no', 'openai']" 53 | 51,"['d3_25_math', 4496, 0, 0, 'no', 'meta']" 54 | 52,"['d3_25_math', 4496, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_25_math', 4496, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_25_math', 236, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_25_math', 236, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_25_math', 236, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_25_math', 236, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_25_math', 1769, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_25_math', 1769, 0, 0, 'no', 'meta']" 62 | 60,"['d3_25_math', 1769, 0, 0, 'no', 'openai']" 63 | 61,"['d3_25_math', 1769, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_25_math', 5782, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_25_math', 5782, 0, 0, 'no', 'meta']" 66 | 64,"['d3_25_math', 5782, 0, 0, 'no', 'openai']" 67 | 65,"['d3_25_math', 5782, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_25_math', 4848, 0, 0, 'no', 'meta']" 69 | 67,"['d3_25_math', 4848, 0, 1, 'yes', 'openai']" 70 | 68,"['d3_25_math', 4848, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_25_math', 4848, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_25_math', 1961, 0, 1, 'yes', 'mistral']" 73 | 71,"['d3_25_math', 1961, 0, 0, 'no', 'meta']" 74 | 72,"['d3_25_math', 1961, 0, 1, 'yes', 'openai']" 75 | 73,"['d3_25_math', 1961, 0, 1, 'yes', 'gpt4']" 76 | 74,"['d3_25_math', 5243, 0, 0, 'no', 'openai']" 77 | 
75,"['d3_25_math', 5243, 0, 0, 'no', 'meta']" 78 | 76,"['d3_25_math', 5243, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_25_math', 5243, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_25_math', 7202, 0, 1, 'yes', 'openai']" 81 | 79,"['d3_25_math', 7202, 0, 0, 'no', 'meta']" 82 | 80,"['d3_25_math', 7202, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_25_math', 7202, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_25_math', 57, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_25_math', 57, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_25_math', 57, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_25_math', 57, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_27_grammar.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_27_grammar', 528, 1, 0, 'no', 'mistral']" 9 | 7,"['d3_27_grammar', 528, 1, 0, 'no', 'meta']" 10 | 8,"['d3_27_grammar', 528, 1, 0, 'no', 'openai']" 11 | 9,"['d3_27_grammar', 528, 1, 0, 'no', 'gpt4']" 12 | 10,"['d3_27_grammar', 20, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_27_grammar', 20, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_27_grammar', 20, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_27_grammar', 20, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_27_grammar', 227, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_27_grammar', 227, 1, 1, 'yes', 'meta']" 18 | 16,"['d3_27_grammar', 227, 1, 1, 'yes', 'mistral']" 19 | 17,"['d3_27_grammar', 227, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_27_grammar', 184, 1, 1, 'yes', 'mistral']" 21 | 19,"['d3_27_grammar', 184, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_27_grammar', 184, 1, 1, 'yes', 'openai']" 23 | 21,"['d3_27_grammar', 184, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_27_grammar', 608, 0, 0, 'no', 'openai']" 25 | 23,"['d3_27_grammar', 608, 0, 0, 'no', 'mistral']" 26 | 24,"['d3_27_grammar', 608, 0, 0, 'no', 'meta']" 27 | 25,"['d3_27_grammar', 608, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_27_grammar', 558, 1, 1, 'yes', 'openai']" 29 | 27,"['d3_27_grammar', 558, 1, 1, 'yes', 'mistral']" 30 | 28,"['d3_27_grammar', 558, 1, 1, 'yes', 'meta']" 31 | 29,"['d3_27_grammar', 558, 1, 1, 'yes', 'gpt4']" 32 | 30,"['d3_27_grammar', 736, 0, 0, 'no', 'meta']" 33 | 31,"['d3_27_grammar', 736, 0, 1, 'yes', 'openai']" 34 | 32,"['d3_27_grammar', 736, 0, 1, 'yes', 'mistral']" 35 | 33,"['d3_27_grammar', 736, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_27_grammar', 71, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_27_grammar', 71, 1, 0, 'no', 'meta']" 38 | 36,"['d3_27_grammar', 71, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_27_grammar', 71, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_27_grammar', 348, 1, 1, 'yes', 'mistral']" 41 | 39,"['d3_27_grammar', 348, 1, 1, 'yes', 'meta']" 42 | 40,"['d3_27_grammar', 348, 1, 1, 'yes', 'openai']" 43 | 41,"['d3_27_grammar', 348, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_27_grammar', 24, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_27_grammar', 24, 1, 1, 'yes', 'mistral']" 46 | 44,"['d3_27_grammar', 24, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_27_grammar', 24, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_27_grammar', 180, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_27_grammar', 180, 1, 0, 'no', 'meta']" 50 | 48,"['d3_27_grammar', 180, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_27_grammar', 180, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_27_grammar', 417, 1, 1, 'yes', 'openai']" 53 | 51,"['d3_27_grammar', 417, 1, 1, 'yes', 'meta']" 54 | 52,"['d3_27_grammar', 417, 1, 1, 'yes', 'mistral']" 55 | 53,"['d3_27_grammar', 417, 1, 1, 'yes', 'gpt4']" 56 | 54,"['d3_27_grammar', 21, 1, 0, 'no', 'meta']" 57 | 
55,"['d3_27_grammar', 21, 1, 0, 'no', 'openai']" 58 | 56,"['d3_27_grammar', 21, 1, 0, 'no', 'mistral']" 59 | 57,"['d3_27_grammar', 21, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_27_grammar', 164, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_27_grammar', 164, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_27_grammar', 164, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_27_grammar', 164, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_27_grammar', 536, 1, 1, 'yes', 'openai']" 65 | 63,"['d3_27_grammar', 536, 1, 1, 'yes', 'mistral']" 66 | 64,"['d3_27_grammar', 536, 1, 1, 'yes', 'meta']" 67 | 65,"['d3_27_grammar', 536, 1, 1, 'yes', 'gpt4']" 68 | 66,"['d3_27_grammar', 450, 1, 1, 'yes', 'meta']" 69 | 67,"['d3_27_grammar', 450, 1, 1, 'yes', 'openai']" 70 | 68,"['d3_27_grammar', 450, 1, 1, 'yes', 'mistral']" 71 | 69,"['d3_27_grammar', 450, 1, 1, 'yes', 'gpt4']" 72 | 70,"['d3_27_grammar', 182, 1, 1, 'yes', 'meta']" 73 | 71,"['d3_27_grammar', 182, 1, 1, 'yes', 'openai']" 74 | 72,"['d3_27_grammar', 182, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_27_grammar', 182, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_27_grammar', 486, 1, 0, 'no', 'mistral']" 77 | 75,"['d3_27_grammar', 486, 1, 0, 'no', 'meta']" 78 | 76,"['d3_27_grammar', 486, 1, 0, 'no', 'openai']" 79 | 77,"['d3_27_grammar', 486, 1, 0, 'no', 'gpt4']" 80 | 78,"['d3_27_grammar', 668, 0, 0, 'no', 'openai']" 81 | 79,"['d3_27_grammar', 668, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_27_grammar', 668, 0, 0, 'no', 'meta']" 83 | 81,"['d3_27_grammar', 668, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_27_grammar', 5, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_27_grammar', 5, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_27_grammar', 5, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_27_grammar', 5, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_28_sexis.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_28_sexis', 133, 0, 0, 'no', 'openai']" 9 | 7,"['d3_28_sexis', 133, 0, 0, 'no', 'meta']" 10 | 8,"['d3_28_sexis', 133, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_28_sexis', 133, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_28_sexis', 5, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_28_sexis', 5, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_28_sexis', 5, 1, 0, 'no', 'mistral']" 15 | 13,"['d3_28_sexis', 5, 1, 0, 'no', 'gpt4']" 16 | 14,"['d3_28_sexis', 57, 1, 0, 'no', 'openai']" 17 | 15,"['d3_28_sexis', 57, 1, 1, 'yes', 'meta']" 18 | 16,"['d3_28_sexis', 57, 1, 0, 'no', 'mistral']" 19 | 17,"['d3_28_sexis', 57, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_28_sexis', 46, 1, 0, 'no', 'meta']" 21 | 19,"['d3_28_sexis', 46, 1, 0, 'no', 'openai']" 22 | 20,"['d3_28_sexis', 46, 1, 0, 'no', 'mistral']" 23 | 21,"['d3_28_sexis', 46, 1, 0, 'no', 'gpt4']" 24 | 22,"['d3_28_sexis', 153, 0, 0, 'no', 'openai']" 25 | 23,"['d3_28_sexis', 153, 0, 0, 'no', 'meta']" 26 | 24,"['d3_28_sexis', 153, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_28_sexis', 153, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_28_sexis', 141, 0, 0, 'no', 'openai']" 29 | 27,"['d3_28_sexis', 141, 0, 0, 'no', 'mistral']" 30 | 28,"['d3_28_sexis', 141, 0, 0, 'no', 'meta']" 31 | 29,"['d3_28_sexis', 141, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_28_sexis', 186, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_28_sexis', 186, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_28_sexis', 186, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_28_sexis', 186, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_28_sexis', 18, 1, 0, 'no', 'openai']" 37 | 35,"['d3_28_sexis', 18, 1, 0, 'no', 'meta']" 38 | 36,"['d3_28_sexis', 18, 
1, 0, 'no', 'mistral']" 39 | 37,"['d3_28_sexis', 18, 1, 0, 'no', 'gpt4']" 40 | 38,"['d3_28_sexis', 88, 1, 1, 'yes', 'meta']" 41 | 39,"['d3_28_sexis', 88, 1, 1, 'yes', 'openai']" 42 | 40,"['d3_28_sexis', 88, 1, 1, 'yes', 'mistral']" 43 | 41,"['d3_28_sexis', 88, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_28_sexis', 6, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_28_sexis', 6, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_28_sexis', 6, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_28_sexis', 6, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_28_sexis', 45, 1, 1, 'yes', 'mistral']" 49 | 47,"['d3_28_sexis', 45, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_28_sexis', 45, 1, 0, 'no', 'openai']" 51 | 49,"['d3_28_sexis', 45, 1, 0, 'no', 'gpt4']" 52 | 50,"['d3_28_sexis', 105, 1, 1, 'yes', 'meta']" 53 | 51,"['d3_28_sexis', 105, 1, 1, 'yes', 'openai']" 54 | 52,"['d3_28_sexis', 105, 1, 1, 'yes', 'mistral']" 55 | 53,"['d3_28_sexis', 105, 1, 1, 'yes', 'gpt4']" 56 | 54,"['d3_28_sexis', 5, 1, 0, 'no', 'mistral']" 57 | 55,"['d3_28_sexis', 5, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_28_sexis', 5, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_28_sexis', 5, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_28_sexis', 41, 1, 0, 'no', 'openai']" 61 | 59,"['d3_28_sexis', 41, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_28_sexis', 41, 1, 0, 'no', 'mistral']" 63 | 61,"['d3_28_sexis', 41, 1, 0, 'no', 'gpt4']" 64 | 62,"['d3_28_sexis', 135, 0, 0, 'no', 'meta']" 65 | 63,"['d3_28_sexis', 135, 0, 0, 'no', 'openai']" 66 | 64,"['d3_28_sexis', 135, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_28_sexis', 135, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_28_sexis', 113, 1, 0, 'no', 'openai']" 69 | 67,"['d3_28_sexis', 113, 1, 1, 'yes', 'meta']" 70 | 68,"['d3_28_sexis', 113, 1, 0, 'no', 'mistral']" 71 | 69,"['d3_28_sexis', 113, 1, 1, 'yes', 'gpt4']" 72 | 70,"['d3_28_sexis', 46, 1, 0, 'no', 'openai']" 73 | 71,"['d3_28_sexis', 46, 1, 0, 'no', 'mistral']" 74 | 72,"['d3_28_sexis', 46, 1, 0, 'no', 'meta']" 75 | 73,"['d3_28_sexis', 46, 1, 0, 'no', 'gpt4']" 76 | 74,"['d3_28_sexis', 123, 1, 1, 'yes', 'meta']" 77 | 75,"['d3_28_sexis', 123, 1, 1, 'yes', 'openai']" 78 | 76,"['d3_28_sexis', 123, 1, 0, 'no', 'mistral']" 79 | 77,"['d3_28_sexis', 123, 1, 0, 'no', 'gpt4']" 80 | 78,"['d3_28_sexis', 169, 0, 1, 'yes', 'meta']" 81 | 79,"['d3_28_sexis', 169, 0, 0, 'no', 'openai']" 82 | 80,"['d3_28_sexis', 169, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_28_sexis', 169, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_28_sexis', 1, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_28_sexis', 1, 1, 0, 'no', 'openai']" 86 | 84,"['d3_28_sexis', 1, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_28_sexis', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_29_sexis.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_29_sexis', 137, 0, 0, 'no', 'openai']" 9 | 7,"['d3_29_sexis', 137, 0, 0, 'no', 'meta']" 10 | 8,"['d3_29_sexis', 137, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_29_sexis', 137, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_29_sexis', 5, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_29_sexis', 5, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_29_sexis', 5, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_29_sexis', 5, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_29_sexis', 59, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_29_sexis', 59, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_29_sexis', 59, 0, 1, 'yes', 'openai']" 19 | 17,"['d3_29_sexis', 59, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_29_sexis', 47, 0, 1, 'yes', 'openai']" 21 | 19,"['d3_29_sexis', 47, 
0, 1, 'yes', 'meta']" 22 | 20,"['d3_29_sexis', 47, 0, 1, 'yes', 'mistral']" 23 | 21,"['d3_29_sexis', 47, 0, 1, 'yes', 'gpt4']" 24 | 22,"['d3_29_sexis', 158, 0, 0, 'no', 'meta']" 25 | 23,"['d3_29_sexis', 158, 0, 0, 'no', 'openai']" 26 | 24,"['d3_29_sexis', 158, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_29_sexis', 158, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_29_sexis', 145, 0, 0, 'no', 'openai']" 29 | 27,"['d3_29_sexis', 145, 0, 0, 'no', 'mistral']" 30 | 28,"['d3_29_sexis', 145, 0, 0, 'no', 'meta']" 31 | 29,"['d3_29_sexis', 145, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_29_sexis', 191, 0, 0, 'no', 'meta']" 33 | 31,"['d3_29_sexis', 191, 0, 0, 'no', 'openai']" 34 | 32,"['d3_29_sexis', 191, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_29_sexis', 191, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_29_sexis', 18, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_29_sexis', 18, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_29_sexis', 18, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_29_sexis', 18, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_29_sexis', 90, 0, 0, 'no', 'openai']" 41 | 39,"['d3_29_sexis', 90, 0, 0, 'no', 'meta']" 42 | 40,"['d3_29_sexis', 90, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_29_sexis', 90, 0, 0, 'unc', 'gpt4']" 44 | 42,"['d3_29_sexis', 6, 1, 0, 'no', 'mistral']" 45 | 43,"['d3_29_sexis', 6, 1, 0, 'no', 'meta']" 46 | 44,"['d3_29_sexis', 6, 1, 0, 'no', 'openai']" 47 | 45,"['d3_29_sexis', 6, 1, 0, 'no', 'gpt4']" 48 | 46,"['d3_29_sexis', 47, 0, 1, 'yes', 'openai']" 49 | 47,"['d3_29_sexis', 47, 0, 1, 'yes', 'mistral']" 50 | 48,"['d3_29_sexis', 47, 0, 1, 'yes', 'meta']" 51 | 49,"['d3_29_sexis', 47, 0, 1, 'yes', 'gpt4']" 52 | 50,"['d3_29_sexis', 108, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_29_sexis', 108, 0, 0, 'no', 'meta']" 54 | 52,"['d3_29_sexis', 108, 0, 0, 'no', 'openai']" 55 | 53,"['d3_29_sexis', 108, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_29_sexis', 5, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_29_sexis', 5, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_29_sexis', 5, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_29_sexis', 5, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_29_sexis', 42, 0, 0, 'no', 'meta']" 61 | 59,"['d3_29_sexis', 42, 0, 0, 'no', 'openai']" 62 | 60,"['d3_29_sexis', 42, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_29_sexis', 42, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_29_sexis', 139, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_29_sexis', 139, 0, 0, 'no', 'meta']" 66 | 64,"['d3_29_sexis', 139, 0, 0, 'no', 'openai']" 67 | 65,"['d3_29_sexis', 139, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_29_sexis', 117, 0, 0, 'no', 'meta']" 69 | 67,"['d3_29_sexis', 117, 0, 0, 'no', 'openai']" 70 | 68,"['d3_29_sexis', 117, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_29_sexis', 117, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_29_sexis', 47, 0, 1, 'yes', 'openai']" 73 | 71,"['d3_29_sexis', 47, 0, 1, 'yes', 'meta']" 74 | 72,"['d3_29_sexis', 47, 0, 1, 'yes', 'mistral']" 75 | 73,"['d3_29_sexis', 47, 0, 1, 'yes', 'gpt4']" 76 | 74,"['d3_29_sexis', 126, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_29_sexis', 126, 0, 0, 'no', 'meta']" 78 | 76,"['d3_29_sexis', 126, 0, 0, 'no', 'openai']" 79 | 77,"['d3_29_sexis', 126, 0, 0, 'unc', 'gpt4']" 80 | 78,"['d3_29_sexis', 174, 0, 1, 'yes', 'openai']" 81 | 79,"['d3_29_sexis', 174, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_29_sexis', 174, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_29_sexis', 174, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_29_sexis', 1, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_29_sexis', 1, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_29_sexis', 1, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_29_sexis', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- 
/scripts/d3/out/d3_2_subjective.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_2_subjective', 484, 0, 1, 'yes', 'openai']" 9 | 7,"['d3_2_subjective', 484, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_2_subjective', 484, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_2_subjective', 484, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_2_subjective', 18, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_2_subjective', 18, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_2_subjective', 18, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_2_subjective', 18, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_2_subjective', 208, 1, 1, 'yes', 'mistral']" 17 | 15,"['d3_2_subjective', 208, 1, 1, 'yes', 'meta']" 18 | 16,"['d3_2_subjective', 208, 1, 1, 'yes', 'openai']" 19 | 17,"['d3_2_subjective', 208, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_2_subjective', 168, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_2_subjective', 168, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_2_subjective', 168, 1, 1, 'yes', 'mistral']" 23 | 21,"['d3_2_subjective', 168, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_2_subjective', 557, 0, 1, 'yes', 'openai']" 25 | 23,"['d3_2_subjective', 557, 0, 1, 'yes', 'mistral']" 26 | 24,"['d3_2_subjective', 557, 0, 1, 'yes', 'meta']" 27 | 25,"['d3_2_subjective', 557, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_2_subjective', 512, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_2_subjective', 512, 0, 0, 'no', 'meta']" 30 | 28,"['d3_2_subjective', 512, 0, 0, 'no', 'openai']" 31 | 29,"['d3_2_subjective', 512, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_2_subjective', 675, 0, 0, 'no', 'meta']" 33 | 31,"['d3_2_subjective', 675, 0, 0, 'no', 'openai']" 34 | 32,"['d3_2_subjective', 675, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_2_subjective', 675, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_2_subjective', 65, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_2_subjective', 65, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_2_subjective', 65, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_2_subjective', 65, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_2_subjective', 319, 1, 1, 'yes', 'meta']" 41 | 39,"['d3_2_subjective', 319, 1, 1, 'yes', 'openai']" 42 | 40,"['d3_2_subjective', 319, 1, 1, 'yes', 'mistral']" 43 | 41,"['d3_2_subjective', 319, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_2_subjective', 22, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_2_subjective', 22, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_2_subjective', 22, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_2_subjective', 22, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_2_subjective', 165, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_2_subjective', 165, 1, 1, 'yes', 'mistral']" 50 | 48,"['d3_2_subjective', 165, 1, 1, 'yes', 'meta']" 51 | 49,"['d3_2_subjective', 165, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_2_subjective', 382, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_2_subjective', 382, 0, 0, 'no', 'meta']" 54 | 52,"['d3_2_subjective', 382, 0, 0, 'no', 'openai']" 55 | 53,"['d3_2_subjective', 382, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_2_subjective', 20, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_2_subjective', 20, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_2_subjective', 20, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_2_subjective', 20, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_2_subjective', 150, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_2_subjective', 150, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_2_subjective', 150, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_2_subjective', 150, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_2_subjective', 491, 0, 0, 'no', 'meta']" 65 | 63,"['d3_2_subjective', 491, 0, 0, 'no', 'openai']" 66 | 64,"['d3_2_subjective', 491, 0, 0, 'no', 'mistral']" 67 | 
65,"['d3_2_subjective', 491, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_2_subjective', 412, 0, 0, 'no', 'openai']" 69 | 67,"['d3_2_subjective', 412, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_2_subjective', 412, 0, 0, 'no', 'meta']" 71 | 69,"['d3_2_subjective', 412, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_2_subjective', 166, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_2_subjective', 166, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_2_subjective', 166, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_2_subjective', 166, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_2_subjective', 446, 0, 1, 'yes', 'openai']" 77 | 75,"['d3_2_subjective', 446, 0, 0, 'no', 'mistral']" 78 | 76,"['d3_2_subjective', 446, 0, 0, 'no', 'meta']" 79 | 77,"['d3_2_subjective', 446, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_2_subjective', 612, 0, 0, 'no', 'meta']" 81 | 79,"['d3_2_subjective', 612, 0, 1, 'yes', 'openai']" 82 | 80,"['d3_2_subjective', 612, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_2_subjective', 612, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_2_subjective', 4, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_2_subjective', 4, 1, 0, 'no', 'openai']" 86 | 84,"['d3_2_subjective', 4, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_2_subjective', 4, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_30_news.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_30_news', 3694, 0, 0, 'no', 'openai']" 9 | 7,"['d3_30_news', 3694, 0, 0, 'no', 'meta']" 10 | 8,"['d3_30_news', 3694, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_30_news', 3694, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_30_news', 144, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_30_news', 144, 1, 1, 'yes', 'mistral']" 14 | 12,"['d3_30_news', 144, 1, 1, 'yes', 'meta']" 15 | 13,"['d3_30_news', 144, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_30_news', 1589, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_30_news', 1589, 0, 0, 'no', 'meta']" 18 | 16,"['d3_30_news', 1589, 0, 0, 'no', 'openai']" 19 | 17,"['d3_30_news', 1589, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_30_news', 1289, 1, 1, 'yes', 'mistral']" 21 | 19,"['d3_30_news', 1289, 1, 0, 'no', 'meta']" 22 | 20,"['d3_30_news', 1289, 1, 0, 'no', 'openai']" 23 | 21,"['d3_30_news', 1289, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_30_news', 4255, 0, 0, 'no', 'openai']" 25 | 23,"['d3_30_news', 4255, 0, 0, 'no', 'meta']" 26 | 24,"['d3_30_news', 4255, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_30_news', 4255, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_30_news', 3909, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_30_news', 3909, 0, 0, 'no', 'meta']" 30 | 28,"['d3_30_news', 3909, 0, 0, 'no', 'openai']" 31 | 29,"['d3_30_news', 3909, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_30_news', 5155, 0, 1, 'yes', 'meta']" 33 | 31,"['d3_30_news', 5155, 0, 0, 'no', 'openai']" 34 | 32,"['d3_30_news', 5155, 0, 1, 'yes', 'mistral']" 35 | 33,"['d3_30_news', 5155, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_30_news', 502, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_30_news', 502, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_30_news', 502, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_30_news', 502, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_30_news', 2437, 0, 0, 'no', 'openai']" 41 | 39,"['d3_30_news', 2437, 0, 0, 'no', 'meta']" 42 | 40,"['d3_30_news', 2437, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_30_news', 2437, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_30_news', 172, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_30_news', 172, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_30_news', 172, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_30_news', 172, 1, 1, 'yes', 'gpt4']" 48 
| 46,"['d3_30_news', 1263, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_30_news', 1263, 1, 1, 'yes', 'mistral']" 50 | 48,"['d3_30_news', 1263, 1, 1, 'yes', 'meta']" 51 | 49,"['d3_30_news', 1263, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_30_news', 2919, 0, 0, 'no', 'openai']" 53 | 51,"['d3_30_news', 2919, 0, 0, 'no', 'mistral']" 54 | 52,"['d3_30_news', 2919, 0, 0, 'no', 'meta']" 55 | 53,"['d3_30_news', 2919, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_30_news', 153, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_30_news', 153, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_30_news', 153, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_30_news', 153, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_30_news', 1148, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_30_news', 1148, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_30_news', 1148, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_30_news', 1148, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_30_news', 3755, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_30_news', 3755, 0, 0, 'no', 'meta']" 66 | 64,"['d3_30_news', 3755, 0, 0, 'no', 'openai']" 67 | 65,"['d3_30_news', 3755, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_30_news', 3148, 0, 1, 'yes', 'mistral']" 69 | 67,"['d3_30_news', 3148, 0, 1, 'yes', 'meta']" 70 | 68,"['d3_30_news', 3148, 0, 0, 'no', 'openai']" 71 | 69,"['d3_30_news', 3148, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_30_news', 1273, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_30_news', 1273, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_30_news', 1273, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_30_news', 1273, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_30_news', 3404, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_30_news', 3404, 0, 0, 'no', 'meta']" 78 | 76,"['d3_30_news', 3404, 0, 0, 'no', 'openai']" 79 | 77,"['d3_30_news', 3404, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_30_news', 4676, 0, 0, 'no', 'openai']" 81 | 79,"['d3_30_news', 4676, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_30_news', 4676, 0, 0, 'no', 'meta']" 83 | 81,"['d3_30_news', 4676, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_30_news', 37, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_30_news', 37, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_30_news', 37, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_30_news', 37, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_31_sports.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_31_sports', 3628, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_31_sports', 3628, 0, 0, 'no', 'meta']" 10 | 8,"['d3_31_sports', 3628, 0, 0, 'no', 'openai']" 11 | 9,"['d3_31_sports', 3628, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_31_sports', 141, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_31_sports', 141, 1, 1, 'yes', 'mistral']" 14 | 12,"['d3_31_sports', 141, 1, 1, 'yes', 'meta']" 15 | 13,"['d3_31_sports', 141, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_31_sports', 1560, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_31_sports', 1560, 0, 0, 'no', 'meta']" 18 | 16,"['d3_31_sports', 1560, 0, 0, 'no', 'openai']" 19 | 17,"['d3_31_sports', 1560, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_31_sports', 1266, 1, 1, 'yes', 'mistral']" 21 | 19,"['d3_31_sports', 1266, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_31_sports', 1266, 1, 1, 'yes', 'openai']" 23 | 21,"['d3_31_sports', 1266, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_31_sports', 4178, 0, 0, 'no', 'openai']" 25 | 23,"['d3_31_sports', 4178, 0, 0, 'no', 'meta']" 26 | 24,"['d3_31_sports', 4178, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_31_sports', 4178, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_31_sports', 3839, 0, 0, 'no', 'openai']" 29 | 27,"['d3_31_sports', 
3839, 0, 0, 'no', 'meta']" 30 | 28,"['d3_31_sports', 3839, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_31_sports', 3839, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_31_sports', 5062, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_31_sports', 5062, 0, 0, 'no', 'meta']" 34 | 32,"['d3_31_sports', 5062, 0, 0, 'no', 'openai']" 35 | 33,"['d3_31_sports', 5062, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_31_sports', 493, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_31_sports', 493, 1, 0, 'no', 'meta']" 38 | 36,"['d3_31_sports', 493, 1, 0, 'no', 'mistral']" 39 | 37,"['d3_31_sports', 493, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_31_sports', 2393, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_31_sports', 2393, 0, 0, 'no', 'meta']" 42 | 40,"['d3_31_sports', 2393, 0, 0, 'no', 'openai']" 43 | 41,"['d3_31_sports', 2393, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_31_sports', 169, 1, 1, 'yes', 'meta']" 45 | 43,"['d3_31_sports', 169, 1, 1, 'yes', 'openai']" 46 | 44,"['d3_31_sports', 169, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_31_sports', 169, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_31_sports', 1240, 1, 1, 'yes', 'mistral']" 49 | 47,"['d3_31_sports', 1240, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_31_sports', 1240, 1, 1, 'yes', 'openai']" 51 | 49,"['d3_31_sports', 1240, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_31_sports', 2867, 0, 0, 'no', 'openai']" 53 | 51,"['d3_31_sports', 2867, 0, 0, 'no', 'meta']" 54 | 52,"['d3_31_sports', 2867, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_31_sports', 2867, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_31_sports', 150, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_31_sports', 150, 1, 0, 'no', 'meta']" 58 | 56,"['d3_31_sports', 150, 1, 0, 'poss', 'mistral']" 59 | 57,"['d3_31_sports', 150, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_31_sports', 1128, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_31_sports', 1128, 1, 1, 'yes', 'mistral']" 62 | 60,"['d3_31_sports', 1128, 1, 1, 'yes', 'meta']" 63 | 61,"['d3_31_sports', 1128, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_31_sports', 3687, 0, 0, 'no', 'meta']" 65 | 63,"['d3_31_sports', 3687, 0, 0, 'no', 'openai']" 66 | 64,"['d3_31_sports', 3687, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_31_sports', 3687, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_31_sports', 3091, 0, 0, 'no', 'openai']" 69 | 67,"['d3_31_sports', 3091, 0, 0, 'no', 'meta']" 70 | 68,"['d3_31_sports', 3091, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_31_sports', 3091, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_31_sports', 1250, 1, 1, 'yes', 'mistral']" 73 | 71,"['d3_31_sports', 1250, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_31_sports', 1250, 1, 1, 'yes', 'openai']" 75 | 73,"['d3_31_sports', 1250, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_31_sports', 3343, 0, 0, 'no', 'openai']" 77 | 75,"['d3_31_sports', 3343, 0, 0, 'no', 'meta']" 78 | 76,"['d3_31_sports', 3343, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_31_sports', 3343, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_31_sports', 4592, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_31_sports', 4592, 0, 0, 'no', 'meta']" 82 | 80,"['d3_31_sports', 4592, 0, 0, 'no', 'openai']" 83 | 81,"['d3_31_sports', 4592, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_31_sports', 36, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_31_sports', 36, 1, 1, 'yes', 'mistral']" 86 | 84,"['d3_31_sports', 36, 1, 1, 'yes', 'meta']" 87 | 85,"['d3_31_sports', 36, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_32_business.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_32_business', 3644, 0, 0, 'no', 'openai']" 9 | 7,"['d3_32_business', 
3644, 0, 0, 'no', 'mistral']" 10 | 8,"['d3_32_business', 3644, 0, 0, 'no', 'meta']" 11 | 9,"['d3_32_business', 3644, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_32_business', 142, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_32_business', 142, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_32_business', 142, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_32_business', 142, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_32_business', 1567, 0, 0, 'no', 'openai']" 17 | 15,"['d3_32_business', 1567, 0, 0, 'no', 'mistral']" 18 | 16,"['d3_32_business', 1567, 0, 0, 'no', 'meta']" 19 | 17,"['d3_32_business', 1567, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_32_business', 1272, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_32_business', 1272, 1, 1, 'yes', 'mistral']" 22 | 20,"['d3_32_business', 1272, 1, 1, 'yes', 'meta']" 23 | 21,"['d3_32_business', 1272, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_32_business', 4197, 0, 0, 'no', 'openai']" 25 | 23,"['d3_32_business', 4197, 0, 0, 'no', 'meta']" 26 | 24,"['d3_32_business', 4197, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_32_business', 4197, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_32_business', 3856, 0, 0, 'no', 'openai']" 29 | 27,"['d3_32_business', 3856, 0, 0, 'no', 'meta']" 30 | 28,"['d3_32_business', 3856, 0, 1, 'yes', 'mistral']" 31 | 29,"['d3_32_business', 3856, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_32_business', 5084, 0, 0, 'no', 'openai']" 33 | 31,"['d3_32_business', 5084, 0, 0, 'no', 'meta']" 34 | 32,"['d3_32_business', 5084, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_32_business', 5084, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_32_business', 495, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_32_business', 495, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_32_business', 495, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_32_business', 495, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_32_business', 2404, 0, 0, 'no', 'meta']" 41 | 39,"['d3_32_business', 2404, 0, 0, 'no', 'openai']" 42 | 40,"['d3_32_business', 2404, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_32_business', 2404, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_32_business', 169, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_32_business', 169, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_32_business', 169, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_32_business', 169, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_32_business', 1246, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_32_business', 1246, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_32_business', 1246, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_32_business', 1246, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_32_business', 2880, 0, 1, 'yes', 'mistral']" 53 | 51,"['d3_32_business', 2880, 0, 0, 'no', 'meta']" 54 | 52,"['d3_32_business', 2880, 0, 0, 'no', 'openai']" 55 | 53,"['d3_32_business', 2880, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_32_business', 151, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_32_business', 151, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_32_business', 151, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_32_business', 151, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_32_business', 1133, 1, 0, 'no', 'openai']" 61 | 59,"['d3_32_business', 1133, 1, 0, 'no', 'meta']" 62 | 60,"['d3_32_business', 1133, 1, 0, 'no', 'mistral']" 63 | 61,"['d3_32_business', 1133, 1, 0, 'no', 'gpt4']" 64 | 62,"['d3_32_business', 3703, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_32_business', 3703, 0, 0, 'no', 'meta']" 66 | 64,"['d3_32_business', 3703, 0, 0, 'no', 'openai']" 67 | 65,"['d3_32_business', 3703, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_32_business', 3105, 0, 0, 'no', 'openai']" 69 | 67,"['d3_32_business', 3105, 0, 1, 'yes', 'meta']" 70 | 68,"['d3_32_business', 3105, 0, 1, 'yes', 'mistral']" 71 | 69,"['d3_32_business', 3105, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_32_business', 
1256, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_32_business', 1256, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_32_business', 1256, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_32_business', 1256, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_32_business', 3358, 0, 1, 'yes', 'openai']" 77 | 75,"['d3_32_business', 3358, 0, 1, 'yes', 'mistral']" 78 | 76,"['d3_32_business', 3358, 0, 1, 'yes', 'meta']" 79 | 77,"['d3_32_business', 3358, 0, 1, 'yes', 'gpt4']" 80 | 78,"['d3_32_business', 4612, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_32_business', 4612, 0, 0, 'no', 'meta']" 82 | 80,"['d3_32_business', 4612, 0, 0, 'no', 'openai']" 83 | 81,"['d3_32_business', 4612, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_32_business', 37, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_32_business', 37, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_32_business', 37, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_32_business', 37, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_33_tech.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_33_tech', 3661, 0, 0, 'no', 'openai']" 9 | 7,"['d3_33_tech', 3661, 0, 0, 'no', 'meta']" 10 | 8,"['d3_33_tech', 3661, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_33_tech', 3661, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_33_tech', 143, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_33_tech', 143, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_33_tech', 143, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_33_tech', 143, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_33_tech', 1575, 0, 0, 'no', 'openai']" 17 | 15,"['d3_33_tech', 1575, 0, 0, 'no', 'mistral']" 18 | 16,"['d3_33_tech', 1575, 0, 0, 'no', 'meta']" 19 | 17,"['d3_33_tech', 1575, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_33_tech', 1278, 1, 0, 'no', 'mistral']" 21 | 19,"['d3_33_tech', 1278, 1, 0, 'no', 'meta']" 22 | 20,"['d3_33_tech', 1278, 1, 0, 'no', 'openai']" 23 | 21,"['d3_33_tech', 1278, 1, 0, 'no', 'gpt4']" 24 | 22,"['d3_33_tech', 4217, 0, 1, 'yes', 'mistral']" 25 | 23,"['d3_33_tech', 4217, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_33_tech', 4217, 0, 1, 'yes', 'openai']" 27 | 25,"['d3_33_tech', 4217, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_33_tech', 3875, 0, 0, 'no', 'openai']" 29 | 27,"['d3_33_tech', 3875, 0, 0, 'no', 'meta']" 30 | 28,"['d3_33_tech', 3875, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_33_tech', 3875, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_33_tech', 5109, 0, 0, 'no', 'openai']" 33 | 31,"['d3_33_tech', 5109, 0, 0, 'no', 'meta']" 34 | 32,"['d3_33_tech', 5109, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_33_tech', 5109, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_33_tech', 497, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_33_tech', 497, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_33_tech', 497, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_33_tech', 497, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_33_tech', 2416, 0, 0, 'no', 'openai']" 41 | 39,"['d3_33_tech', 2416, 0, 0, 'no', 'meta']" 42 | 40,"['d3_33_tech', 2416, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_33_tech', 2416, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_33_tech', 170, 1, 1, 'yes', 'meta']" 45 | 43,"['d3_33_tech', 170, 1, 1, 'yes', 'openai']" 46 | 44,"['d3_33_tech', 170, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_33_tech', 170, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_33_tech', 1252, 1, 1, 'yes', 'mistral']" 49 | 47,"['d3_33_tech', 1252, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_33_tech', 1252, 1, 1, 'yes', 'openai']" 51 | 49,"['d3_33_tech', 1252, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_33_tech', 2894, 0, 0, 'no', 'meta']" 53 | 51,"['d3_33_tech', 2894, 0, 1, 
'yes', 'openai']" 54 | 52,"['d3_33_tech', 2894, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_33_tech', 2894, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_33_tech', 151, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_33_tech', 151, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_33_tech', 151, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_33_tech', 151, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_33_tech', 1138, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_33_tech', 1138, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_33_tech', 1138, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_33_tech', 1138, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_33_tech', 3721, 0, 0, 'no', 'openai']" 65 | 63,"['d3_33_tech', 3721, 0, 0, 'no', 'meta']" 66 | 64,"['d3_33_tech', 3721, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_33_tech', 3721, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_33_tech', 3120, 0, 0, 'no', 'meta']" 69 | 67,"['d3_33_tech', 3120, 0, 0, 'no', 'openai']" 70 | 68,"['d3_33_tech', 3120, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_33_tech', 3120, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_33_tech', 1262, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_33_tech', 1262, 1, 1, 'yes', 'mistral']" 74 | 72,"['d3_33_tech', 1262, 1, 1, 'yes', 'meta']" 75 | 73,"['d3_33_tech', 1262, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_33_tech', 3374, 0, 0, 'no', 'openai']" 77 | 75,"['d3_33_tech', 3374, 0, 0, 'no', 'meta']" 78 | 76,"['d3_33_tech', 3374, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_33_tech', 3374, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_33_tech', 4635, 0, 0, 'no', 'meta']" 81 | 79,"['d3_33_tech', 4635, 0, 0, 'no', 'openai']" 82 | 80,"['d3_33_tech', 4635, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_33_tech', 4635, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_33_tech', 37, 1, 0, 'no', 'openai']" 85 | 83,"['d3_33_tech', 37, 1, 0, 'no', 'meta']" 86 | 84,"['d3_33_tech', 37, 1, 0, 'no', 'mistral']" 87 | 85,"['d3_33_tech', 37, 1, 0, 'no', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_34_bad.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_34_bad', 228, 0, 0, 'no', 'openai']" 9 | 7,"['d3_34_bad', 228, 0, 0, 'no', 'meta']" 10 | 8,"['d3_34_bad', 228, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_34_bad', 228, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_34_bad', 8, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_34_bad', 8, 1, 1, 'yes', 'mistral']" 14 | 12,"['d3_34_bad', 8, 1, 1, 'yes', 'meta']" 15 | 13,"['d3_34_bad', 8, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_34_bad', 98, 1, 1, 'yes', 'meta']" 17 | 15,"['d3_34_bad', 98, 1, 1, 'yes', 'openai']" 18 | 16,"['d3_34_bad', 98, 1, 1, 'yes', 'mistral']" 19 | 17,"['d3_34_bad', 98, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_34_bad', 79, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_34_bad', 79, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_34_bad', 79, 1, 1, 'yes', 'mistral']" 23 | 21,"['d3_34_bad', 79, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_34_bad', 262, 0, 0, 'no', 'openai']" 25 | 23,"['d3_34_bad', 262, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_34_bad', 262, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_34_bad', 262, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_34_bad', 241, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_34_bad', 241, 0, 0, 'no', 'meta']" 30 | 28,"['d3_34_bad', 241, 0, 0, 'no', 'openai']" 31 | 29,"['d3_34_bad', 241, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_34_bad', 318, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_34_bad', 318, 0, 0, 'no', 'meta']" 34 | 32,"['d3_34_bad', 318, 0, 0, 'no', 'openai']" 35 | 33,"['d3_34_bad', 318, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_34_bad', 31, 1, 1, 'yes', 'meta']" 37 | 35,"['d3_34_bad', 
31, 1, 1, 'yes', 'openai']" 38 | 36,"['d3_34_bad', 31, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_34_bad', 31, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_34_bad', 150, 1, 1, 'yes', 'openai']" 41 | 39,"['d3_34_bad', 150, 1, 1, 'yes', 'mistral']" 42 | 40,"['d3_34_bad', 150, 1, 1, 'yes', 'meta']" 43 | 41,"['d3_34_bad', 150, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_34_bad', 10, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_34_bad', 10, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_34_bad', 10, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_34_bad', 10, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_34_bad', 78, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_34_bad', 78, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_34_bad', 78, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_34_bad', 78, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_34_bad', 180, 1, 1, 'yes', 'openai']" 53 | 51,"['d3_34_bad', 180, 1, 1, 'yes', 'mistral']" 54 | 52,"['d3_34_bad', 180, 1, 1, 'yes', 'meta']" 55 | 53,"['d3_34_bad', 180, 1, 1, 'yes', 'gpt4']" 56 | 54,"['d3_34_bad', 9, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_34_bad', 9, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_34_bad', 9, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_34_bad', 9, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_34_bad', 70, 1, 1, 'yes', 'openai']" 61 | 59,"['d3_34_bad', 70, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_34_bad', 70, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_34_bad', 70, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_34_bad', 232, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_34_bad', 232, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_34_bad', 232, 0, 1, 'yes', 'openai']" 67 | 65,"['d3_34_bad', 232, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_34_bad', 194, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_34_bad', 194, 0, 0, 'no', 'meta']" 70 | 68,"['d3_34_bad', 194, 0, 0, 'no', 'openai']" 71 | 69,"['d3_34_bad', 194, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_34_bad', 78, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_34_bad', 78, 1, 1, 'yes', 'mistral']" 74 | 72,"['d3_34_bad', 78, 1, 1, 'yes', 'meta']" 75 | 73,"['d3_34_bad', 78, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_34_bad', 210, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_34_bad', 210, 0, 0, 'no', 'meta']" 78 | 76,"['d3_34_bad', 210, 0, 0, 'no', 'openai']" 79 | 77,"['d3_34_bad', 210, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_34_bad', 288, 0, 0, 'no', 'openai']" 81 | 79,"['d3_34_bad', 288, 0, 0, 'no', 'meta']" 82 | 80,"['d3_34_bad', 288, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_34_bad', 288, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_34_bad', 2, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_34_bad', 2, 1, 1, 'yes', 'mistral']" 86 | 84,"['d3_34_bad', 2, 1, 1, 'yes', 'meta']" 87 | 85,"['d3_34_bad', 2, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_35_good.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_35_good', 242, 0, 0, 'no', 'openai']" 9 | 7,"['d3_35_good', 242, 0, 0, 'no', 'meta']" 10 | 8,"['d3_35_good', 242, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_35_good', 242, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_35_good', 9, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_35_good', 9, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_35_good', 9, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_35_good', 9, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_35_good', 104, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_35_good', 104, 1, 1, 'yes', 'mistral']" 18 | 16,"['d3_35_good', 104, 1, 1, 'yes', 'meta']" 19 | 17,"['d3_35_good', 104, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_35_good', 84, 1, 1, 'yes', 'mistral']" 21 | 19,"['d3_35_good', 84, 1, 1, 'yes', 'meta']" 22 | 
20,"['d3_35_good', 84, 1, 1, 'yes', 'openai']" 23 | 21,"['d3_35_good', 84, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_35_good', 279, 0, 0, 'no', 'openai']" 25 | 23,"['d3_35_good', 279, 0, 0, 'no', 'mistral']" 26 | 24,"['d3_35_good', 279, 0, 0, 'no', 'meta']" 27 | 25,"['d3_35_good', 279, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_35_good', 257, 0, 0, 'no', 'meta']" 29 | 27,"['d3_35_good', 257, 0, 0, 'no', 'openai']" 30 | 28,"['d3_35_good', 257, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_35_good', 257, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_35_good', 339, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_35_good', 339, 0, 0, 'no', 'meta']" 34 | 32,"['d3_35_good', 339, 0, 0, 'no', 'openai']" 35 | 33,"['d3_35_good', 339, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_35_good', 33, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_35_good', 33, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_35_good', 33, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_35_good', 33, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_35_good', 160, 1, 1, 'yes', 'mistral']" 41 | 39,"['d3_35_good', 160, 1, 1, 'yes', 'meta']" 42 | 40,"['d3_35_good', 160, 1, 1, 'yes', 'openai']" 43 | 41,"['d3_35_good', 160, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_35_good', 11, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_35_good', 11, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_35_good', 11, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_35_good', 11, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_35_good', 83, 1, 1, 'yes', 'mistral']" 49 | 47,"['d3_35_good', 83, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_35_good', 83, 1, 1, 'yes', 'openai']" 51 | 49,"['d3_35_good', 83, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_35_good', 192, 0, 0, 'no', 'meta']" 53 | 51,"['d3_35_good', 192, 0, 0, 'no', 'openai']" 54 | 52,"['d3_35_good', 192, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_35_good', 192, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_35_good', 10, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_35_good', 10, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_35_good', 10, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_35_good', 10, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_35_good', 75, 1, 1, 'yes', 'mistral']" 61 | 59,"['d3_35_good', 75, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_35_good', 75, 1, 1, 'yes', 'openai']" 63 | 61,"['d3_35_good', 75, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_35_good', 246, 0, 0, 'no', 'openai']" 65 | 63,"['d3_35_good', 246, 0, 0, 'no', 'meta']" 66 | 64,"['d3_35_good', 246, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_35_good', 246, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_35_good', 207, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_35_good', 207, 0, 0, 'no', 'meta']" 70 | 68,"['d3_35_good', 207, 0, 0, 'no', 'openai']" 71 | 69,"['d3_35_good', 207, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_35_good', 83, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_35_good', 83, 1, 1, 'yes', 'mistral']" 74 | 72,"['d3_35_good', 83, 1, 1, 'yes', 'meta']" 75 | 73,"['d3_35_good', 83, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_35_good', 223, 0, 0, 'no', 'openai']" 77 | 75,"['d3_35_good', 223, 0, 0, 'no', 'meta']" 78 | 76,"['d3_35_good', 223, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_35_good', 223, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_35_good', 307, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_35_good', 307, 0, 0, 'no', 'meta']" 82 | 80,"['d3_35_good', 307, 0, 0, 'no', 'openai']" 83 | 81,"['d3_35_good', 307, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_35_good', 2, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_35_good', 2, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_35_good', 2, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_35_good', 2, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_36_quantity.csv: 
-------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_36_quantity', 1215, 0, 0, 'no', 'openai']" 9 | 7,"['d3_36_quantity', 1215, 0, 0, 'no', 'meta']" 10 | 8,"['d3_36_quantity', 1215, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_36_quantity', 1215, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_36_quantity', 47, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_36_quantity', 47, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_36_quantity', 47, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_36_quantity', 47, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_36_quantity', 522, 0, 0, 'no', 'openai']" 17 | 15,"['d3_36_quantity', 522, 0, 0, 'no', 'meta']" 18 | 16,"['d3_36_quantity', 522, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_36_quantity', 522, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_36_quantity', 424, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_36_quantity', 424, 0, 0, 'no', 'meta']" 22 | 20,"['d3_36_quantity', 424, 0, 0, 'no', 'openai']" 23 | 21,"['d3_36_quantity', 424, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_36_quantity', 1400, 0, 0, 'no', 'openai']" 25 | 23,"['d3_36_quantity', 1400, 0, 0, 'no', 'meta']" 26 | 24,"['d3_36_quantity', 1400, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_36_quantity', 1400, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_36_quantity', 1286, 0, 0, 'no', 'openai']" 29 | 27,"['d3_36_quantity', 1286, 0, 0, 'no', 'mistral']" 30 | 28,"['d3_36_quantity', 1286, 0, 0, 'no', 'meta']" 31 | 29,"['d3_36_quantity', 1286, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_36_quantity', 1696, 0, 0, 'no', 'meta']" 33 | 31,"['d3_36_quantity', 1696, 0, 0, 'no', 'openai']" 34 | 32,"['d3_36_quantity', 1696, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_36_quantity', 1696, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_36_quantity', 165, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_36_quantity', 165, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_36_quantity', 165, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_36_quantity', 165, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_36_quantity', 802, 0, 0, 'no', 'openai']" 41 | 39,"['d3_36_quantity', 802, 0, 0, 'no', 'mistral']" 42 | 40,"['d3_36_quantity', 802, 0, 0, 'no', 'meta']" 43 | 41,"['d3_36_quantity', 802, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_36_quantity', 56, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_36_quantity', 56, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_36_quantity', 56, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_36_quantity', 56, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_36_quantity', 415, 0, 0, 'no', 'meta']" 49 | 47,"['d3_36_quantity', 415, 0, 0, 'no', 'openai']" 50 | 48,"['d3_36_quantity', 415, 0, 1, 'yes', 'mistral']" 51 | 49,"['d3_36_quantity', 415, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_36_quantity', 960, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_36_quantity', 960, 0, 0, 'no', 'meta']" 54 | 52,"['d3_36_quantity', 960, 0, 0, 'no', 'openai']" 55 | 53,"['d3_36_quantity', 960, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_36_quantity', 50, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_36_quantity', 50, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_36_quantity', 50, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_36_quantity', 50, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_36_quantity', 377, 0, 0, 'no', 'openai']" 61 | 59,"['d3_36_quantity', 377, 0, 1, 'yes', 'mistral']" 62 | 60,"['d3_36_quantity', 377, 0, 0, 'no', 'meta']" 63 | 61,"['d3_36_quantity', 377, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_36_quantity', 1235, 0, 0, 'no', 'meta']" 65 | 63,"['d3_36_quantity', 1235, 0, 0, 'no', 'openai']" 66 | 64,"['d3_36_quantity', 1235, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_36_quantity', 1235, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_36_quantity', 1035, 0, 0, 'no', 
'openai']" 69 | 67,"['d3_36_quantity', 1035, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_36_quantity', 1035, 0, 0, 'no', 'meta']" 71 | 69,"['d3_36_quantity', 1035, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_36_quantity', 419, 0, 0, 'no', 'meta']" 73 | 71,"['d3_36_quantity', 419, 0, 0, 'no', 'openai']" 74 | 72,"['d3_36_quantity', 419, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_36_quantity', 419, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_36_quantity', 1120, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_36_quantity', 1120, 0, 0, 'no', 'meta']" 78 | 76,"['d3_36_quantity', 1120, 0, 0, 'no', 'openai']" 79 | 77,"['d3_36_quantity', 1120, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_36_quantity', 1538, 0, 0, 'no', 'openai']" 81 | 79,"['d3_36_quantity', 1538, 0, 0, 'no', 'meta']" 82 | 80,"['d3_36_quantity', 1538, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_36_quantity', 1538, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_36_quantity', 12, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_36_quantity', 12, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_36_quantity', 12, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_36_quantity', 12, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_37_location.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_37_location', 1230, 0, 0, 'no', 'openai']" 9 | 7,"['d3_37_location', 1230, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_37_location', 1230, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_37_location', 1230, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_37_location', 48, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_37_location', 48, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_37_location', 48, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_37_location', 48, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_37_location', 529, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_37_location', 529, 0, 0, 'no', 'meta']" 18 | 16,"['d3_37_location', 529, 0, 0, 'no', 'openai']" 19 | 17,"['d3_37_location', 529, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_37_location', 429, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_37_location', 429, 0, 0, 'no', 'meta']" 22 | 20,"['d3_37_location', 429, 0, 0, 'no', 'openai']" 23 | 21,"['d3_37_location', 429, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_37_location', 1417, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_37_location', 1417, 0, 0, 'no', 'meta']" 26 | 24,"['d3_37_location', 1417, 0, 0, 'no', 'openai']" 27 | 25,"['d3_37_location', 1417, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_37_location', 1302, 0, 1, 'yes', 'openai']" 29 | 27,"['d3_37_location', 1302, 0, 1, 'yes', 'mistral']" 30 | 28,"['d3_37_location', 1302, 0, 1, 'yes', 'meta']" 31 | 29,"['d3_37_location', 1302, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_37_location', 1717, 0, 0, 'no', 'openai']" 33 | 31,"['d3_37_location', 1717, 0, 0, 'no', 'meta']" 34 | 32,"['d3_37_location', 1717, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_37_location', 1717, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_37_location', 167, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_37_location', 167, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_37_location', 167, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_37_location', 167, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_37_location', 812, 0, 0, 'no', 'meta']" 41 | 39,"['d3_37_location', 812, 0, 0, 'no', 'openai']" 42 | 40,"['d3_37_location', 812, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_37_location', 812, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_37_location', 57, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_37_location', 57, 1, 0, 'no', 'meta']" 46 | 44,"['d3_37_location', 57, 1, 1, 'yes', 'mistral']" 47 | 
45,"['d3_37_location', 57, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_37_location', 420, 0, 0, 'no', 'openai']" 49 | 47,"['d3_37_location', 420, 0, 0, 'no', 'mistral']" 50 | 48,"['d3_37_location', 420, 0, 0, 'no', 'meta']" 51 | 49,"['d3_37_location', 420, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_37_location', 972, 0, 0, 'no', 'meta']" 53 | 51,"['d3_37_location', 972, 0, 0, 'no', 'openai']" 54 | 52,"['d3_37_location', 972, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_37_location', 972, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_37_location', 51, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_37_location', 51, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_37_location', 51, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_37_location', 51, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_37_location', 382, 0, 0, 'no', 'openai']" 61 | 59,"['d3_37_location', 382, 0, 0, 'no', 'mistral']" 62 | 60,"['d3_37_location', 382, 0, 0, 'no', 'meta']" 63 | 61,"['d3_37_location', 382, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_37_location', 1251, 0, 0, 'no', 'openai']" 65 | 63,"['d3_37_location', 1251, 0, 0, 'no', 'meta']" 66 | 64,"['d3_37_location', 1251, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_37_location', 1251, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_37_location', 1049, 0, 0, 'no', 'openai']" 69 | 67,"['d3_37_location', 1049, 0, 0, 'no', 'meta']" 70 | 68,"['d3_37_location', 1049, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_37_location', 1049, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_37_location', 424, 0, 0, 'no', 'openai']" 73 | 71,"['d3_37_location', 424, 0, 0, 'no', 'meta']" 74 | 72,"['d3_37_location', 424, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_37_location', 424, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_37_location', 1134, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_37_location', 1134, 0, 0, 'no', 'meta']" 78 | 76,"['d3_37_location', 1134, 0, 0, 'no', 'openai']" 79 | 77,"['d3_37_location', 1134, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_37_location', 1558, 0, 0, 'no', 'openai']" 81 | 79,"['d3_37_location', 1558, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_37_location', 1558, 0, 0, 'no', 'meta']" 83 | 81,"['d3_37_location', 1558, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_37_location', 12, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_37_location', 12, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_37_location', 12, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_37_location', 12, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_38_person.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_38_person', 1181, 0, 0, 'no', 'openai']" 9 | 7,"['d3_38_person', 1181, 0, 0, 'no', 'meta']" 10 | 8,"['d3_38_person', 1181, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_38_person', 1181, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_38_person', 46, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_38_person', 46, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_38_person', 46, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_38_person', 46, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_38_person', 508, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_38_person', 508, 0, 0, 'no', 'meta']" 18 | 16,"['d3_38_person', 508, 0, 0, 'no', 'openai']" 19 | 17,"['d3_38_person', 508, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_38_person', 412, 0, 0, 'no', 'openai']" 21 | 19,"['d3_38_person', 412, 0, 0, 'no', 'meta']" 22 | 20,"['d3_38_person', 412, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_38_person', 412, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_38_person', 1360, 0, 0, 'no', 'meta']" 25 | 23,"['d3_38_person', 1360, 0, 0, 'no', 'openai']" 26 | 24,"['d3_38_person', 1360, 
0, 0, 'no', 'mistral']" 27 | 25,"['d3_38_person', 1360, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_38_person', 1250, 0, 0, 'no', 'openai']" 29 | 27,"['d3_38_person', 1250, 0, 0, 'no', 'meta']" 30 | 28,"['d3_38_person', 1250, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_38_person', 1250, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_38_person', 1648, 0, 0, 'no', 'openai']" 33 | 31,"['d3_38_person', 1648, 0, 0, 'no', 'meta']" 34 | 32,"['d3_38_person', 1648, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_38_person', 1648, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_38_person', 160, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_38_person', 160, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_38_person', 160, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_38_person', 160, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_38_person', 779, 0, 0, 'no', 'meta']" 41 | 39,"['d3_38_person', 779, 0, 0, 'no', 'openai']" 42 | 40,"['d3_38_person', 779, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_38_person', 779, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_38_person', 55, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_38_person', 55, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_38_person', 55, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_38_person', 55, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_38_person', 404, 0, 0, 'no', 'openai']" 49 | 47,"['d3_38_person', 404, 0, 0, 'no', 'meta']" 50 | 48,"['d3_38_person', 404, 0, 0, 'no', 'mistral']" 51 | 49,"['d3_38_person', 404, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_38_person', 933, 0, 0, 'no', 'openai']" 53 | 51,"['d3_38_person', 933, 0, 0, 'no', 'mistral']" 54 | 52,"['d3_38_person', 933, 0, 0, 'no', 'meta']" 55 | 53,"['d3_38_person', 933, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_38_person', 49, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_38_person', 49, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_38_person', 49, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_38_person', 49, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_38_person', 367, 0, 0, 'no', 'openai']" 61 | 59,"['d3_38_person', 367, 0, 0, 'no', 'meta']" 62 | 60,"['d3_38_person', 367, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_38_person', 367, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_38_person', 1200, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_38_person', 1200, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_38_person', 1200, 0, 0, 'no', 'openai']" 67 | 65,"['d3_38_person', 1200, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_38_person', 1007, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_38_person', 1007, 0, 0, 'no', 'meta']" 70 | 68,"['d3_38_person', 1007, 0, 0, 'no', 'openai']" 71 | 69,"['d3_38_person', 1007, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_38_person', 407, 0, 0, 'no', 'mistral']" 73 | 71,"['d3_38_person', 407, 0, 0, 'no', 'meta']" 74 | 72,"['d3_38_person', 407, 0, 0, 'no', 'openai']" 75 | 73,"['d3_38_person', 407, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_38_person', 1088, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_38_person', 1088, 0, 0, 'no', 'meta']" 78 | 76,"['d3_38_person', 1088, 0, 0, 'no', 'openai']" 79 | 77,"['d3_38_person', 1088, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_38_person', 1495, 0, 0, 'no', 'openai']" 81 | 79,"['d3_38_person', 1495, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_38_person', 1495, 0, 0, 'no', 'meta']" 83 | 81,"['d3_38_person', 1495, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_38_person', 12, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_38_person', 12, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_38_person', 12, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_38_person', 12, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_39_entity.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 
3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_39_entity', 1212, 0, 0, 'no', 'openai']" 9 | 7,"['d3_39_entity', 1212, 0, 1, 'yes', 'mistral']" 10 | 8,"['d3_39_entity', 1212, 0, 0, 'no', 'meta']" 11 | 9,"['d3_39_entity', 1212, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_39_entity', 47, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_39_entity', 47, 1, 0, 'no', 'meta']" 14 | 12,"['d3_39_entity', 47, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_39_entity', 47, 1, 0, 'no', 'gpt4']" 16 | 14,"['d3_39_entity', 521, 0, 1, 'yes', 'openai']" 17 | 15,"['d3_39_entity', 521, 0, 0, 'no', 'mistral']" 18 | 16,"['d3_39_entity', 521, 0, 0, 'no', 'meta']" 19 | 17,"['d3_39_entity', 521, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_39_entity', 423, 0, 1, 'yes', 'mistral']" 21 | 19,"['d3_39_entity', 423, 0, 1, 'yes', 'meta']" 22 | 20,"['d3_39_entity', 423, 0, 1, 'yes', 'openai']" 23 | 21,"['d3_39_entity', 423, 0, 1, 'yes', 'gpt4']" 24 | 22,"['d3_39_entity', 1396, 0, 1, 'yes', 'openai']" 25 | 23,"['d3_39_entity', 1396, 0, 1, 'yes', 'mistral']" 26 | 24,"['d3_39_entity', 1396, 0, 1, 'yes', 'meta']" 27 | 25,"['d3_39_entity', 1396, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_39_entity', 1283, 0, 1, 'yes', 'mistral']" 29 | 27,"['d3_39_entity', 1283, 0, 1, 'yes', 'meta']" 30 | 28,"['d3_39_entity', 1283, 0, 1, 'yes', 'openai']" 31 | 29,"['d3_39_entity', 1283, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_39_entity', 1691, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_39_entity', 1691, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_39_entity', 1691, 0, 1, 'yes', 'mistral']" 35 | 33,"['d3_39_entity', 1691, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_39_entity', 164, 1, 0, 'no', 'meta']" 37 | 35,"['d3_39_entity', 164, 1, 0, 'no', 'openai']" 38 | 36,"['d3_39_entity', 164, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_39_entity', 164, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_39_entity', 799, 0, 1, 'yes', 'openai']" 41 | 39,"['d3_39_entity', 799, 0, 1, 'yes', 'mistral']" 42 | 40,"['d3_39_entity', 799, 0, 1, 'yes', 'meta']" 43 | 41,"['d3_39_entity', 799, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_39_entity', 56, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_39_entity', 56, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_39_entity', 56, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_39_entity', 56, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_39_entity', 414, 0, 1, 'yes', 'mistral']" 49 | 47,"['d3_39_entity', 414, 0, 0, 'no', 'meta']" 50 | 48,"['d3_39_entity', 414, 0, 1, 'yes', 'openai']" 51 | 49,"['d3_39_entity', 414, 0, 1, 'yes', 'gpt4']" 52 | 50,"['d3_39_entity', 958, 0, 1, 'yes', 'meta']" 53 | 51,"['d3_39_entity', 958, 0, 1, 'yes', 'openai']" 54 | 52,"['d3_39_entity', 958, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_39_entity', 958, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_39_entity', 50, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_39_entity', 50, 1, 0, 'no', 'meta']" 58 | 56,"['d3_39_entity', 50, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_39_entity', 50, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_39_entity', 376, 0, 1, 'yes', 'openai']" 61 | 59,"['d3_39_entity', 376, 0, 1, 'yes', 'mistral']" 62 | 60,"['d3_39_entity', 376, 0, 1, 'yes', 'meta']" 63 | 61,"['d3_39_entity', 376, 0, 1, 'yes', 'gpt4']" 64 | 62,"['d3_39_entity', 1232, 0, 1, 'yes', 'openai']" 65 | 63,"['d3_39_entity', 1232, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_39_entity', 1232, 0, 1, 'yes', 'mistral']" 67 | 65,"['d3_39_entity', 1232, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_39_entity', 1033, 0, 1, 'yes', 'openai']" 69 | 67,"['d3_39_entity', 1033, 0, 1, 'yes', 'mistral']" 70 | 68,"['d3_39_entity', 1033, 0, 0, 'no', 'meta']" 71 | 69,"['d3_39_entity', 1033, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_39_entity', 417, 0, 0, 'no', 
'mistral']" 73 | 71,"['d3_39_entity', 417, 0, 0, 'no', 'meta']" 74 | 72,"['d3_39_entity', 417, 0, 1, 'yes', 'openai']" 75 | 73,"['d3_39_entity', 417, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_39_entity', 1117, 0, 0, 'no', 'meta']" 77 | 75,"['d3_39_entity', 1117, 0, 0, 'no', 'openai']" 78 | 76,"['d3_39_entity', 1117, 0, 1, 'yes', 'mistral']" 79 | 77,"['d3_39_entity', 1117, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_39_entity', 1534, 0, 0, 'no', 'openai']" 81 | 79,"['d3_39_entity', 1534, 0, 1, 'yes', 'mistral']" 82 | 80,"['d3_39_entity', 1534, 0, 1, 'yes', 'meta']" 83 | 81,"['d3_39_entity', 1534, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_39_entity', 12, 1, 0, 'no', 'mistral']" 85 | 83,"['d3_39_entity', 12, 1, 0, 'no', 'meta']" 86 | 84,"['d3_39_entity', 12, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_39_entity', 12, 1, 0, 'no', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_3_god.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_3_god', 104, 1, 1, 'yes', 'mistral']" 9 | 7,"['d3_3_god', 104, 1, 1, 'yes', 'meta']" 10 | 8,"['d3_3_god', 104, 1, 1, 'yes', 'openai']" 11 | 9,"['d3_3_god', 104, 1, 1, 'yes', 'gpt4']" 12 | 10,"['d3_3_god', 4, 1, 0, 'no', 'mistral']" 13 | 11,"['d3_3_god', 4, 1, 0, 'no', 'meta']" 14 | 12,"['d3_3_god', 4, 1, 0, 'no', 'openai']" 15 | 13,"['d3_3_god', 4, 1, 0, 'no', 'gpt4']" 16 | 14,"['d3_3_god', 45, 1, 1, 'yes', 'meta']" 17 | 15,"['d3_3_god', 45, 1, 1, 'yes', 'openai']" 18 | 16,"['d3_3_god', 45, 1, 1, 'yes', 'mistral']" 19 | 17,"['d3_3_god', 45, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_3_god', 36, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_3_god', 36, 1, 1, 'yes', 'mistral']" 22 | 20,"['d3_3_god', 36, 1, 1, 'yes', 'meta']" 23 | 21,"['d3_3_god', 36, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_3_god', 120, 0, 0, 'no', 'meta']" 25 | 23,"['d3_3_god', 120, 0, 0, 'no', 'openai']" 26 | 24,"['d3_3_god', 120, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_3_god', 120, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_3_god', 110, 1, 0, 'no', 'openai']" 29 | 27,"['d3_3_god', 110, 1, 1, 'yes', 'meta']" 30 | 28,"['d3_3_god', 110, 1, 0, 'no', 'mistral']" 31 | 29,"['d3_3_god', 110, 1, 0, 'no', 'gpt4']" 32 | 30,"['d3_3_god', 146, 0, 0, 'no', 'openai']" 33 | 31,"['d3_3_god', 146, 0, 0, 'no', 'meta']" 34 | 32,"['d3_3_god', 146, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_3_god', 146, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_3_god', 14, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_3_god', 14, 1, 0, 'no', 'meta']" 38 | 36,"['d3_3_god', 14, 1, 0, 'no', 'openai']" 39 | 37,"['d3_3_god', 14, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_3_god', 69, 1, 1, 'yes', 'mistral']" 41 | 39,"['d3_3_god', 69, 1, 1, 'yes', 'meta']" 42 | 40,"['d3_3_god', 69, 1, 1, 'yes', 'openai']" 43 | 41,"['d3_3_god', 69, 1, 1, 'yes', 'gpt4']" 44 | 42,"['d3_3_god', 4, 1, 0, 'no', 'openai']" 45 | 43,"['d3_3_god', 4, 1, 0, 'no', 'meta']" 46 | 44,"['d3_3_god', 4, 1, 0, 'no', 'mistral']" 47 | 45,"['d3_3_god', 4, 1, 0, 'no', 'gpt4']" 48 | 46,"['d3_3_god', 35, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_3_god', 35, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_3_god', 35, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_3_god', 35, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_3_god', 82, 1, 1, 'yes', 'meta']" 53 | 51,"['d3_3_god', 82, 1, 1, 'yes', 'openai']" 54 | 52,"['d3_3_god', 82, 1, 1, 'yes', 'mistral']" 55 | 53,"['d3_3_god', 82, 1, 1, 'yes', 'gpt4']" 56 | 54,"['d3_3_god', 4, 1, 0, 'no', 'openai']" 57 | 55,"['d3_3_god', 4, 1, 0, 'no', 'mistral']" 58 | 
56,"['d3_3_god', 4, 1, 0, 'no', 'meta']" 59 | 57,"['d3_3_god', 4, 1, 0, 'no', 'gpt4']" 60 | 58,"['d3_3_god', 32, 1, 1, 'yes', 'mistral']" 61 | 59,"['d3_3_god', 32, 1, 1, 'yes', 'meta']" 62 | 60,"['d3_3_god', 32, 1, 1, 'yes', 'openai']" 63 | 61,"['d3_3_god', 32, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_3_god', 106, 1, 0, 'no', 'mistral']" 65 | 63,"['d3_3_god', 106, 1, 0, 'no', 'meta']" 66 | 64,"['d3_3_god', 106, 1, 0, 'no', 'openai']" 67 | 65,"['d3_3_god', 106, 1, 1, 'yes', 'gpt4']" 68 | 66,"['d3_3_god', 89, 1, 1, 'yes', 'openai']" 69 | 67,"['d3_3_god', 89, 1, 1, 'yes', 'mistral']" 70 | 68,"['d3_3_god', 89, 1, 1, 'yes', 'meta']" 71 | 69,"['d3_3_god', 89, 1, 1, 'yes', 'gpt4']" 72 | 70,"['d3_3_god', 36, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_3_god', 36, 1, 1, 'yes', 'meta']" 74 | 72,"['d3_3_god', 36, 1, 1, 'yes', 'mistral']" 75 | 73,"['d3_3_god', 36, 1, 1, 'yes', 'gpt4']" 76 | 74,"['d3_3_god', 96, 1, 0, 'no', 'mistral']" 77 | 75,"['d3_3_god', 96, 1, 0, 'no', 'meta']" 78 | 76,"['d3_3_god', 96, 1, 0, 'no', 'openai']" 79 | 77,"['d3_3_god', 96, 1, 1, 'yes', 'gpt4']" 80 | 78,"['d3_3_god', 132, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_3_god', 132, 0, 0, 'no', 'meta']" 82 | 80,"['d3_3_god', 132, 0, 0, 'no', 'openai']" 83 | 81,"['d3_3_god', 132, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_3_god', 1, 1, 0, 'no', 'openai']" 85 | 83,"['d3_3_god', 1, 1, 0, 'no', 'meta']" 86 | 84,"['d3_3_god', 1, 1, 0, 'no', 'mistral']" 87 | 85,"['d3_3_god', 1, 1, 0, 'no', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_44_spam.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_44_spam', 230, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_44_spam', 230, 0, 0, 'no', 'meta']" 10 | 8,"['d3_44_spam', 230, 0, 0, 'no', 'openai']" 11 | 9,"['d3_44_spam', 230, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_44_spam', 9, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_44_spam', 9, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_44_spam', 9, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_44_spam', 9, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_44_spam', 99, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_44_spam', 99, 0, 0, 'no', 'meta']" 18 | 16,"['d3_44_spam', 99, 0, 0, 'no', 'openai']" 19 | 17,"['d3_44_spam', 99, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_44_spam', 80, 0, 0, 'no', 'meta']" 21 | 19,"['d3_44_spam', 80, 0, 0, 'no', 'openai']" 22 | 20,"['d3_44_spam', 80, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_44_spam', 80, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_44_spam', 265, 0, 0, 'no', 'openai']" 25 | 23,"['d3_44_spam', 265, 0, 0, 'no', 'meta']" 26 | 24,"['d3_44_spam', 265, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_44_spam', 265, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_44_spam', 243, 0, 0, 'no', 'openai']" 29 | 27,"['d3_44_spam', 243, 0, 0, 'no', 'meta']" 30 | 28,"['d3_44_spam', 243, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_44_spam', 243, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_44_spam', 321, 0, 0, 'no', 'meta']" 33 | 31,"['d3_44_spam', 321, 0, 0, 'no', 'openai']" 34 | 32,"['d3_44_spam', 321, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_44_spam', 321, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_44_spam', 31, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_44_spam', 31, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_44_spam', 31, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_44_spam', 31, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_44_spam', 151, 0, 0, 'no', 'openai']" 41 | 39,"['d3_44_spam', 151, 0, 0, 'no', 'meta']" 42 | 40,"['d3_44_spam', 151, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_44_spam', 151, 0, 0, 
'no', 'gpt4']" 44 | 42,"['d3_44_spam', 10, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_44_spam', 10, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_44_spam', 10, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_44_spam', 10, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_44_spam', 78, 0, 1, 'yes', 'meta']" 49 | 47,"['d3_44_spam', 78, 0, 0, 'no', 'openai']" 50 | 48,"['d3_44_spam', 78, 0, 1, 'yes', 'mistral']" 51 | 49,"['d3_44_spam', 78, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_44_spam', 181, 0, 0, 'no', 'meta']" 53 | 51,"['d3_44_spam', 181, 0, 0, 'no', 'openai']" 54 | 52,"['d3_44_spam', 181, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_44_spam', 181, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_44_spam', 9, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_44_spam', 9, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_44_spam', 9, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_44_spam', 9, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_44_spam', 71, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_44_spam', 71, 0, 0, 'no', 'meta']" 62 | 60,"['d3_44_spam', 71, 0, 0, 'no', 'openai']" 63 | 61,"['d3_44_spam', 71, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_44_spam', 233, 0, 0, 'no', 'openai']" 65 | 63,"['d3_44_spam', 233, 0, 0, 'no', 'mistral']" 66 | 64,"['d3_44_spam', 233, 0, 0, 'no', 'meta']" 67 | 65,"['d3_44_spam', 233, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_44_spam', 196, 0, 0, 'no', 'openai']" 69 | 67,"['d3_44_spam', 196, 0, 0, 'no', 'meta']" 70 | 68,"['d3_44_spam', 196, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_44_spam', 196, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_44_spam', 79, 0, 0, 'no', 'meta']" 73 | 71,"['d3_44_spam', 79, 0, 0, 'no', 'openai']" 74 | 72,"['d3_44_spam', 79, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_44_spam', 79, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_44_spam', 212, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_44_spam', 212, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_44_spam', 212, 0, 0, 'no', 'openai']" 79 | 77,"['d3_44_spam', 212, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_44_spam', 291, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_44_spam', 291, 0, 0, 'no', 'meta']" 82 | 80,"['d3_44_spam', 291, 0, 0, 'no', 'openai']" 83 | 81,"['d3_44_spam', 291, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_44_spam', 2, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_44_spam', 2, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_44_spam', 2, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_44_spam', 2, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_45_fact.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_45_fact', 450, 0, 1, 'yes', 'mistral']" 9 | 7,"['d3_45_fact', 450, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_45_fact', 450, 0, 0, 'no', 'openai']" 11 | 9,"['d3_45_fact', 450, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_45_fact', 17, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_45_fact', 17, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_45_fact', 17, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_45_fact', 17, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_45_fact', 193, 1, 1, 'yes', 'openai']" 17 | 15,"['d3_45_fact', 193, 1, 1, 'yes', 'mistral']" 18 | 16,"['d3_45_fact', 193, 1, 1, 'yes', 'meta']" 19 | 17,"['d3_45_fact', 193, 1, 1, 'yes', 'gpt4']" 20 | 18,"['d3_45_fact', 157, 1, 1, 'yes', 'openai']" 21 | 19,"['d3_45_fact', 157, 1, 1, 'yes', 'meta']" 22 | 20,"['d3_45_fact', 157, 1, 0, 'no', 'mistral']" 23 | 21,"['d3_45_fact', 157, 1, 1, 'yes', 'gpt4']" 24 | 22,"['d3_45_fact', 518, 0, 1, 'yes', 'openai']" 25 | 23,"['d3_45_fact', 518, 0, 1, 'yes', 'mistral']" 26 | 24,"['d3_45_fact', 518, 0, 1, 'yes', 'meta']" 27 | 25,"['d3_45_fact', 518, 0, 
0, 'no', 'gpt4']" 28 | 26,"['d3_45_fact', 476, 0, 1, 'yes', 'openai']" 29 | 27,"['d3_45_fact', 476, 0, 0, 'no', 'meta']" 30 | 28,"['d3_45_fact', 476, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_45_fact', 476, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_45_fact', 628, 0, 1, 'yes', 'mistral']" 33 | 31,"['d3_45_fact', 628, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_45_fact', 628, 0, 1, 'yes', 'openai']" 35 | 33,"['d3_45_fact', 628, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_45_fact', 61, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_45_fact', 61, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_45_fact', 61, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_45_fact', 61, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_45_fact', 297, 0, 0, 'no', 'openai']" 41 | 39,"['d3_45_fact', 297, 0, 0, 'no', 'meta']" 42 | 40,"['d3_45_fact', 297, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_45_fact', 297, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_45_fact', 20, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_45_fact', 20, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_45_fact', 20, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_45_fact', 20, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_45_fact', 153, 1, 1, 'yes', 'openai']" 49 | 47,"['d3_45_fact', 153, 1, 1, 'yes', 'meta']" 50 | 48,"['d3_45_fact', 153, 1, 1, 'yes', 'mistral']" 51 | 49,"['d3_45_fact', 153, 1, 1, 'yes', 'gpt4']" 52 | 50,"['d3_45_fact', 355, 0, 1, 'yes', 'openai']" 53 | 51,"['d3_45_fact', 355, 0, 1, 'yes', 'mistral']" 54 | 52,"['d3_45_fact', 355, 0, 1, 'yes', 'meta']" 55 | 53,"['d3_45_fact', 355, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_45_fact', 18, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_45_fact', 18, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_45_fact', 18, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_45_fact', 18, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_45_fact', 139, 1, 1, 'yes', 'meta']" 61 | 59,"['d3_45_fact', 139, 1, 1, 'yes', 'openai']" 62 | 60,"['d3_45_fact', 139, 1, 1, 'yes', 'mistral']" 63 | 61,"['d3_45_fact', 139, 1, 1, 'yes', 'gpt4']" 64 | 62,"['d3_45_fact', 457, 0, 1, 'yes', 'openai']" 65 | 63,"['d3_45_fact', 457, 0, 0, 'no', 'meta']" 66 | 64,"['d3_45_fact', 457, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_45_fact', 457, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_45_fact', 383, 0, 0, 'no', 'openai']" 69 | 67,"['d3_45_fact', 383, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_45_fact', 383, 0, 0, 'no', 'meta']" 71 | 69,"['d3_45_fact', 383, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_45_fact', 155, 1, 1, 'yes', 'openai']" 73 | 71,"['d3_45_fact', 155, 1, 0, 'no', 'mistral']" 74 | 72,"['d3_45_fact', 155, 1, 0, 'no', 'meta']" 75 | 73,"['d3_45_fact', 155, 1, 0, 'no', 'gpt4']" 76 | 74,"['d3_45_fact', 414, 0, 1, 'yes', 'meta']" 77 | 75,"['d3_45_fact', 414, 0, 1, 'yes', 'openai']" 78 | 76,"['d3_45_fact', 414, 0, 1, 'yes', 'mistral']" 79 | 77,"['d3_45_fact', 414, 0, 1, 'yes', 'gpt4']" 80 | 78,"['d3_45_fact', 569, 0, 0, 'no', 'openai']" 81 | 79,"['d3_45_fact', 569, 0, 0, 'no', 'meta']" 82 | 80,"['d3_45_fact', 569, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_45_fact', 569, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_45_fact', 4, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_45_fact', 4, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_45_fact', 4, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_45_fact', 4, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_46_opinion.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_46_opinion', 459, 0, 1, 'yes', 'mistral']" 9 | 7,"['d3_46_opinion', 459, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_46_opinion', 459, 0, 1, 'yes', 
'openai']" 11 | 9,"['d3_46_opinion', 459, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_46_opinion', 17, 1, 0, 'no', 'openai']" 13 | 11,"['d3_46_opinion', 17, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_46_opinion', 17, 1, 0, 'no', 'mistral']" 15 | 13,"['d3_46_opinion', 17, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_46_opinion', 197, 0, 1, 'yes', 'openai']" 17 | 15,"['d3_46_opinion', 197, 0, 1, 'yes', 'mistral']" 18 | 16,"['d3_46_opinion', 197, 0, 1, 'yes', 'meta']" 19 | 17,"['d3_46_opinion', 197, 0, 1, 'yes', 'gpt4']" 20 | 18,"['d3_46_opinion', 160, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_46_opinion', 160, 0, 1, 'yes', 'meta']" 22 | 20,"['d3_46_opinion', 160, 0, 0, 'no', 'openai']" 23 | 21,"['d3_46_opinion', 160, 0, 1, 'yes', 'gpt4']" 24 | 22,"['d3_46_opinion', 529, 0, 0, 'no', 'openai']" 25 | 23,"['d3_46_opinion', 529, 0, 0, 'no', 'meta']" 26 | 24,"['d3_46_opinion', 529, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_46_opinion', 529, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_46_opinion', 486, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_46_opinion', 486, 0, 1, 'yes', 'meta']" 30 | 28,"['d3_46_opinion', 486, 0, 0, 'no', 'openai']" 31 | 29,"['d3_46_opinion', 486, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_46_opinion', 641, 0, 0, 'no', 'openai']" 33 | 31,"['d3_46_opinion', 641, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_46_opinion', 641, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_46_opinion', 641, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_46_opinion', 62, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_46_opinion', 62, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_46_opinion', 62, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_46_opinion', 62, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_46_opinion', 303, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_46_opinion', 303, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_46_opinion', 303, 0, 1, 'yes', 'openai']" 43 | 41,"['d3_46_opinion', 303, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_46_opinion', 21, 1, 0, 'no', 'openai']" 45 | 43,"['d3_46_opinion', 21, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_46_opinion', 21, 1, 0, 'no', 'mistral']" 47 | 45,"['d3_46_opinion', 21, 1, 0, 'no', 'gpt4']" 48 | 46,"['d3_46_opinion', 157, 0, 0, 'no', 'openai']" 49 | 47,"['d3_46_opinion', 157, 0, 0, 'no', 'mistral']" 50 | 48,"['d3_46_opinion', 157, 0, 0, 'no', 'meta']" 51 | 49,"['d3_46_opinion', 157, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_46_opinion', 363, 0, 1, 'yes', 'openai']" 53 | 51,"['d3_46_opinion', 363, 0, 1, 'yes', 'meta']" 54 | 52,"['d3_46_opinion', 363, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_46_opinion', 363, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_46_opinion', 19, 1, 0, 'no', 'mistral']" 57 | 55,"['d3_46_opinion', 19, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_46_opinion', 19, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_46_opinion', 19, 1, 0, 'no', 'gpt4']" 60 | 58,"['d3_46_opinion', 142, 0, 1, 'yes', 'mistral']" 61 | 59,"['d3_46_opinion', 142, 0, 1, 'yes', 'meta']" 62 | 60,"['d3_46_opinion', 142, 0, 1, 'yes', 'openai']" 63 | 61,"['d3_46_opinion', 142, 0, 1, 'yes', 'gpt4']" 64 | 62,"['d3_46_opinion', 467, 0, 0, 'no', 'openai']" 65 | 63,"['d3_46_opinion', 467, 0, 0, 'no', 'mistral']" 66 | 64,"['d3_46_opinion', 467, 0, 0, 'no', 'meta']" 67 | 65,"['d3_46_opinion', 467, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_46_opinion', 391, 0, 1, 'yes', 'mistral']" 69 | 67,"['d3_46_opinion', 391, 0, 1, 'yes', 'meta']" 70 | 68,"['d3_46_opinion', 391, 0, 1, 'yes', 'openai']" 71 | 69,"['d3_46_opinion', 391, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_46_opinion', 158, 0, 1, 'yes', 'meta']" 73 | 71,"['d3_46_opinion', 158, 0, 1, 'yes', 'openai']" 74 | 72,"['d3_46_opinion', 158, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_46_opinion', 158, 0, 0, 'no', 'gpt4']" 
76 | 74,"['d3_46_opinion', 423, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_46_opinion', 423, 0, 0, 'no', 'meta']" 78 | 76,"['d3_46_opinion', 423, 0, 0, 'no', 'openai']" 79 | 77,"['d3_46_opinion', 423, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_46_opinion', 581, 0, 0, 'no', 'openai']" 81 | 79,"['d3_46_opinion', 581, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_46_opinion', 581, 0, 1, 'yes', 'meta']" 83 | 81,"['d3_46_opinion', 581, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_46_opinion', 4, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_46_opinion', 4, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_46_opinion', 4, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_46_opinion', 4, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_47_math.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_47_math', 4804, 0, 0, 'no', 'openai']" 9 | 7,"['d3_47_math', 4804, 0, 0, 'no', 'meta']" 10 | 8,"['d3_47_math', 4804, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_47_math', 4804, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_47_math', 187, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_47_math', 187, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_47_math', 187, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_47_math', 187, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_47_math', 2066, 0, 0, 'no', 'openai']" 17 | 15,"['d3_47_math', 2066, 0, 0, 'no', 'meta']" 18 | 16,"['d3_47_math', 2066, 0, 0, '', 'mistral']" 19 | 17,"['d3_47_math', 2066, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_47_math', 1677, 0, 0, 'no', 'openai']" 21 | 19,"['d3_47_math', 1677, 0, 0, 'no', 'meta']" 22 | 20,"['d3_47_math', 1677, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_47_math', 1677, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_47_math', 5533, 0, 1, 'yes', 'meta']" 25 | 23,"['d3_47_math', 5533, 0, 1, 'yes', 'openai']" 26 | 24,"['d3_47_math', 5533, 0, 1, 'yes', 'mistral']" 27 | 25,"['d3_47_math', 5533, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_47_math', 5084, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_47_math', 5084, 0, 0, 'no', 'meta']" 30 | 28,"['d3_47_math', 5084, 0, 1, 'yes', 'openai']" 31 | 29,"['d3_47_math', 5084, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_47_math', 6703, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_47_math', 6703, 0, 0, 'no', 'meta']" 34 | 32,"['d3_47_math', 6703, 0, 0, 'no', 'openai']" 35 | 33,"['d3_47_math', 6703, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_47_math', 653, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_47_math', 653, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_47_math', 653, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_47_math', 653, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_47_math', 3170, 0, 0, 'no', 'openai']" 41 | 39,"['d3_47_math', 3170, 0, 0, 'no', 'mistral']" 42 | 40,"['d3_47_math', 3170, 0, 0, 'no', 'meta']" 43 | 41,"['d3_47_math', 3170, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_47_math', 223, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_47_math', 223, 1, 1, 'yes', 'mistral']" 46 | 44,"['d3_47_math', 223, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_47_math', 223, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_47_math', 1642, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_47_math', 1642, 0, 1, 'yes', 'meta']" 50 | 48,"['d3_47_math', 1642, 0, 0, 'no', 'openai']" 51 | 49,"['d3_47_math', 1642, 0, 1, 'yes', 'gpt4']" 52 | 50,"['d3_47_math', 3797, 0, 0, 'no', 'meta']" 53 | 51,"['d3_47_math', 3797, 0, 0, 'no', 'openai']" 54 | 52,"['d3_47_math', 3797, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_47_math', 3797, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_47_math', 199, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_47_math', 199, 1, 1, 'yes', 'meta']" 58 | 
56,"['d3_47_math', 199, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_47_math', 199, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_47_math', 1494, 0, 1, 'yes', 'openai']" 61 | 59,"['d3_47_math', 1494, 0, 1, 'yes', 'meta']" 62 | 60,"['d3_47_math', 1494, 0, 1, 'yes', 'mistral']" 63 | 61,"['d3_47_math', 1494, 0, 1, 'yes', 'gpt4']" 64 | 62,"['d3_47_math', 4883, 0, 0, 'no', 'meta']" 65 | 63,"['d3_47_math', 4883, 0, 0, 'no', 'openai']" 66 | 64,"['d3_47_math', 4883, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_47_math', 4883, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_47_math', 4094, 0, 0, 'no', 'openai']" 69 | 67,"['d3_47_math', 4094, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_47_math', 4094, 0, 0, 'no', 'meta']" 71 | 69,"['d3_47_math', 4094, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_47_math', 1656, 0, 0, 'no', 'meta']" 73 | 71,"['d3_47_math', 1656, 0, 0, 'no', 'openai']" 74 | 72,"['d3_47_math', 1656, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_47_math', 1656, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_47_math', 4427, 0, 0, 'no', 'meta']" 77 | 75,"['d3_47_math', 4427, 0, 0, 'no', 'openai']" 78 | 76,"['d3_47_math', 4427, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_47_math', 4427, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_47_math', 6082, 0, 0, 'no', 'openai']" 81 | 79,"['d3_47_math', 6082, 0, 0, 'no', 'meta']" 82 | 80,"['d3_47_math', 6082, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_47_math', 6082, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_47_math', 48, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_47_math', 48, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_47_math', 48, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_47_math', 48, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_49_computer.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_49_computer', 4786, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_49_computer', 4786, 0, 0, 'no', 'meta']" 10 | 8,"['d3_49_computer', 4786, 0, 0, 'no', 'openai']" 11 | 9,"['d3_49_computer', 4786, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_49_computer', 187, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_49_computer', 187, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_49_computer', 187, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_49_computer', 187, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_49_computer', 2058, 0, 1, 'yes', 'mistral']" 17 | 15,"['d3_49_computer', 2058, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_49_computer', 2058, 0, 1, 'yes', 'openai']" 19 | 17,"['d3_49_computer', 2058, 0, 1, 'yes', 'gpt4']" 20 | 18,"['d3_49_computer', 1670, 0, 0, 'no', 'meta']" 21 | 19,"['d3_49_computer', 1670, 0, 0, 'no', 'openai']" 22 | 20,"['d3_49_computer', 1670, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_49_computer', 1670, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_49_computer', 5513, 0, 0, 'no', 'openai']" 25 | 23,"['d3_49_computer', 5513, 0, 0, 'no', 'meta']" 26 | 24,"['d3_49_computer', 5513, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_49_computer', 5513, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_49_computer', 5065, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_49_computer', 5065, 0, 0, 'no', 'meta']" 30 | 28,"['d3_49_computer', 5065, 0, 0, 'no', 'openai']" 31 | 29,"['d3_49_computer', 5065, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_49_computer', 6678, 0, 0, 'no', 'openai']" 33 | 31,"['d3_49_computer', 6678, 0, 0, 'no', 'meta']" 34 | 32,"['d3_49_computer', 6678, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_49_computer', 6678, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_49_computer', 650, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_49_computer', 650, 1, 1, 'yes', 'meta']" 38 | 
36,"['d3_49_computer', 650, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_49_computer', 650, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_49_computer', 3158, 0, 1, 'yes', 'mistral']" 41 | 39,"['d3_49_computer', 3158, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_49_computer', 3158, 0, 1, 'yes', 'openai']" 43 | 41,"['d3_49_computer', 3158, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_49_computer', 223, 1, 1, 'yes', 'meta']" 45 | 43,"['d3_49_computer', 223, 1, 1, 'yes', 'openai']" 46 | 44,"['d3_49_computer', 223, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_49_computer', 223, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_49_computer', 1636, 0, 0, 'no', 'openai']" 49 | 47,"['d3_49_computer', 1636, 0, 0, 'no', 'mistral']" 50 | 48,"['d3_49_computer', 1636, 0, 0, 'no', 'meta']" 51 | 49,"['d3_49_computer', 1636, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_49_computer', 3783, 0, 0, 'no', 'openai']" 53 | 51,"['d3_49_computer', 3783, 0, 1, 'yes', 'meta']" 54 | 52,"['d3_49_computer', 3783, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_49_computer', 3783, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_49_computer', 198, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_49_computer', 198, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_49_computer', 198, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_49_computer', 198, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_49_computer', 1488, 0, 0, 'no', 'meta']" 61 | 59,"['d3_49_computer', 1488, 0, 0, 'no', 'openai']" 62 | 60,"['d3_49_computer', 1488, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_49_computer', 1488, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_49_computer', 4865, 0, 1, 'yes', 'openai']" 65 | 63,"['d3_49_computer', 4865, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_49_computer', 4865, 0, 1, 'yes', 'mistral']" 67 | 65,"['d3_49_computer', 4865, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_49_computer', 4079, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_49_computer', 4079, 0, 0, 'no', 'meta']" 70 | 68,"['d3_49_computer', 4079, 0, 0, 'no', 'openai']" 71 | 69,"['d3_49_computer', 4079, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_49_computer', 1650, 0, 0, 'no', 'openai']" 73 | 71,"['d3_49_computer', 1650, 0, 0, 'no', 'mistral']" 74 | 72,"['d3_49_computer', 1650, 0, 0, 'no', 'meta']" 75 | 73,"['d3_49_computer', 1650, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_49_computer', 4411, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_49_computer', 4411, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_49_computer', 4411, 0, 0, 'no', 'openai']" 79 | 77,"['d3_49_computer', 4411, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_49_computer', 6059, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_49_computer', 6059, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_49_computer', 6059, 0, 0, 'no', 'openai']" 83 | 81,"['d3_49_computer', 6059, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_49_computer', 48, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_49_computer', 48, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_49_computer', 48, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_49_computer', 48, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_4_atheism.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_4_atheism', 109, 0, 0, 'no', 'openai']" 9 | 7,"['d3_4_atheism', 109, 0, 0, 'no', 'meta']" 10 | 8,"['d3_4_atheism', 109, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_4_atheism', 109, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_4_atheism', 4, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_4_atheism', 4, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_4_atheism', 4, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_4_atheism', 4, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_4_atheism', 47, 0, 
0, 'no', 'openai']" 17 | 15,"['d3_4_atheism', 47, 0, 0, 'no', 'meta']" 18 | 16,"['d3_4_atheism', 47, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_4_atheism', 47, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_4_atheism', 38, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_4_atheism', 38, 0, 0, 'no', 'meta']" 22 | 20,"['d3_4_atheism', 38, 0, 0, 'no', 'openai']" 23 | 21,"['d3_4_atheism', 38, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_4_atheism', 126, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_4_atheism', 126, 0, 0, 'no', 'meta']" 26 | 24,"['d3_4_atheism', 126, 0, 0, 'no', 'openai']" 27 | 25,"['d3_4_atheism', 126, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_4_atheism', 116, 0, 1, 'yes', 'openai']" 29 | 27,"['d3_4_atheism', 116, 0, 1, 'yes', 'mistral']" 30 | 28,"['d3_4_atheism', 116, 0, 1, 'yes', 'meta']" 31 | 29,"['d3_4_atheism', 116, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_4_atheism', 153, 0, 0, 'no', 'openai']" 33 | 31,"['d3_4_atheism', 153, 0, 0, 'no', 'meta']" 34 | 32,"['d3_4_atheism', 153, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_4_atheism', 153, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_4_atheism', 14, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_4_atheism', 14, 1, 0, 'no', 'mistral']" 38 | 36,"['d3_4_atheism', 14, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_4_atheism', 14, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_4_atheism', 72, 0, 0, 'this', 'mistral']" 41 | 39,"['d3_4_atheism', 72, 0, 0, 'no', 'meta']" 42 | 40,"['d3_4_atheism', 72, 0, 0, 'no', 'openai']" 43 | 41,"['d3_4_atheism', 72, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_4_atheism', 5, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_4_atheism', 5, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_4_atheism', 5, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_4_atheism', 5, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_4_atheism', 37, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_4_atheism', 37, 0, 0, 'no', 'meta']" 50 | 48,"['d3_4_atheism', 37, 0, 0, 'no', 'openai']" 51 | 49,"['d3_4_atheism', 37, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_4_atheism', 86, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_4_atheism', 86, 0, 0, 'no', 'meta']" 54 | 52,"['d3_4_atheism', 86, 0, 0, 'no', 'openai']" 55 | 53,"['d3_4_atheism', 86, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_4_atheism', 4, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_4_atheism', 4, 1, 1, 'yes', 'mistral']" 58 | 56,"['d3_4_atheism', 4, 1, 1, 'yes', 'meta']" 59 | 57,"['d3_4_atheism', 4, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_4_atheism', 34, 0, 0, 'no', 'meta']" 61 | 59,"['d3_4_atheism', 34, 0, 0, 'no', 'openai']" 62 | 60,"['d3_4_atheism', 34, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_4_atheism', 34, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_4_atheism', 111, 0, 0, 'no', 'meta']" 65 | 63,"['d3_4_atheism', 111, 0, 0, 'no', 'openai']" 66 | 64,"['d3_4_atheism', 111, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_4_atheism', 111, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_4_atheism', 93, 0, 0, 'no', 'openai']" 69 | 67,"['d3_4_atheism', 93, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_4_atheism', 93, 0, 0, 'no', 'meta']" 71 | 69,"['d3_4_atheism', 93, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_4_atheism', 37, 0, 0, 'no', 'mistral']" 73 | 71,"['d3_4_atheism', 37, 0, 0, 'no', 'meta']" 74 | 72,"['d3_4_atheism', 37, 0, 0, 'no', 'openai']" 75 | 73,"['d3_4_atheism', 37, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_4_atheism', 101, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_4_atheism', 101, 0, 0, 'no', 'meta']" 78 | 76,"['d3_4_atheism', 101, 0, 0, 'no', 'openai']" 79 | 77,"['d3_4_atheism', 101, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_4_atheism', 139, 0, 0, 'no', 'openai']" 81 | 79,"['d3_4_atheism', 139, 0, 0, 'no', 'meta']" 82 | 80,"['d3_4_atheism', 139, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_4_atheism', 139, 0, 0, 'no', 'gpt4']" 
84 | 82,"['d3_4_atheism', 1, 1, 0, 'no', 'mistral']" 85 | 83,"['d3_4_atheism', 1, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_4_atheism', 1, 1, 0, 'no', 'openai']" 87 | 85,"['d3_4_atheism', 1, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_50_sport.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_50_sport', 4798, 0, 0, 'no', 'openai']" 9 | 7,"['d3_50_sport', 4798, 0, 0, 'no', 'meta']" 10 | 8,"['d3_50_sport', 4798, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_50_sport', 4798, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_50_sport', 187, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_50_sport', 187, 1, 0, 'no', 'meta']" 14 | 12,"['d3_50_sport', 187, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_50_sport', 187, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_50_sport', 2064, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_50_sport', 2064, 0, 0, 'no', 'meta']" 18 | 16,"['d3_50_sport', 2064, 0, 0, 'no', 'openai']" 19 | 17,"['d3_50_sport', 2064, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_50_sport', 1675, 0, 0, 'no', 'openai']" 21 | 19,"['d3_50_sport', 1675, 0, 0, 'no', 'meta']" 22 | 20,"['d3_50_sport', 1675, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_50_sport', 1675, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_50_sport', 5527, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_50_sport', 5527, 0, 0, 'no', 'meta']" 26 | 24,"['d3_50_sport', 5527, 0, 0, 'no', 'openai']" 27 | 25,"['d3_50_sport', 5527, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_50_sport', 5078, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_50_sport', 5078, 0, 0, 'no', 'meta']" 30 | 28,"['d3_50_sport', 5078, 0, 0, 'no', 'openai']" 31 | 29,"['d3_50_sport', 5078, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_50_sport', 6695, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_50_sport', 6695, 0, 0, 'no', 'meta']" 34 | 32,"['d3_50_sport', 6695, 0, 0, 'no', 'openai']" 35 | 33,"['d3_50_sport', 6695, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_50_sport', 652, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_50_sport', 652, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_50_sport', 652, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_50_sport', 652, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_50_sport', 3166, 0, 0, 'no', 'openai']" 41 | 39,"['d3_50_sport', 3166, 0, 0, 'no', 'mistral']" 42 | 40,"['d3_50_sport', 3166, 0, 0, 'no', 'meta']" 43 | 41,"['d3_50_sport', 3166, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_50_sport', 223, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_50_sport', 223, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_50_sport', 223, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_50_sport', 223, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_50_sport', 1640, 0, 0, 'no', 'openai']" 49 | 47,"['d3_50_sport', 1640, 0, 0, 'no', 'meta']" 50 | 48,"['d3_50_sport', 1640, 0, 0, 'no', 'mistral']" 51 | 49,"['d3_50_sport', 1640, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_50_sport', 3792, 0, 0, 'no', 'mistral']" 53 | 51,"['d3_50_sport', 3792, 0, 0, 'no', 'meta']" 54 | 52,"['d3_50_sport', 3792, 0, 0, 'no', 'openai']" 55 | 53,"['d3_50_sport', 3792, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_50_sport', 199, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_50_sport', 199, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_50_sport', 199, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_50_sport', 199, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_50_sport', 1492, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_50_sport', 1492, 0, 0, 'no', 'meta']" 62 | 60,"['d3_50_sport', 1492, 0, 0, 'no', 'openai']" 63 | 61,"['d3_50_sport', 1492, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_50_sport', 4877, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_50_sport', 4877, 
0, 0, 'no', 'meta']" 66 | 64,"['d3_50_sport', 4877, 0, 0, 'no', 'openai']" 67 | 65,"['d3_50_sport', 4877, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_50_sport', 4089, 0, 0, 'no', 'meta']" 69 | 67,"['d3_50_sport', 4089, 0, 0, 'no', 'openai']" 70 | 68,"['d3_50_sport', 4089, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_50_sport', 4089, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_50_sport', 1654, 0, 0, 'no', 'openai']" 73 | 71,"['d3_50_sport', 1654, 0, 0, 'no', 'mistral']" 74 | 72,"['d3_50_sport', 1654, 0, 0, 'no', 'meta']" 75 | 73,"['d3_50_sport', 1654, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_50_sport', 4422, 0, 0, 'no', 'openai']" 77 | 75,"['d3_50_sport', 4422, 0, 0, 'no', 'meta']" 78 | 76,"['d3_50_sport', 4422, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_50_sport', 4422, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_50_sport', 6074, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_50_sport', 6074, 0, 0, 'no', 'meta']" 82 | 80,"['d3_50_sport', 6074, 0, 0, 'no', 'openai']" 83 | 81,"['d3_50_sport', 6074, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_50_sport', 48, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_50_sport', 48, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_50_sport', 48, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_50_sport', 48, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_52_family.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_52_family', 4756, 0, 0, 'no', 'meta']" 9 | 7,"['d3_52_family', 4756, 0, 0, 'no', 'openai']" 10 | 8,"['d3_52_family', 4756, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_52_family', 4756, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_52_family', 186, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_52_family', 186, 1, 1, 'yes', 'mistral']" 14 | 12,"['d3_52_family', 186, 1, 1, 'yes', 'meta']" 15 | 13,"['d3_52_family', 186, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_52_family', 2045, 0, 0, 'no', 'meta']" 17 | 15,"['d3_52_family', 2045, 0, 0, 'no', 'openai']" 18 | 16,"['d3_52_family', 2045, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_52_family', 2045, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_52_family', 1660, 0, 0, 'no', 'openai']" 21 | 19,"['d3_52_family', 1660, 0, 1, 'yes', 'meta']" 22 | 20,"['d3_52_family', 1660, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_52_family', 1660, 0, 1, 'yes', 'gpt4']" 24 | 22,"['d3_52_family', 5477, 0, 0, 'no', 'openai']" 25 | 23,"['d3_52_family', 5477, 0, 0, 'no', 'mistral']" 26 | 24,"['d3_52_family', 5477, 0, 0, 'no', 'meta']" 27 | 25,"['d3_52_family', 5477, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_52_family', 5033, 0, 0, 'no', 'openai']" 29 | 27,"['d3_52_family', 5033, 0, 0, 'no', 'meta']" 30 | 28,"['d3_52_family', 5033, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_52_family', 5033, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_52_family', 6636, 0, 0, 'no', 'mistral']" 33 | 31,"['d3_52_family', 6636, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_52_family', 6636, 0, 0, 'no', 'openai']" 35 | 33,"['d3_52_family', 6636, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_52_family', 646, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_52_family', 646, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_52_family', 646, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_52_family', 646, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_52_family', 3138, 0, 0, 'no', 'openai']" 41 | 39,"['d3_52_family', 3138, 0, 0, 'no', 'meta']" 42 | 40,"['d3_52_family', 3138, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_52_family', 3138, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_52_family', 221, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_52_family', 221, 1, 1, 'yes', 'mistral']" 46 | 
44,"['d3_52_family', 221, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_52_family', 221, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_52_family', 1626, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_52_family', 1626, 0, 0, 'no', 'meta']" 50 | 48,"['d3_52_family', 1626, 0, 0, 'no', 'openai']" 51 | 49,"['d3_52_family', 1626, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_52_family', 3758, 0, 0, 'no', 'meta']" 53 | 51,"['d3_52_family', 3758, 0, 0, 'no', 'openai']" 54 | 52,"['d3_52_family', 3758, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_52_family', 3758, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_52_family', 197, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_52_family', 197, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_52_family', 197, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_52_family', 197, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_52_family', 1478, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_52_family', 1478, 0, 0, 'no', 'meta']" 62 | 60,"['d3_52_family', 1478, 0, 0, 'no', 'openai']" 63 | 61,"['d3_52_family', 1478, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_52_family', 4833, 0, 1, 'yes', 'mistral']" 65 | 63,"['d3_52_family', 4833, 0, 1, 'yes', 'meta']" 66 | 64,"['d3_52_family', 4833, 0, 1, 'yes', 'openai']" 67 | 65,"['d3_52_family', 4833, 0, 1, 'yes', 'gpt4']" 68 | 66,"['d3_52_family', 4053, 0, 0, 'no', 'openai']" 69 | 67,"['d3_52_family', 4053, 0, 0, 'no', 'meta']" 70 | 68,"['d3_52_family', 4053, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_52_family', 4053, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_52_family', 1639, 0, 0, 'no', 'openai']" 73 | 71,"['d3_52_family', 1639, 0, 0, 'no', 'meta']" 74 | 72,"['d3_52_family', 1639, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_52_family', 1639, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_52_family', 4382, 0, 0, 'no', 'openai']" 77 | 75,"['d3_52_family', 4382, 0, 0, 'no', 'mistral']" 78 | 76,"['d3_52_family', 4382, 0, 0, 'no', 'meta']" 79 | 77,"['d3_52_family', 4382, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_52_family', 6020, 0, 0, 'no', 'openai']" 81 | 79,"['d3_52_family', 6020, 0, 0, 'no', 'mistral']" 82 | 80,"['d3_52_family', 6020, 0, 0, 'no', 'meta']" 83 | 81,"['d3_52_family', 6020, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_52_family', 48, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_52_family', 48, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_52_family', 48, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_52_family', 48, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_53_politic.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_53_politic', 4738, 0, 0, 'no', 'openai']" 9 | 7,"['d3_53_politic', 4738, 0, 1, 'yes', 'meta']" 10 | 8,"['d3_53_politic', 4738, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_53_politic', 4738, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_53_politic', 185, 1, 0, 'no', 'mistral']" 13 | 11,"['d3_53_politic', 185, 1, 0, 'no', 'meta']" 14 | 12,"['d3_53_politic', 185, 1, 0, 'no', 'openai']" 15 | 13,"['d3_53_politic', 185, 1, 0, 'no', 'gpt4']" 16 | 14,"['d3_53_politic', 2037, 0, 0, 'no', 'openai']" 17 | 15,"['d3_53_politic', 2037, 0, 0, 'no', 'mistral']" 18 | 16,"['d3_53_politic', 2037, 0, 0, 'no', 'meta']" 19 | 17,"['d3_53_politic', 2037, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_53_politic', 1653, 0, 0, 'no', 'openai']" 21 | 19,"['d3_53_politic', 1653, 0, 0, 'no', 'meta']" 22 | 20,"['d3_53_politic', 1653, 0, 0, 'no', 'mistral']" 23 | 21,"['d3_53_politic', 1653, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_53_politic', 5457, 0, 0, 'no', 'openai']" 25 | 23,"['d3_53_politic', 5457, 0, 0, 'no', 'meta']" 26 | 
24,"['d3_53_politic', 5457, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_53_politic', 5457, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_53_politic', 5014, 0, 0, 'no', 'mistral']" 29 | 27,"['d3_53_politic', 5014, 0, 0, 'no', 'meta']" 30 | 28,"['d3_53_politic', 5014, 0, 0, 'no', 'openai']" 31 | 29,"['d3_53_politic', 5014, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_53_politic', 6611, 0, 0, 'no', 'openai']" 33 | 31,"['d3_53_politic', 6611, 0, 0, 'no', 'meta']" 34 | 32,"['d3_53_politic', 6611, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_53_politic', 6611, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_53_politic', 644, 1, 1, 'yes', 'mistral']" 37 | 35,"['d3_53_politic', 644, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_53_politic', 644, 1, 1, 'yes', 'openai']" 39 | 37,"['d3_53_politic', 644, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_53_politic', 3126, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_53_politic', 3126, 0, 0, 'no', 'meta']" 42 | 40,"['d3_53_politic', 3126, 0, 0, 'no', 'openai']" 43 | 41,"['d3_53_politic', 3126, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_53_politic', 220, 1, 0, 'no', 'openai']" 45 | 43,"['d3_53_politic', 220, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_53_politic', 220, 1, 0, 'no', 'mistral']" 47 | 45,"['d3_53_politic', 220, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_53_politic', 1620, 0, 0, 'no', 'openai']" 49 | 47,"['d3_53_politic', 1620, 0, 0, 'no', 'meta']" 50 | 48,"['d3_53_politic', 1620, 0, 0, 'no', 'mistral']" 51 | 49,"['d3_53_politic', 1620, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_53_politic', 3744, 0, 0, 'no', 'meta']" 53 | 51,"['d3_53_politic', 3744, 0, 0, 'no', 'openai']" 54 | 52,"['d3_53_politic', 3744, 0, 0, 'no', 'mistral']" 55 | 53,"['d3_53_politic', 3744, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_53_politic', 196, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_53_politic', 196, 1, 1, 'yes', 'mistral']" 58 | 56,"['d3_53_politic', 196, 1, 1, 'yes', 'meta']" 59 | 57,"['d3_53_politic', 196, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_53_politic', 1473, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_53_politic', 1473, 0, 0, 'no', 'meta']" 62 | 60,"['d3_53_politic', 1473, 0, 0, 'no', 'openai']" 63 | 61,"['d3_53_politic', 1473, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_53_politic', 4815, 0, 0, 'no', 'openai']" 65 | 63,"['d3_53_politic', 4815, 0, 0, 'no', 'meta']" 66 | 64,"['d3_53_politic', 4815, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_53_politic', 4815, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_53_politic', 4038, 0, 0, 'no', 'openai']" 69 | 67,"['d3_53_politic', 4038, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_53_politic', 4038, 0, 0, 'no', 'meta']" 71 | 69,"['d3_53_politic', 4038, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_53_politic', 1633, 0, 0, 'no', 'openai']" 73 | 71,"['d3_53_politic', 1633, 0, 0, 'no', 'meta']" 74 | 72,"['d3_53_politic', 1633, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_53_politic', 1633, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_53_politic', 4366, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_53_politic', 4366, 0, 0, 'no', 'meta']" 78 | 76,"['d3_53_politic', 4366, 0, 0, 'no', 'openai']" 79 | 77,"['d3_53_politic', 4366, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_53_politic', 5997, 0, 0, 'no', 'mistral']" 81 | 79,"['d3_53_politic', 5997, 0, 0, 'no', 'meta']" 82 | 80,"['d3_53_politic', 5997, 0, 0, 'no', 'openai']" 83 | 81,"['d3_53_politic', 5997, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_53_politic', 48, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_53_politic', 48, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_53_politic', 48, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_53_politic', 48, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_5_evacuate.csv: 
-------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_5_evacuate', 1707, 0, 0, 'no', 'openai']" 9 | 7,"['d3_5_evacuate', 1707, 0, 0, 'no', 'mistral']" 10 | 8,"['d3_5_evacuate', 1707, 0, 1, 'yes', 'meta']" 11 | 9,"['d3_5_evacuate', 1707, 0, 1, 'yes', 'gpt4']" 12 | 10,"['d3_5_evacuate', 66, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_5_evacuate', 66, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_5_evacuate', 66, 1, 0, 'no', 'mistral']" 15 | 13,"['d3_5_evacuate', 66, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_5_evacuate', 734, 0, 0, 'no', 'mistral']" 17 | 15,"['d3_5_evacuate', 734, 0, 0, 'no', 'meta']" 18 | 16,"['d3_5_evacuate', 734, 0, 0, 'no', 'openai']" 19 | 17,"['d3_5_evacuate', 734, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_5_evacuate', 595, 0, 0, 'no', 'openai']" 21 | 19,"['d3_5_evacuate', 595, 0, 0, 'no', 'mistral']" 22 | 20,"['d3_5_evacuate', 595, 0, 1, 'yes', 'meta']" 23 | 21,"['d3_5_evacuate', 595, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_5_evacuate', 1966, 0, 0, 'no', 'openai']" 25 | 23,"['d3_5_evacuate', 1966, 0, 0, 'no', 'meta']" 26 | 24,"['d3_5_evacuate', 1966, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_5_evacuate', 1966, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_5_evacuate', 1806, 0, 0, 'no', 'openai']" 29 | 27,"['d3_5_evacuate', 1806, 0, 1, 'yes', 'meta']" 30 | 28,"['d3_5_evacuate', 1806, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_5_evacuate', 1806, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_5_evacuate', 2382, 0, 1, 'yes', 'openai']" 33 | 31,"['d3_5_evacuate', 2382, 0, 1, 'yes', 'meta']" 34 | 32,"['d3_5_evacuate', 2382, 0, 0, 'no', 'mistral']" 35 | 33,"['d3_5_evacuate', 2382, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_5_evacuate', 232, 0, 1, 'yes', 'openai']" 37 | 35,"['d3_5_evacuate', 232, 0, 1, 'yes', 'mistral']" 38 | 36,"['d3_5_evacuate', 232, 0, 1, 'yes', 'meta']" 39 | 37,"['d3_5_evacuate', 232, 0, 1, 'yes', 'gpt4']" 40 | 38,"['d3_5_evacuate', 1126, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_5_evacuate', 1126, 0, 0, 'no', 'meta']" 42 | 40,"['d3_5_evacuate', 1126, 0, 0, 'no', 'openai']" 43 | 41,"['d3_5_evacuate', 1126, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_5_evacuate', 79, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_5_evacuate', 79, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_5_evacuate', 79, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_5_evacuate', 79, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_5_evacuate', 583, 0, 0, 'no', 'openai']" 49 | 47,"['d3_5_evacuate', 583, 0, 0, 'no', 'meta']" 50 | 48,"['d3_5_evacuate', 583, 0, 0, 'no', 'mistral']" 51 | 49,"['d3_5_evacuate', 583, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_5_evacuate', 1349, 0, 1, 'yes', 'openai']" 53 | 51,"['d3_5_evacuate', 1349, 0, 1, 'yes', 'mistral']" 54 | 52,"['d3_5_evacuate', 1349, 0, 1, 'yes', 'meta']" 55 | 53,"['d3_5_evacuate', 1349, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_5_evacuate', 70, 1, 1, 'yes', 'openai']" 57 | 55,"['d3_5_evacuate', 70, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_5_evacuate', 70, 1, 0, 'no', 'mistral']" 59 | 57,"['d3_5_evacuate', 70, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_5_evacuate', 530, 0, 0, 'no', 'openai']" 61 | 59,"['d3_5_evacuate', 530, 0, 0, 'no', 'mistral']" 62 | 60,"['d3_5_evacuate', 530, 0, 0, 'no', 'meta']" 63 | 61,"['d3_5_evacuate', 530, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_5_evacuate', 1735, 0, 0, 'no', 'meta']" 65 | 63,"['d3_5_evacuate', 1735, 0, 0, 'no', 'openai']" 66 | 64,"['d3_5_evacuate', 1735, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_5_evacuate', 1735, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_5_evacuate', 1454, 0, 0, 'no', 'openai']" 69 | 67,"['d3_5_evacuate', 1454, 0, 0, 
'no', 'mistral']" 70 | 68,"['d3_5_evacuate', 1454, 0, 0, 'no', 'meta']" 71 | 69,"['d3_5_evacuate', 1454, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_5_evacuate', 588, 0, 0, 'no', 'openai']" 73 | 71,"['d3_5_evacuate', 588, 0, 0, 'no', 'meta']" 74 | 72,"['d3_5_evacuate', 588, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_5_evacuate', 588, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_5_evacuate', 1573, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_5_evacuate', 1573, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_5_evacuate', 1573, 0, 1, 'yes', 'openai']" 79 | 77,"['d3_5_evacuate', 1573, 0, 1, 'yes', 'gpt4']" 80 | 78,"['d3_5_evacuate', 2161, 0, 0, 'no', 'openai']" 81 | 79,"['d3_5_evacuate', 2161, 0, 0, 'no', 'meta']" 82 | 80,"['d3_5_evacuate', 2161, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_5_evacuate', 2161, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_5_evacuate', 17, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_5_evacuate', 17, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_5_evacuate', 17, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_5_evacuate', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_6_terorrism.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_6_terorrism', 1688, 0, 0, 'no', 'openai']" 9 | 7,"['d3_6_terorrism', 1688, 0, 0, 'no', 'meta']" 10 | 8,"['d3_6_terorrism', 1688, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_6_terorrism', 1688, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_6_terorrism', 66, 1, 1, 'yes', 'openai']" 13 | 11,"['d3_6_terorrism', 66, 1, 1, 'yes', 'mistral']" 14 | 12,"['d3_6_terorrism', 66, 1, 1, 'yes', 'meta']" 15 | 13,"['d3_6_terorrism', 66, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_6_terorrism', 726, 0, 0, 'poss', 'mistral']" 17 | 15,"['d3_6_terorrism', 726, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_6_terorrism', 726, 0, 1, 'yes', 'openai']" 19 | 17,"['d3_6_terorrism', 726, 0, 1, 'yes', 'gpt4']" 20 | 18,"['d3_6_terorrism', 589, 0, 0, 'no', 'openai']" 21 | 19,"['d3_6_terorrism', 589, 0, 0, 'no', 'mistral']" 22 | 20,"['d3_6_terorrism', 589, 0, 0, 'no', 'meta']" 23 | 21,"['d3_6_terorrism', 589, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_6_terorrism', 1944, 0, 0, 'no', 'mistral']" 25 | 23,"['d3_6_terorrism', 1944, 0, 0, 'no', 'meta']" 26 | 24,"['d3_6_terorrism', 1944, 0, 0, 'no', 'openai']" 27 | 25,"['d3_6_terorrism', 1944, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_6_terorrism', 1786, 0, 0, 'no', 'openai']" 29 | 27,"['d3_6_terorrism', 1786, 0, 0, 'no', 'meta']" 30 | 28,"['d3_6_terorrism', 1786, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_6_terorrism', 1786, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_6_terorrism', 2355, 0, 0, 'no', 'openai']" 33 | 31,"['d3_6_terorrism', 2355, 0, 0, 'no', 'mistral']" 34 | 32,"['d3_6_terorrism', 2355, 0, 0, 'no', 'meta']" 35 | 33,"['d3_6_terorrism', 2355, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_6_terorrism', 229, 0, 0, 'no', 'openai']" 37 | 35,"['d3_6_terorrism', 229, 0, 0, 'no', 'meta']" 38 | 36,"['d3_6_terorrism', 229, 0, 0, 'no', 'mistral']" 39 | 37,"['d3_6_terorrism', 229, 0, 0, 'no', 'gpt4']" 40 | 38,"['d3_6_terorrism', 1113, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_6_terorrism', 1113, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_6_terorrism', 1113, 0, 0, 'no', 'openai']" 43 | 41,"['d3_6_terorrism', 1113, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_6_terorrism', 78, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_6_terorrism', 78, 1, 0, 'no', 'mistral']" 46 | 44,"['d3_6_terorrism', 78, 1, 1, 'yes', 'meta']" 47 | 45,"['d3_6_terorrism', 78, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_6_terorrism', 
577, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_6_terorrism', 577, 0, 0, 'no', 'meta']" 50 | 48,"['d3_6_terorrism', 577, 0, 0, 'no', 'openai']" 51 | 49,"['d3_6_terorrism', 577, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_6_terorrism', 1334, 0, 0, 'no', 'openai']" 53 | 51,"['d3_6_terorrism', 1334, 0, 0, 'no', 'mistral']" 54 | 52,"['d3_6_terorrism', 1334, 0, 0, 'no', 'meta']" 55 | 53,"['d3_6_terorrism', 1334, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_6_terorrism', 70, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_6_terorrism', 70, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_6_terorrism', 70, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_6_terorrism', 70, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_6_terorrism', 524, 0, 0, 'no', 'openai']" 61 | 59,"['d3_6_terorrism', 524, 0, 0, 'no', 'mistral']" 62 | 60,"['d3_6_terorrism', 524, 0, 0, 'no', 'meta']" 63 | 61,"['d3_6_terorrism', 524, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_6_terorrism', 1715, 0, 0, 'no', 'meta']" 65 | 63,"['d3_6_terorrism', 1715, 0, 0, 'no', 'openai']" 66 | 64,"['d3_6_terorrism', 1715, 0, 0, 'no', 'mistral']" 67 | 65,"['d3_6_terorrism', 1715, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_6_terorrism', 1438, 0, 0, 'no', 'openai']" 69 | 67,"['d3_6_terorrism', 1438, 0, 0, 'no', 'mistral']" 70 | 68,"['d3_6_terorrism', 1438, 0, 0, 'no', 'meta']" 71 | 69,"['d3_6_terorrism', 1438, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_6_terorrism', 581, 0, 0, 'no', 'meta']" 73 | 71,"['d3_6_terorrism', 581, 0, 0, 'no', 'openai']" 74 | 72,"['d3_6_terorrism', 581, 0, 0, 'no', 'mistral']" 75 | 73,"['d3_6_terorrism', 581, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_6_terorrism', 1555, 0, 0, 'no', 'mistral']" 77 | 75,"['d3_6_terorrism', 1555, 0, 0, 'no', 'meta']" 78 | 76,"['d3_6_terorrism', 1555, 0, 0, 'no', 'openai']" 79 | 77,"['d3_6_terorrism', 1555, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_6_terorrism', 2136, 0, 0, 'no', 'openai']" 81 | 79,"['d3_6_terorrism', 2136, 0, 0, 'no', 'meta']" 82 | 80,"['d3_6_terorrism', 2136, 0, 0, 'no', 'mistral']" 83 | 81,"['d3_6_terorrism', 2136, 0, 0, 'no', 'gpt4']" 84 | 82,"['d3_6_terorrism', 17, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_6_terorrism', 17, 1, 1, 'yes', 'mistral']" 86 | 84,"['d3_6_terorrism', 17, 1, 1, 'yes', 'meta']" 87 | 85,"['d3_6_terorrism', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_7_crime.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_7_crime', 1675, 0, 0, 'no', 'openai']" 9 | 7,"['d3_7_crime', 1675, 0, 0, 'no', 'meta']" 10 | 8,"['d3_7_crime', 1675, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_7_crime', 1675, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_7_crime', 65, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_7_crime', 65, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_7_crime', 65, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_7_crime', 65, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_7_crime', 720, 0, 0, 'no', 'openai']" 17 | 15,"['d3_7_crime', 720, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_7_crime', 720, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_7_crime', 720, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_7_crime', 585, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_7_crime', 585, 0, 0, 'no', 'meta']" 22 | 20,"['d3_7_crime', 585, 0, 0, 'no', 'openai']" 23 | 21,"['d3_7_crime', 585, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_7_crime', 1930, 0, 0, 'no', 'openai']" 25 | 23,"['d3_7_crime', 1930, 0, 1, 'yes', 'meta']" 26 | 24,"['d3_7_crime', 1930, 0, 1, 'yes', 'mistral']" 27 | 25,"['d3_7_crime', 1930, 0, 1, 'yes', 'gpt4']" 28 | 26,"['d3_7_crime', 1773, 0, 0, 
'no', 'openai']" 29 | 27,"['d3_7_crime', 1773, 0, 0, 'no', 'meta']" 30 | 28,"['d3_7_crime', 1773, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_7_crime', 1773, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_7_crime', 2338, 0, 0, 'no', 'openai']" 33 | 31,"['d3_7_crime', 2338, 0, 0, 'no', 'mistral']" 34 | 32,"['d3_7_crime', 2338, 0, 0, 'no', 'meta']" 35 | 33,"['d3_7_crime', 2338, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_7_crime', 227, 1, 0, 'no', 'openai']" 37 | 35,"['d3_7_crime', 227, 1, 1, 'yes', 'meta']" 38 | 36,"['d3_7_crime', 227, 1, 1, 'yes', 'mistral']" 39 | 37,"['d3_7_crime', 227, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_7_crime', 1105, 0, 1, 'yes', 'openai']" 41 | 39,"['d3_7_crime', 1105, 0, 1, 'yes', 'meta']" 42 | 40,"['d3_7_crime', 1105, 0, 0, 'no', 'mistral']" 43 | 41,"['d3_7_crime', 1105, 0, 1, 'yes', 'gpt4']" 44 | 42,"['d3_7_crime', 78, 1, 1, 'yes', 'openai']" 45 | 43,"['d3_7_crime', 78, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_7_crime', 78, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_7_crime', 78, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_7_crime', 573, 0, 0, 'no', 'mistral']" 49 | 47,"['d3_7_crime', 573, 0, 0, 'no', 'meta']" 50 | 48,"['d3_7_crime', 573, 0, 0, 'no', 'openai']" 51 | 49,"['d3_7_crime', 573, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_7_crime', 1324, 0, 1, 'yes', 'meta']" 53 | 51,"['d3_7_crime', 1324, 0, 1, 'yes', 'openai']" 54 | 52,"['d3_7_crime', 1324, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_7_crime', 1324, 0, 1, 'yes', 'gpt4']" 56 | 54,"['d3_7_crime', 69, 1, 1, 'yes', 'meta']" 57 | 55,"['d3_7_crime', 69, 1, 1, 'yes', 'openai']" 58 | 56,"['d3_7_crime', 69, 1, 1, 'yes', 'mistral']" 59 | 57,"['d3_7_crime', 69, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_7_crime', 521, 0, 0, 'no', 'mistral']" 61 | 59,"['d3_7_crime', 521, 0, 0, 'no', 'meta']" 62 | 60,"['d3_7_crime', 521, 0, 0, 'no', 'openai']" 63 | 61,"['d3_7_crime', 521, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_7_crime', 1703, 0, 0, 'no', 'openai']" 65 | 63,"['d3_7_crime', 1703, 0, 0, 'no', 'mistral']" 66 | 64,"['d3_7_crime', 1703, 0, 0, 'no', 'meta']" 67 | 65,"['d3_7_crime', 1703, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_7_crime', 1428, 0, 0, 'no', 'mistral']" 69 | 67,"['d3_7_crime', 1428, 0, 0, 'no', 'meta']" 70 | 68,"['d3_7_crime', 1428, 0, 0, 'no', 'openai']" 71 | 69,"['d3_7_crime', 1428, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_7_crime', 577, 0, 0, 'no', 'openai']" 73 | 71,"['d3_7_crime', 577, 0, 0, 'no', 'mistral']" 74 | 72,"['d3_7_crime', 577, 0, 0, 'no', 'meta']" 75 | 73,"['d3_7_crime', 577, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_7_crime', 1544, 0, 1, 'yes', 'mistral']" 77 | 75,"['d3_7_crime', 1544, 0, 1, 'yes', 'meta']" 78 | 76,"['d3_7_crime', 1544, 0, 0, 'no', 'openai']" 79 | 77,"['d3_7_crime', 1544, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_7_crime', 2121, 0, 1, 'yes', 'openai']" 81 | 79,"['d3_7_crime', 2121, 0, 1, 'yes', 'mistral']" 82 | 80,"['d3_7_crime', 2121, 0, 0, 'no', 'meta']" 83 | 81,"['d3_7_crime', 2121, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_7_crime', 17, 1, 1, 'yes', 'mistral']" 85 | 83,"['d3_7_crime', 17, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_7_crime', 17, 1, 1, 'yes', 'openai']" 87 | 85,"['d3_7_crime', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_8_shelter.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_8_shelter', 1675, 0, 0, 'no', 'mistral']" 9 | 7,"['d3_8_shelter', 1675, 0, 0, 'no', 'meta']" 10 | 8,"['d3_8_shelter', 1675, 0, 1, 'yes', 'openai']" 11 | 
9,"['d3_8_shelter', 1675, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_8_shelter', 65, 1, 1, 'yes', 'meta']" 13 | 11,"['d3_8_shelter', 65, 1, 1, 'yes', 'openai']" 14 | 12,"['d3_8_shelter', 65, 1, 1, 'yes', 'mistral']" 15 | 13,"['d3_8_shelter', 65, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_8_shelter', 720, 0, 0, 'no', 'openai']" 17 | 15,"['d3_8_shelter', 720, 0, 0, 'no', 'meta']" 18 | 16,"['d3_8_shelter', 720, 0, 0, 'no', 'mistral']" 19 | 17,"['d3_8_shelter', 720, 0, 0, 'no', 'gpt4']" 20 | 18,"['d3_8_shelter', 584, 0, 0, 'no', 'openai']" 21 | 19,"['d3_8_shelter', 584, 0, 0, 'no', 'meta']" 22 | 20,"['d3_8_shelter', 584, 0, 1, 'yes', 'mistral']" 23 | 21,"['d3_8_shelter', 584, 0, 1, 'yes', 'gpt4']" 24 | 22,"['d3_8_shelter', 1929, 0, 1, 'yes', 'openai']" 25 | 23,"['d3_8_shelter', 1929, 0, 1, 'yes', 'mistral']" 26 | 24,"['d3_8_shelter', 1929, 0, 0, 'no', 'meta']" 27 | 25,"['d3_8_shelter', 1929, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_8_shelter', 1772, 0, 0, 'no', 'openai']" 29 | 27,"['d3_8_shelter', 1772, 0, 1, 'yes', 'meta']" 30 | 28,"['d3_8_shelter', 1772, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_8_shelter', 1772, 0, 1, 'yes', 'gpt4']" 32 | 30,"['d3_8_shelter', 2337, 0, 0, 'no', 'openai']" 33 | 31,"['d3_8_shelter', 2337, 0, 0, 'no', 'mistral']" 34 | 32,"['d3_8_shelter', 2337, 0, 0, 'no', 'meta']" 35 | 33,"['d3_8_shelter', 2337, 0, 0, 'no', 'gpt4']" 36 | 34,"['d3_8_shelter', 227, 1, 1, 'yes', 'openai']" 37 | 35,"['d3_8_shelter', 227, 1, 1, 'yes', 'mistral']" 38 | 36,"['d3_8_shelter', 227, 1, 1, 'yes', 'meta']" 39 | 37,"['d3_8_shelter', 227, 1, 1, 'yes', 'gpt4']" 40 | 38,"['d3_8_shelter', 1105, 0, 0, 'no', 'meta']" 41 | 39,"['d3_8_shelter', 1105, 0, 0, 'no', 'openai']" 42 | 40,"['d3_8_shelter', 1105, 0, 1, 'yes', 'mistral']" 43 | 41,"['d3_8_shelter', 1105, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_8_shelter', 78, 1, 1, 'yes', 'mistral']" 45 | 43,"['d3_8_shelter', 78, 1, 1, 'yes', 'meta']" 46 | 44,"['d3_8_shelter', 78, 1, 1, 'yes', 'openai']" 47 | 45,"['d3_8_shelter', 78, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_8_shelter', 572, 0, 0, 'no', 'openai']" 49 | 47,"['d3_8_shelter', 572, 0, 0, 'no', 'mistral']" 50 | 48,"['d3_8_shelter', 572, 0, 0, 'no', 'meta']" 51 | 49,"['d3_8_shelter', 572, 0, 0, 'no', 'gpt4']" 52 | 50,"['d3_8_shelter', 1324, 0, 1, 'yes', 'meta']" 53 | 51,"['d3_8_shelter', 1324, 0, 1, 'yes', 'openai']" 54 | 52,"['d3_8_shelter', 1324, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_8_shelter', 1324, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_8_shelter', 69, 1, 1, 'yes', 'mistral']" 57 | 55,"['d3_8_shelter', 69, 1, 1, 'yes', 'meta']" 58 | 56,"['d3_8_shelter', 69, 1, 1, 'yes', 'openai']" 59 | 57,"['d3_8_shelter', 69, 1, 1, 'yes', 'gpt4']" 60 | 58,"['d3_8_shelter', 520, 0, 0, 'no', 'openai']" 61 | 59,"['d3_8_shelter', 520, 0, 0, 'no', 'mistral']" 62 | 60,"['d3_8_shelter', 520, 0, 0, 'no', 'meta']" 63 | 61,"['d3_8_shelter', 520, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_8_shelter', 1702, 0, 0, 'no', 'mistral']" 65 | 63,"['d3_8_shelter', 1702, 0, 0, 'no', 'meta']" 66 | 64,"['d3_8_shelter', 1702, 0, 0, 'no', 'openai']" 67 | 65,"['d3_8_shelter', 1702, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_8_shelter', 1427, 0, 0, 'no', 'openai']" 69 | 67,"['d3_8_shelter', 1427, 0, 0, 'no', 'meta']" 70 | 68,"['d3_8_shelter', 1427, 0, 0, 'no', 'mistral']" 71 | 69,"['d3_8_shelter', 1427, 0, 0, 'no', 'gpt4']" 72 | 70,"['d3_8_shelter', 577, 0, 0, 'no', 'mistral']" 73 | 71,"['d3_8_shelter', 577, 0, 0, 'no', 'meta']" 74 | 72,"['d3_8_shelter', 577, 0, 0, 'no', 'openai']" 75 | 73,"['d3_8_shelter', 577, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_8_shelter', 1543, 0, 0, 'no', 'openai']" 77 
| 75,"['d3_8_shelter', 1543, 0, 0, 'no', 'mistral']" 78 | 76,"['d3_8_shelter', 1543, 0, 0, 'no', 'meta']" 79 | 77,"['d3_8_shelter', 1543, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_8_shelter', 2120, 0, 1, 'yes', 'mistral']" 81 | 79,"['d3_8_shelter', 2120, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_8_shelter', 2120, 0, 1, 'yes', 'openai']" 83 | 81,"['d3_8_shelter', 2120, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_8_shelter', 17, 1, 1, 'yes', 'meta']" 85 | 83,"['d3_8_shelter', 17, 1, 1, 'yes', 'openai']" 86 | 84,"['d3_8_shelter', 17, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_8_shelter', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/out/d3_9_food.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 0,task 3 | 1,idx 4 | 2,true_label 5 | 3,pred_label 6 | 4,answer 7 | 5,model 8 | 6,"['d3_9_food', 1689, 0, 0, 'no', 'meta']" 9 | 7,"['d3_9_food', 1689, 0, 0, 'no', 'openai']" 10 | 8,"['d3_9_food', 1689, 0, 0, 'no', 'mistral']" 11 | 9,"['d3_9_food', 1689, 0, 0, 'no', 'gpt4']" 12 | 10,"['d3_9_food', 66, 1, 1, 'yes', 'mistral']" 13 | 11,"['d3_9_food', 66, 1, 1, 'yes', 'meta']" 14 | 12,"['d3_9_food', 66, 1, 1, 'yes', 'openai']" 15 | 13,"['d3_9_food', 66, 1, 1, 'yes', 'gpt4']" 16 | 14,"['d3_9_food', 726, 0, 0, 'no', 'openai']" 17 | 15,"['d3_9_food', 726, 0, 1, 'yes', 'meta']" 18 | 16,"['d3_9_food', 726, 0, 1, 'yes', 'mistral']" 19 | 17,"['d3_9_food', 726, 0, 1, 'yes', 'gpt4']" 20 | 18,"['d3_9_food', 589, 0, 0, 'no', 'mistral']" 21 | 19,"['d3_9_food', 589, 0, 0, 'no', 'meta']" 22 | 20,"['d3_9_food', 589, 0, 0, 'no', 'openai']" 23 | 21,"['d3_9_food', 589, 0, 0, 'no', 'gpt4']" 24 | 22,"['d3_9_food', 1945, 0, 0, 'no', 'openai']" 25 | 23,"['d3_9_food', 1945, 0, 0, 'no', 'meta']" 26 | 24,"['d3_9_food', 1945, 0, 0, 'no', 'mistral']" 27 | 25,"['d3_9_food', 1945, 0, 0, 'no', 'gpt4']" 28 | 26,"['d3_9_food', 1787, 0, 0, 'no', 'openai']" 29 | 27,"['d3_9_food', 1787, 0, 0, 'no', 'meta']" 30 | 28,"['d3_9_food', 1787, 0, 0, 'no', 'mistral']" 31 | 29,"['d3_9_food', 1787, 0, 0, 'no', 'gpt4']" 32 | 30,"['d3_9_food', 2357, 0, 1, 'yes', 'meta']" 33 | 31,"['d3_9_food', 2357, 0, 0, 'no', 'openai']" 34 | 32,"['d3_9_food', 2357, 0, 1, 'yes', 'mistral']" 35 | 33,"['d3_9_food', 2357, 0, 1, 'yes', 'gpt4']" 36 | 34,"['d3_9_food', 229, 1, 0, 'no', 'mistral']" 37 | 35,"['d3_9_food', 229, 1, 0, 'no', 'meta']" 38 | 36,"['d3_9_food', 229, 1, 0, 'no', 'openai']" 39 | 37,"['d3_9_food', 229, 1, 0, 'no', 'gpt4']" 40 | 38,"['d3_9_food', 1114, 0, 0, 'no', 'mistral']" 41 | 39,"['d3_9_food', 1114, 0, 0, 'no', 'meta']" 42 | 40,"['d3_9_food', 1114, 0, 0, 'no', 'openai']" 43 | 41,"['d3_9_food', 1114, 0, 0, 'no', 'gpt4']" 44 | 42,"['d3_9_food', 78, 1, 1, 'yes', 'meta']" 45 | 43,"['d3_9_food', 78, 1, 1, 'yes', 'openai']" 46 | 44,"['d3_9_food', 78, 1, 1, 'yes', 'mistral']" 47 | 45,"['d3_9_food', 78, 1, 1, 'yes', 'gpt4']" 48 | 46,"['d3_9_food', 577, 0, 1, 'yes', 'mistral']" 49 | 47,"['d3_9_food', 577, 0, 1, 'yes', 'meta']" 50 | 48,"['d3_9_food', 577, 0, 0, 'no', 'openai']" 51 | 49,"['d3_9_food', 577, 0, 1, 'yes', 'gpt4']" 52 | 50,"['d3_9_food', 1335, 0, 1, 'yes', 'meta']" 53 | 51,"['d3_9_food', 1335, 0, 1, 'yes', 'openai']" 54 | 52,"['d3_9_food', 1335, 0, 1, 'yes', 'mistral']" 55 | 53,"['d3_9_food', 1335, 0, 0, 'no', 'gpt4']" 56 | 54,"['d3_9_food', 70, 1, 0, 'no', 'openai']" 57 | 55,"['d3_9_food', 70, 1, 0, 'no', 'meta']" 58 | 56,"['d3_9_food', 70, 1, 0, 'no', 'mistral']" 59 | 57,"['d3_9_food', 70, 1, 0, 'no', 'gpt4']" 60 | 58,"['d3_9_food', 525, 0, 0, 
'no', 'meta']" 61 | 59,"['d3_9_food', 525, 0, 0, 'no', 'openai']" 62 | 60,"['d3_9_food', 525, 0, 0, 'no', 'mistral']" 63 | 61,"['d3_9_food', 525, 0, 0, 'no', 'gpt4']" 64 | 62,"['d3_9_food', 1716, 0, 0, 'no', 'openai']" 65 | 63,"['d3_9_food', 1716, 0, 0, 'no', 'mistral']" 66 | 64,"['d3_9_food', 1716, 0, 0, 'no', 'meta']" 67 | 65,"['d3_9_food', 1716, 0, 0, 'no', 'gpt4']" 68 | 66,"['d3_9_food', 1439, 0, 0, 'no', 'openai']" 69 | 67,"['d3_9_food', 1439, 0, 1, 'yes', 'meta']" 70 | 68,"['d3_9_food', 1439, 0, 1, 'yes', 'mistral']" 71 | 69,"['d3_9_food', 1439, 0, 1, 'yes', 'gpt4']" 72 | 70,"['d3_9_food', 582, 0, 0, 'no', 'mistral']" 73 | 71,"['d3_9_food', 582, 0, 0, 'no', 'meta']" 74 | 72,"['d3_9_food', 582, 0, 0, 'no', 'openai']" 75 | 73,"['d3_9_food', 582, 0, 0, 'no', 'gpt4']" 76 | 74,"['d3_9_food', 1556, 0, 0, 'no', 'meta']" 77 | 75,"['d3_9_food', 1556, 0, 0, 'no', 'openai']" 78 | 76,"['d3_9_food', 1556, 0, 0, 'no', 'mistral']" 79 | 77,"['d3_9_food', 1556, 0, 0, 'no', 'gpt4']" 80 | 78,"['d3_9_food', 2138, 0, 1, 'yes', 'openai']" 81 | 79,"['d3_9_food', 2138, 0, 1, 'yes', 'meta']" 82 | 80,"['d3_9_food', 2138, 0, 1, 'yes', 'mistral']" 83 | 81,"['d3_9_food', 2138, 0, 1, 'yes', 'gpt4']" 84 | 82,"['d3_9_food', 17, 1, 1, 'yes', 'openai']" 85 | 83,"['d3_9_food', 17, 1, 1, 'yes', 'meta']" 86 | 84,"['d3_9_food', 17, 1, 1, 'yes', 'mistral']" 87 | 85,"['d3_9_food', 17, 1, 1, 'yes', 'gpt4']" 88 | -------------------------------------------------------------------------------- /scripts/d3/run_d3.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from d3 import TASKS_D3 4 | 5 | if __name__ == "__main__": 6 | all_tasks = TASKS_D3.keys() 7 | all_tasks = list(sorted(all_tasks)) 8 | for task in all_tasks: 9 | command = f"python run_eval.py {task}" 10 | print(command) 11 | os.system(command) -------------------------------------------------------------------------------- /scripts/launch.yaml: -------------------------------------------------------------------------------- 1 | # amlt debug launch.yaml 2 | # amlt --pdb run launch.yaml 3 | # amlt run launch.yaml 4 | 5 | description: fMRI embedding extraction 6 | environment: 7 | image: amlt-sing/acpt-rocm5.7_ubuntu20.04_py3.10_pytorch_2.0.1 8 | # image: amlt-sing/acpt-rocm5.4.2_ubuntu20.04_py3.8_pytorch_2.0.0 9 | # image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel 10 | # registry: docker.io 11 | setup: 12 | - pip install -r requirements.txt --user 13 | - pip install -e ridge_utils_frozen/ 14 | - git clone https://github.com/csinva/imodelsX.git 15 | - pip install imodelsX/ 16 | - pip install . 17 | 18 | code: 19 | local_dir: $CONFIG_DIR/.. 
--------------------------------------------------------------------------------
/scripts/launch.yaml:
--------------------------------------------------------------------------------
# amlt debug launch.yaml
# amlt --pdb run launch.yaml
# amlt run launch.yaml

description: fMRI embedding extraction
environment:
  image: amlt-sing/acpt-rocm5.7_ubuntu20.04_py3.10_pytorch_2.0.1
  # image: amlt-sing/acpt-rocm5.4.2_ubuntu20.04_py3.8_pytorch_2.0.0
  # image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
  # registry: docker.io
  setup:
    - pip install -r requirements.txt --user
    - pip install -e ridge_utils_frozen/
    - git clone https://github.com/csinva/imodelsX.git
    - pip install imodelsX/
    - pip install .

code:
  local_dir: $CONFIG_DIR/..

# target:
#   service: amlk8s
#   name: itplabrr1cl1
#   vc: resrchvc

target:
  service: sing
  name: huashanvc1
  resource_group: gcr-singularity
  workspace_name: msrresrchws

storage:
  output:
    storage_account_name: chansingh
    container_name: cs1
    mount_dir: /mntv1 # dir on the local machine

# sku: 64G2-MI200-xGMI # options [64G16-MI200-IB-xGMI, 64G16-MI200-xGMI, 64G8-MI200-xGMI, 64G4-MI200-xGMI, 64G2-MI200-xGMI]
# jobs:
#   - name: run_fmri
#     process_count_per_node: 1
#     sku: 64G2-MI200-xGMI
#     command:
#       - echo "test"
#   - name: run_fmri2
#     process_count_per_node: 1
#     sku: 64G2-MI200-xGMI
#     command:
#       - echo "test2"

# must end with just 'jobs:' for imodelsx
jobs:
--------------------------------------------------------------------------------
/scripts/launch_cpu.yaml:
--------------------------------------------------------------------------------
# amlt debug launch_cpu.yaml
# amlt --pdb run launch_cpu.yaml
# amlt run launch_cpu.yaml

description: fMRI embedding extraction
environment:
  image: amlt-sing/acpt-2.2.1-py3.10-cuda12.1
  setup:
    - pip install -r requirements.txt --user
    - pip install -e ridge_utils_frozen/
    - git clone https://github.com/csinva/imodelsX.git
    - pip install imodelsX/
    - pip install .

code:
  local_dir: $CONFIG_DIR/..

target:
  service: sing
  name: msrresrchvc
  resource_group: gcr-singularity-resrch
  workspace_name: msrresrchws

storage:
  output:
    storage_account_name: chansingh
    container_name: cs1
    mount_dir: /mntv1 # dir on the local machine

# sku options
# equivalences: mem x C x numcpus
#   10C3 E4ads_v5 # 30 GBs
#   8C7 E8ads_v5 # 56 GBs
#   8C15 E16ads_v5 # 120 GBs
#   8C30 E32ads_v5 # 240 GBs
#   8C60 E64ads_v5 # 480 GBs
# jobs:
#   - name: run_fmri
#     process_count_per_node: 1
#     sku: E32ads_v5
#     command:
#       - echo "test"
#   - name: run_fmri2
#     process_count_per_node: 1
#     sku: E32ads_v5
#     command:
#       - echo "test2"

# must end with just 'jobs:' for imodelsx
jobs:
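Both launch files end with a bare jobs: key; per the comment above, the job list is appended at submission time by imodelsx. Purely as a hypothetical illustration (the helper name, defaults, and approach are invented here, not imodelsx's actual code), appending job entries with PyYAML could look like the sketch below; the job fields (name, process_count_per_node, sku, command) are taken from the commented-out examples in the files above.

import yaml  # PyYAML

def append_jobs(launch_path, commands, sku="E32ads_v5"):
    # Invented helper: return the launch config text with one job entry per
    # command appended under the trailing `jobs:` key.
    with open(launch_path) as f:
        config_text = f.read()
    jobs = [
        {"name": f"job_{i}", "process_count_per_node": 1, "sku": sku, "command": [cmd]}
        for i, cmd in enumerate(commands)
    ]
    jobs_yaml = yaml.safe_dump(jobs, default_flow_style=False)
    indented = "\n".join("  " + line for line in jobs_yaml.splitlines())
    return config_text.rstrip() + "\n" + indented + "\n"

print(append_jobs("scripts/launch_cpu.yaml", ['echo "test"']))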
| "License :: OSI Approved :: MIT License", 42 | "Operating System :: OS Independent", 43 | ], 44 | install_requires=required_pypi, 45 | ) 46 | --------------------------------------------------------------------------------