├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── README.md ├── evaluation ├── __init__.py ├── classification_benchmark.py ├── evaluation.py ├── pearl │ ├── __init__.py │ ├── eval.py │ ├── pearl.py │ └── probing.py ├── utils.py └── wordsim │ ├── __init__.py │ ├── data │ ├── all_vocab.txt │ ├── card_660.txt │ ├── men.txt │ ├── mturk_771.txt │ ├── rel353.txt │ ├── rw.txt │ ├── simLex.txt │ └── simverb_3500.txt │ ├── tasks.py │ └── wordsim.py ├── pyproject.toml ├── requirements ├── requirements-linux.txt └── requirements-macos.txt ├── tests ├── conftest.py ├── test_evaluation.py └── test_summarize.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- 1 | # User specific files 2 | local/ 3 | lightning_logs/ 4 | results/ 5 | .vscode/ 6 | .DS_store 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 116 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 117 | .pdm.toml 118 | .pdm-python 119 | .pdm-build/ 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.4.0 6 | hooks: 7 | - id: check-ast 8 | description: Simply check whether files parse as valid python. 
9 | - id: trailing-whitespace 10 | description: Trims trailing whitespace 11 | - id: end-of-file-fixer 12 | description: Makes sure files end in a newline and only a newline. 13 | - id: check-added-large-files 14 | description: Prevent giant files from being committed. 15 | - id: check-case-conflict 16 | description: Check for files with names that would conflict on case-insensitive filesystems like MacOS/Windows. 17 | - repo: https://github.com/astral-sh/ruff-pre-commit 18 | rev: v0.4.10 19 | hooks: 20 | - id: ruff 21 | args: [ --fix ] 22 | - id: ruff-format 23 | - repo: https://github.com/jsh9/pydoclint 24 | rev: 0.5.3 25 | hooks: 26 | - id: pydoclint 27 | - repo: local 28 | hooks: 29 | - id: mypy 30 | name: mypy 31 | entry: mypy 32 | language: python 33 | types: [python] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 The Minish Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | uv sync --all-extras 3 | uv run pre-commit install 4 | 5 | fix: 6 | uv run pre-commit run --all-files 7 | 8 | test: 9 | uv run pytest --cov=model2vec --cov-report=term-missing 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | 3 | This repository can be used to evaluate word embeddings on several tasks. All tasks are implemented as [MTEB](https://github.com/embeddings-benchmark/mteb) tasks and can be run using the same interface. 
4 | 5 | ## Usage 6 | 7 | To run the evaluation on all available tasks and summarize the results, the following code can be used: 8 | 9 | ```python 10 | from sentence_transformers import SentenceTransformer 11 | 12 | from evaluation import CustomMTEB, get_tasks, parse_mteb_results, make_leaderboard, summarize_results 13 | 14 | # Define the model name 15 | model_name = "average_word_embeddings_glove.6B.300d" 16 | 17 | # Get all available tasks 18 | tasks = get_tasks() 19 | # Define the CustomMTEB object with the specified tasks 20 | evaluation = CustomMTEB(tasks=tasks) 21 | model = SentenceTransformer(model_name) 22 | results = evaluation.run(model, eval_splits=["test"], output_folder=f"results/{model_name}") 23 | 24 | # Parse the results and summarize them 25 | parsed_results = parse_mteb_results(mteb_results=results, model_name=model_name) 26 | task_scores = summarize_results(parsed_results) 27 | # Print the results in a leaderboard format 28 | print(make_leaderboard(task_scores)) 29 | ``` 30 | 31 | This will print a markdown table similar to the [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard), e.g.: 32 | 33 | ``` 34 | | Model | Average (All) | Average (MTEB) | Classification | Clustering | PairClassification | Reranking | Retrieval | STS | Summarization | PEARL | WordSim | 35 | |:-----------------|----------------:|-----------------:|-----------------:|-------------:|---------------------:|------------:|------------:|-------:|----------------:|--------:|----------:| 36 | | GloVe_300d | 42.84 | 42.36 | 57.31 | 27.66 | 72.48 | 43.3 | 22.78 | 61.9 | 28.81 | 45.65 | 43.05 | 37 | ``` 38 | 39 | Alternatively, the evaluation can be run on a subset of tasks by specifying the task types: 40 | 41 | ```python 42 | from evaluation import CustomMTEB, get_tasks, TaskType 43 | from sentence_transformers import SentenceTransformer 44 | 45 | # Define the model name 46 | model_name = "average_word_embeddings_glove.6B.300d" 47 | 48 | # Get the specified tasks, in 
this case the classification and wordsim tasks 49 | task_types = [TaskType.CLASSIFICATION, TaskType.WORDSIM] 50 | tasks = get_tasks(task_types=task_types) 51 | 52 | # Define the CustomMTEB object with the specified tasks 53 | evaluation = CustomMTEB(tasks=tasks) 54 | # Run the rest of the evaluation and summarization as before 55 | ``` 56 | 57 | The following tasks are supported and can be used via the `TaskType` enum: 58 | ```python 59 | - CLASSIFICATION 60 | - CLUSTERING 61 | - PAIRCLASSIFICATION 62 | - RERANKING 63 | - RETRIEVAL 64 | - STS 65 | - SUMMARIZATION 66 | - WORDSIM 67 | - PEARL 68 | ``` 69 | Alternatively, the task types can also be specified as a list of strings, e.g. `task_types=["Classification", "WordSim"]`. 70 | 71 | Custom embedders can be used by implementing the [Encoder protocol](https://github.com/embeddings-benchmark/mteb/blob/main/mteb/encoder_interface.py#L12) from `MTEB`. 72 | 73 | ### Summarizing results 74 | 75 | The `summarize_results` function can be used to summarize results from an existing results folder, e.g.: 76 | 77 | ```python 78 | from evaluation import load_results, make_leaderboard, summarize_results 79 | 80 | # To summarize all models in a results folder: 81 | results = load_results("results/") 82 | task_scores = summarize_results(results) 83 | print(make_leaderboard(task_scores)) 84 | 85 | # To summarize a single model: 86 | results = load_results("results/average_word_embeddings_glove.6B.300d/") 87 | task_scores = summarize_results(results) 88 | print(make_leaderboard(task_scores)) 89 | ``` 90 | 91 | 92 | ## Supported Tasks 93 | All tasks from [MTEB](https://github.com/embeddings-benchmark/mteb) are supported: 94 | - Classification 95 | - Clustering 96 | - PairClassification 97 | - Reranking 98 | - Retrieval 99 | - STS 100 | - Summarization 101 | 102 | ### PEARL 103 | All tasks from the [PEARL paper](https://arxiv.org/pdf/2401.10407) benchmark are supported (PEARL codebase [here](https://github.com/tigerchen52/PEARL)): 
104 | - Paraphrase Classification 105 | - Phrase Similarity 106 | - Entity Retrieval 107 | - Entity Clustering 108 | - Fuzzy Join 109 | 110 | ### WordSim 111 | A collection of single word similarity datasets are supported: 112 | - RareWord 113 | - MEN 114 | - SimLex 115 | - rel353 116 | - simverb 117 | - muturk 118 | - Card660 119 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | from evaluation.evaluation import CustomMTEB, TaskType, get_tasks 4 | from evaluation.utils import load_results, make_leaderboard, parse_mteb_results, summarize_results 5 | 6 | __all__ = [ 7 | "CustomMTEB", 8 | "TaskType", 9 | "get_tasks", 10 | "load_results", 11 | "parse_mteb_results", 12 | "make_leaderboard", 13 | "summarize_results", 14 | ] 15 | __version__ = version("evaluation") # fetch version from install metadata 16 | -------------------------------------------------------------------------------- /evaluation/classification_benchmark.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from logging import getLogger 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | from datasets import Dataset, load_dataset 8 | from mteb.encoder_interface import Encoder 9 | from plotnine import aes, geom_point, ggplot, guides, scale_size, theme, theme_classic 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.metrics import precision_recall_fscore_support 12 | from sklearn.pipeline import make_pipeline 13 | from sklearn.preprocessing import StandardScaler 14 | 15 | logger = getLogger(__name__) 16 | 17 | datasets = [ 18 | {"ds_name": "sst2", "text_name": "sentence", "label_name": "label", "type": "classification"}, 19 | {"ds_name": "imdb", "text_name": "text", "label_name": "label", "type": "classification"}, 20 
| {"ds_name": "trec", "text_name": "text", "label_name": "coarse_label", "type": "classification"}, 21 | {"ds_name": "ag_news", "text_name": "text", "label_name": "label", "type": "classification"}, 22 | ] 23 | 24 | 25 | class ClassificationBenchmark: 26 | def __init__(self, encoder: Encoder, save_path: str) -> None: 27 | """ 28 | Initialize the classification benchmark. 29 | 30 | :param encoder: The encoder to use. Should be an implementation of an MTEB Encoder protocol. 31 | :param save_path: The path to save the results to. 32 | """ 33 | self.encoder = encoder 34 | # First check if the encoder has the 'mteb_model_meta' attribute, and if it does, check for 'name' 35 | if hasattr(encoder, "mteb_model_meta") and hasattr(encoder.mteb_model_meta, "name"): 36 | model_name = encoder.mteb_model_meta.name 37 | else: 38 | model_name = "no_model_name_available" 39 | logger.warning( 40 | "Encoder does not have a model name or mteb_model_meta attribute. Defaulting model name to 'no_model_name_available'." 41 | ) 42 | 43 | self.model_name = model_name 44 | self.save_path = Path(save_path) / f"{model_name}_classification_results.json" 45 | # Make sure the save directory exists 46 | self.save_path.parent.mkdir(parents=True, exist_ok=True) 47 | self.results: dict[str, dict] = {self.model_name: {}} 48 | 49 | def train_test_classification( 50 | self, encoder: Encoder, dataset: Dataset, text_name: str, label_name: str 51 | ) -> tuple[list[str], list[str], float]: 52 | """ 53 | Train and test a classification model for a specific encoder. 54 | 55 | :param encoder: The encoder to use. 56 | :param dataset: The dataset to use. 57 | :param text_name: The name of the text column in the dataset. 58 | :param label_name: The name of the label column in the dataset. 59 | :return: The predictions and labels. 
def summarize_classification_results(results_path: str) -> pd.DataFrame:
    """
    Summarize the classification benchmark results of every model into one pandas DataFrame.

    Each ``*.json`` file directly inside ``results_path`` is read as a mapping from a model name to its
    per-dataset metrics; only the first model of a file is used. The per-dataset ``main_score`` values
    are averaged, and throughput is the summed ``dataset_length`` divided by the summed ``encode_time``.

    A ``params`` entry (the parameter count of the model) is optional: the files written by
    ``ClassificationBenchmark.run`` do not contain one, in which case the parameter column is NaN.

    :param results_path: Path to the directory containing the results JSON files.
    :return: A pandas DataFrame with one row per results file and the columns
        "Model", "Accuracy", "Samples per second" and "Params (Million)".
    """
    columns = ["Model", "Accuracy", "Samples per second", "Params (Million)"]
    # Display names for models whose stored identifier should not be shown as-is
    names = {"GloVe_300d": "GloVe 6B 300d"}
    undefined = float("nan")

    model_averages = []
    # Sorted so that the row order does not depend on the filesystem's listing order
    for file in sorted(Path(results_path).glob("*.json")):
        with open(file, "r", encoding="utf-8") as f:
            result_data = json.load(f)
        if not result_data:
            # Nothing to summarize in an empty results file
            continue

        model_name = next(iter(result_data))
        model_info = result_data[model_name]

        # "params" sits next to the dataset entries but is not a dataset itself
        params = model_info.get("params")
        per_dataset = [metrics for name, metrics in model_info.items() if name != "params"]

        total_score = sum(metrics["main_score"] for metrics in per_dataset)
        total_time = sum(metrics["encode_time"] for metrics in per_dataset)
        total_samples = sum(metrics["dataset_length"] for metrics in per_dataset)

        model_averages.append(
            {
                "Model": names.get(model_name, model_name),
                # Averages are undefined (NaN) without datasets or without measured encode time
                "Accuracy": total_score / len(per_dataset) if per_dataset else undefined,
                "Samples per second": total_samples / total_time if total_time else undefined,
                "Params (Million)": params / 1_000_000 if params is not None else undefined,
            }
        )

    # Passing the columns explicitly keeps the frame well-formed even when no file was found
    return pd.DataFrame(model_averages, columns=columns)
def get_tasks(task_types: list[TaskType | str] | None = None) -> list[AbsTask]:
    """
    Get the tasks that match the provided task types.

    MTEB tasks are taken from the English main benchmark and kept when their metadata type is one of the
    requested types. The WordSim and PEARL subtasks are appended when their task type is requested.

    :param task_types: The task types to include, given as TaskType members or their string values
        (e.g. "Classification"). If None, all task types are included.
    :return: The tasks that match the provided task types.
    :raises ValueError: If any task types are invalid.
    """
    if task_types is None:
        # No selection means every supported task type
        valid_task_types = list(TaskType)
    else:
        # TaskType is a str enum, so plain strings compare equal to the member with the same value
        supported = list(TaskType)
        invalid_types = [task_type for task_type in task_types if task_type not in supported]
        if invalid_types:
            # List the values, not the member names: the values are what is accepted as a string
            supported_types = ", ".join(member.value for member in TaskType)
            raise ValueError(
                f"Invalid task types: {invalid_types}. "
                f"Task types must be TaskType members or their string values. "
                f"Supported task types are: {supported_types}"
            )
        # Normalize strings to TaskType members
        valid_task_types = [TaskType(task_type) for task_type in task_types]

    # Get the MTEB tasks that match the provided task types
    mteb_tasks = (mteb.get_task(task_name, languages=["eng"]) for task_name in mteb.MTEB_MAIN_EN.tasks)
    tasks = [task for task in mteb_tasks if task.metadata.type in valid_task_types]

    # If WordSim is in the task types, add the WordSim subtasks
    if TaskType.WORDSIM in valid_task_types:
        tasks.extend(WordSim.get_subtasks())

    # If PEARL is in the task types, add the PEARL subtasks
    if TaskType.PEARL in valid_task_types:
        tasks.extend(PEARL.get_subtasks())

    return tasks
def eval_turney(model: Encoder, dataset: Dataset) -> float:
    """
    Evaluate the Turney dataset.

    Every row is a multiple-choice question: the query is embedded together with the correct answer
    (the "label" column) and four distractor candidates. A row counts as correct when the label has
    the highest dot product with the query among the five options.

    :param model: The model to evaluate.
    :param dataset: The dataset to evaluate.
    :return: The accuracy of the model on the dataset.
    """
    # The query comes first and the gold answer second, so a correct pick is option index 0
    columns = ("query", "label", "candidate_1", "candidate_2", "candidate_3", "candidate_4")
    examples = [[row[column] for column in columns] for row in dataset]

    hits = 0
    for texts in examples:
        vectors = cast(np.ndarray, model.encode(texts))
        anchor = vectors[0, :]
        options = vectors[1:, :]
        best_option = np.argmax(np.dot(options, anchor))
        if best_option == 0:
            hits += 1

    return hits / len(examples)
def eval_retrieval(model: Encoder, kb_dataset: Dataset, test_dataset: Dataset) -> float:
    """
    Evaluate the retrieval dataset.

    The non-null entity names of the knowledge base are embedded and indexed. Each query is then embedded
    and looked up in that index; a query counts as correct when the retrieved entity equals its label.

    :param model: The model to evaluate.
    :param kb_dataset: The dataset containing the knowledge base.
    :param test_dataset: The dataset to evaluate.
    :return: The accuracy of the model on the dataset.
    """
    # Index the knowledge base, skipping rows without an entity name
    entity_names = [name for name in kb_dataset["entity_name"] if name is not None]
    index = Reach(model.encode(entity_names), entity_names)

    queries = test_dataset["query"]
    gold_labels = test_dataset["label"]
    neighbours = index.nearest_neighbor(model.encode(queries))

    # Element [0][0] of each result is used as the prediction -- presumably the name of the
    # closest entity; verify against Reach.nearest_neighbor
    top_entities = [hits[0][0] for hits in neighbours]

    pairs = list(zip(gold_labels, top_entities))
    misses = sum(1 for gold, guess in pairs if guess != gold)

    return (len(pairs) - misses) * 1.0 / len(pairs)
196 | :param dataset: The dataset to evaluate. 197 | :return: The accuracy of the model on the dataset. 198 | """ 199 | table_names: list[str] = [row["Dataset"] for row in dataset] 200 | acc_list = [] 201 | for table_name in table_names: 202 | acc_list.append(eval_single_autofj(dataset_name=table_name, model=model)) 203 | 204 | return sum(acc_list) / len(acc_list) 205 | -------------------------------------------------------------------------------- /evaluation/pearl/pearl.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Literal, cast 4 | 5 | from datasets import DatasetDict, load_dataset 6 | from mteb import TaskMetadata 7 | from mteb.abstasks import AbsTask 8 | from mteb.encoder_interface import Encoder 9 | 10 | from evaluation.pearl.eval import eval_autofj, eval_bird, eval_clustering, eval_ppdb, eval_retrieval, eval_turney 11 | 12 | 13 | class PEARL(AbsTask): 14 | DATASET_TASK_MAPPING = { 15 | "bird": "Classification", 16 | "turney": "Classification", 17 | "ppdb": "Classification", 18 | "ppdb_filtered": "Classification", 19 | "yago": "Retrieval", 20 | "umls": "Retrieval", 21 | "autofj": "Retrieval", 22 | "conll": "Clustering", 23 | "bc5cdr": "Clustering", 24 | } 25 | 26 | def __init__(self, dataset_name: str, hf_subsets: Any = None, **kwargs: Any) -> None: 27 | """ 28 | Initialize a PEARL task with the given dataset name. 29 | 30 | :param dataset_name: The name of the dataset to use. 31 | :param hf_subsets: The Hugging Face dataset splits to use. 32 | :param **kwargs: Additional keyword arguments. 33 | :raises ValueError: If the dataset name is unknown. 
34 | """ 35 | # Use the mapping to get the task type 36 | try: 37 | task_type = self.DATASET_TASK_MAPPING[dataset_name] 38 | except KeyError: 39 | raise ValueError(f"Unknown dataset name: {dataset_name}") 40 | 41 | self.dataset_name = dataset_name 42 | self.metadata = TaskMetadata( 43 | name=dataset_name, 44 | description=f"PEARL Task: {dataset_name}", 45 | dataset={ 46 | "path": "Lihuchen/pearl_benchmark", 47 | "revision": "1.0.0", 48 | }, 49 | reference=None, 50 | type=task_type, 51 | modalities=["text"], 52 | eval_splits=["test"], 53 | eval_langs=["en"], 54 | main_score="accuracy", 55 | ) 56 | 57 | # Initialize the parent class after setting the metadata 58 | super().__init__(hf_subsets=hf_subsets, **kwargs) 59 | 60 | def _calculate_metrics_from_split(self) -> None: 61 | """Calculate the metrics from the dataset split.""" 62 | raise NotImplementedError("Method not implemented") 63 | 64 | def load_data(self, eval_splits: Any = None) -> None: 65 | """Load the appropriate dataset based on the task name.""" 66 | if self.dataset_name == "umls": 67 | dataset = load_dataset("Lihuchen/pearl_benchmark", "umls", split="umls") 68 | else: 69 | dataset = load_dataset("Lihuchen/pearl_benchmark", self.dataset_name, split="test") 70 | self.dataset = DatasetDict( 71 | { 72 | "test": dataset, 73 | } 74 | ) 75 | 76 | def evaluate( 77 | self, model: Encoder, split: str = "test", output_folder: str | None = None, **kwargs: Any 78 | ) -> dict[str, dict[str, float]]: 79 | """Evaluate the given model on the specified dataset split.""" 80 | dataset_split = self.dataset[split] 81 | result = self._evaluate_subset(model, dataset_split) 82 | 83 | return {"default": {"accuracy": result, "main_score": result}} 84 | 85 | def _evaluate_subset(self, model: Encoder, dataset_split: str, **kwargs: Any) -> float: 86 | """Evaluate the given model on the specified dataset split.""" 87 | match self.dataset_name: 88 | case "bird": 89 | return eval_bird(model, dataset_split) 90 | case "turney": 91 | 
return eval_turney(model, dataset_split) 92 | case "ppdb" | "ppdb_filtered": 93 | return eval_ppdb(model, dataset_split) 94 | case "yago" | "umls": 95 | kb_dataset = load_dataset("Lihuchen/pearl_benchmark", "kb", split=self.dataset_name) 96 | return eval_retrieval(model, kb_dataset, dataset_split) 97 | case "autofj": 98 | return eval_autofj(model, dataset_split) 99 | case "conll" | "bc5cdr": 100 | return eval_clustering(model, dataset_split, name=cast(Literal["conll", "bc5cdr"], self.dataset_name)) 101 | case _: 102 | raise ValueError(f"Unknown dataset: {self.dataset_name}") 103 | 104 | @classmethod 105 | def get_subtasks(cls) -> list[PEARL]: 106 | """Return a list of subtasks, one for each dataset in the PEARL benchmark.""" 107 | return [cls(dataset_name=name) for name in cls.DATASET_TASK_MAPPING.keys()] 108 | -------------------------------------------------------------------------------- /evaluation/pearl/probing.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | import torch 5 | from pytorch_lightning import LightningModule, Trainer 6 | from pytorch_lightning.callbacks import EarlyStopping 7 | from sklearn.model_selection import train_test_split 8 | from torch import LongTensor, Tensor, nn, optim 9 | from torch.nn import functional as F 10 | from torch.utils.data import DataLoader, Dataset 11 | 12 | 13 | class ParaphraseDataset(Dataset): 14 | """Dataset for paraphrase probing task.""" 15 | 16 | def __init__(self, X: Tensor, label_tensor: Tensor) -> None: 17 | """ 18 | Initialize the dataset. 19 | 20 | :param X: The input data. 21 | :param label_tensor: The labels. 
class ProbingModel(LightningModule):
    """Small feed-forward probe with a sigmoid output, trained with binary cross-entropy."""

    def __init__(self, input_dim: int, train_dataset: Dataset, valid_dataset: Dataset, test_dataset: Dataset) -> None:
        """
        Build the probe and remember the datasets it is trained and evaluated on.

        :param input_dim: The input dimension.
        :param train_dataset: The training dataset.
        :param valid_dataset: The validation dataset.
        :param test_dataset: The test dataset.
        """
        super().__init__()
        self.input_dim = input_dim
        self.linear = nn.Linear(self.input_dim, 256)
        self.linear2 = nn.Linear(256, 1)
        self.output = nn.Sigmoid()

        # Learning rate for the optimizer and batch size for all dataloaders.
        self.lr = 0.0001
        self.batch_size = 200

        # Data served by the *_dataloader hooks.
        self.train_dataset = train_dataset
        self.valid_dataset = valid_dataset
        self.test_dataset = test_dataset

        # Per-batch results, collected during an epoch and aggregated at its end.
        self.validation_outputs: list[dict[str, Tensor]] = []
        self.test_outputs: list[dict[str, Tensor]] = []

    def forward(self, x: Tensor) -> Tensor:
        """Return one probability per input row as a flat tensor."""
        hidden = F.relu(self.linear(x))
        probabilities: Tensor = self.output(self.linear2(hidden))
        return probabilities.reshape((-1,))

    def configure_optimizers(self) -> optim.Adam:
        """Create the Adam optimizer over all model parameters."""
        return optim.Adam(self.parameters(), lr=self.lr)

    def train_dataloader(self) -> DataLoader:
        """Return the shuffled dataloader over the training data."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self) -> DataLoader:
        """Return the unshuffled dataloader over the validation data."""
        return DataLoader(self.valid_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self) -> DataLoader:
        """Return the unshuffled dataloader over the test data."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)

    def compute_accuracy(self, y_hat: Tensor, y: Tensor) -> Tensor:
        """Return the fraction of predictions (thresholded at 0.5) that equal the labels."""
        predicted_labels = (y_hat >= 0.5).long()
        hits = (predicted_labels == y).long().sum().item()
        return torch.as_tensor(hits / len(y_hat))

    def _evaluation_step(
        self, batch: tuple[Tensor, Tensor], mode: str, collected: list[dict[str, Tensor]]
    ) -> dict[str, Any]:
        """Score one batch, log its loss and accuracy under ``mode`` and record them in ``collected``."""
        inputs, targets = batch
        probabilities = self(inputs)
        loss = F.binary_cross_entropy(probabilities, targets)
        accuracy = self.compute_accuracy(probabilities, targets)
        self.log(f"{mode}_loss", loss, on_epoch=True, on_step=False)
        self.log(f"{mode}_accuracy", accuracy, on_epoch=True, on_step=False)

        # Keep a copy for the epoch-end hook; the returned dict has the same content.
        collected.append({f"{mode}_loss": loss, f"{mode}_accuracy": accuracy})
        return {f"{mode}_loss": loss, f"{mode}_accuracy": accuracy}

    def _log_epoch_means(self, mode: str, collected: list[dict[str, Tensor]]) -> None:
        """Log the mean of the recorded per-batch losses and accuracies, then empty ``collected``."""
        # NOTE(review): this is a plain mean over batches, so a smaller final batch
        # weighs as much as a full one.
        mean_loss = torch.stack([entry[f"{mode}_loss"] for entry in collected]).mean()
        mean_accuracy = torch.tensor([entry[f"{mode}_accuracy"] for entry in collected]).mean()
        self.log(f"epoch_{mode}_loss", mean_loss, on_epoch=True, on_step=False)
        self.log(f"epoch_{mode}_accuracy", mean_accuracy, on_epoch=True, on_step=False)

        # Start the next epoch with an empty record.
        collected.clear()

    def training_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
        """Compute the loss and accuracy for one training batch."""
        inputs, targets = batch
        probabilities = self(inputs)
        loss = F.binary_cross_entropy(probabilities, targets)
        accuracy = self.compute_accuracy(probabilities, targets)
        return {"loss": loss, "train_accuracy": accuracy}

    def validation_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
        """Score one validation batch and record the result for the epoch-end hook."""
        return self._evaluation_step(batch, "val", self.validation_outputs)

    def on_validation_epoch_end(self) -> None:
        """Log the epoch-level validation loss and accuracy."""
        self._log_epoch_means("val", self.validation_outputs)

    def test_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
        """Score one test batch and record the result for the epoch-end hook."""
        return self._evaluation_step(batch, "test", self.test_outputs)

    def on_test_epoch_end(self) -> None:
        """Log the epoch-level test loss and accuracy."""
        self._log_epoch_means("test", self.test_outputs)


def run_probing_model(X: np.ndarray, y: list[int]) -> float:
    """
    Train the probing model on an 80/10/10 split of the data and report test accuracy.

    :param X: The input data.
    :param y: The labels.
    :return: The test accuracy.
    """
    # First carve off 20% of the data, then halve that part into test and validation.
    X_train, X_held_out, y_train, y_held_out = train_test_split(X, y, test_size=0.2, random_state=42)
    X_test, X_dev, y_test, y_dev = train_test_split(X_held_out, y_held_out, test_size=0.5, random_state=42)

    def _as_dataset(features: np.ndarray, labels: list[int]) -> ParaphraseDataset:
        return ParaphraseDataset(torch.from_numpy(features), LongTensor(labels))

    model = ProbingModel(
        input_dim=X.shape[1],
        train_dataset=_as_dataset(X_train, y_train),
        valid_dataset=_as_dataset(X_dev, y_dev),
        test_dataset=_as_dataset(X_test, y_test),
    )

    # Stop once the epoch-level validation accuracy has not improved for five checks.
    stop_on_plateau = EarlyStopping(
        monitor="epoch_val_accuracy", min_delta=0.00, patience=5, verbose=False, mode="max"
    )
    trainer = Trainer(max_epochs=100, min_epochs=3, callbacks=[stop_on_plateau])
    trainer.fit(model)

    test_results = trainer.test(dataloaders=model.test_dataloader())
    return test_results[0]["epoch_test_accuracy"]
def setup_task_mappings() -> tuple[dict[str, list[str]], list[str]]:
    """
    Group all known task names by task type.

    WordSim and PEARL tasks are filed under the keys ``"WordSim"`` and ``"PEARL"``;
    every other task is filed under the type stored in its metadata.

    :return: A dictionary mapping task types to task names and a list of custom task names.
    """
    every_task = get_tasks()
    wordsim_names = [task.metadata.name for task in get_tasks([TaskType.WORDSIM])]
    pearl_names = [task.metadata.name for task in get_tasks([TaskType.PEARL])]

    names_by_type = defaultdict(list)
    for task in every_task:
        task_name = task.metadata.name
        # Custom benchmarks get their own group instead of the type in their metadata.
        if task_name in wordsim_names:
            group = "WordSim"
        elif task_name in pearl_names:
            group = "PEARL"
        else:
            group = task.metadata.type
        names_by_type[group].append(task_name)

    return names_by_type, wordsim_names + pearl_names


_task_type_to_tasks_mapping, _custom_task_names = setup_task_mappings()


def setup_logging() -> None:
    """Configure logging at INFO level with a Rich handler."""
    rich_handler = RichHandler(rich_tracebacks=True, tracebacks_suppress=[click])
    logging.basicConfig(
        level="INFO",
        format="%(name)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[rich_handler],
    )
@dataclass
class ResultSet:
    """The results of one model, keyed by dataset name."""

    # Maps a dataset name to the scores and timing recorded for it.
    datasets: dict[str, DatasetResult] = field(default_factory=dict)

    def summarize(self, task_type: str) -> pd.Series:
        """
        Collect the mean score of every dataset that belongs to the given task type.

        :param task_type: The task type to select datasets for.
        :return: A Series of mean scores indexed by dataset name.
        """
        selected = {}
        for dataset_name, dataset_result in self.datasets.items():
            belongs = False
            # MTEB tasks are matched on the type stored in their own metadata.
            if dataset_name not in _custom_task_names:
                belongs = mteb.get_task(dataset_name).metadata.type == task_type
            # Custom benchmarks are matched through the precomputed name mapping.
            if task_type in ("WordSim", "PEARL") and dataset_name in _task_type_to_tasks_mapping[task_type]:
                belongs = True
            if belongs:
                selected[dataset_name] = dataset_result.mean()

        return pd.Series(selected)

    def times(self) -> dict[str, float]:
        """Return the evaluation time of every dataset, keyed by dataset name."""
        return {dataset_name: dataset_result.time for dataset_name, dataset_result in self.datasets.items()}
def _process_result_data(data: dict[str, Any]) -> DatasetResult:
    """
    Process a single result JSON.

    Only test-split entries whose ``hf_subset`` is a supported language are kept. When a
    main score is stored as a list, its first element is used.

    :param data: The data to process.
    :return: The processed data.
    """
    scores = [score["main_score"] for score in data["scores"]["test"] if score["hf_subset"] in _SUPPORTED_LANGS]
    scores = [score[0] if isinstance(score, list) else score for score in scores]

    return DatasetResult(scores=scores, time=data["evaluation_time"])


def parse_mteb_results(mteb_results: list[MTEBResults], model_name: str) -> dict[str, ResultSet]:
    """
    Parse MTEBResults into a dictionary of ResultSet objects.

    Tasks without test scores, or without a test score for a supported language subset,
    are left out of the result.

    :param mteb_results: The results to parse.
    :param model_name: The name to store the parsed results under.
    :return: A dictionary with a single entry mapping the model name to its ResultSet.
    """
    dataset_results: dict[str, DatasetResult] = {}

    for result in mteb_results:
        task_name = result.task_name
        test_scores = result.scores.get("test", [])
        if not test_scores:
            continue

        supported_scores = [score["main_score"] for score in test_scores if score["hf_subset"] in _SUPPORTED_LANGS]
        # A task may only report subsets we do not support; skip it instead of
        # failing with an IndexError on the empty list.
        if not supported_scores:
            logger.warning(f"No supported language subset in the test scores of {task_name}. Skipping.")
            continue
        main_score = supported_scores[0]

        # Check if the main score is a SignificanceResult. If so, extract the statistic
        if isinstance(main_score, SignificanceResult):
            main_score = main_score.statistic

        # Populate the DatasetResult
        dataset_results[task_name] = DatasetResult(scores=[main_score], time=result.evaluation_time)

    return {model_name: ResultSet(datasets=dataset_results)}
def make_leaderboard(model_scores: dict[str, dict]) -> pd.DataFrame:
    """
    Make the leaderboard with the mean scores for each task and compute macro scores.

    Scores are multiplied by 100 and rendered with two decimals; missing scores are
    rendered as "N/A".

    :param model_scores: For every model, a dict with a "task_means" Series (mean score per
        task type) and a "dataset_scores" dict (score per dataset).
    :return: A DataFrame with one row per model and the columns "Model", "Average (All)",
        "Average (MTEB)", followed by one column per task type.
    """
    task_means = {model: scores["task_means"] for model, scores in model_scores.items()}
    dataset_scores = {model: scores["dataset_scores"] for model, scores in model_scores.items()}
    custom_names = set(_custom_task_names)

    def _macro_average(model: str, scores: dict) -> float:
        # An average is only reported for models that have a mean for every task type.
        # A NaN among the dataset scores propagates through np.mean on its own.
        values = list(scores.values())
        if not values or not task_means[model].notna().all():
            return np.nan
        return float(np.mean(values))

    def _format_score(value: Any) -> Any:
        # NaN is a float too, so it must be caught here; once formatted it would be
        # the string "nan" and no longer count as missing.
        if isinstance(value, (int, float, np.integer, np.floating)):
            return "N/A" if pd.isna(value) else f"{value * 100:.2f}"
        return value

    # Task types in rows, models in columns.
    leaderboard = pd.DataFrame(task_means)

    # Overall macro score: mean over all datasets across all tasks.
    leaderboard.loc["Average (All)"] = pd.Series(
        {model: _macro_average(model, scores) for model, scores in dataset_scores.items()}
    )

    # Macro score over MTEB datasets only, i.e. without the custom tasks.
    mteb_dataset_scores = {
        model: {dataset: score for dataset, score in scores.items() if dataset not in custom_names}
        for model, scores in dataset_scores.items()
    }
    leaderboard.loc["Average (MTEB)"] = pd.Series(
        {model: _macro_average(model, scores) for model, scores in mteb_dataset_scores.items()}
    )

    # Series.map is applied per column; it is available in every pandas version,
    # unlike DataFrame.applymap (deprecated) and DataFrame.map (recent).
    leaderboard = leaderboard.apply(lambda column: column.map(_format_score))

    # Catch missing values that are not floats (e.g. None).
    leaderboard = leaderboard.fillna("N/A")

    # Models in rows, task types in columns, with the model name as a regular column.
    leaderboard = leaderboard.transpose().reset_index().rename(columns={"index": "Model"})

    # Place the two averages right after the model name.
    leading = ["Model", "Average (All)", "Average (MTEB)"]
    columns = leading + [col for col in leaderboard.columns if col not in leading]
    return leaderboard[columns]
salicylic_acid carbonate 2.31 50 | Kepler-11 red_giant 2.56 51 | baby cutee 1.56 52 | Rubik's_Cube RuBisCO 0.06 53 | goatsbeard tragopogon 4.00 54 | fundraiser event 2.00 55 | going really 0.00 56 | transmigration residence_permit 2.06 57 | prospector sourdough 3.56 58 | sorry srry 4.00 59 | avionics aeronautics 2.50 60 | rallentando slowly 2.88 61 | retweeting RTing 4.00 62 | Apple Applebees 0.38 63 | exponential logx 2.38 64 | Zeta-Jones Catherine_Zeta-Jones 3.88 65 | Mosul Mawsil 4.00 66 | Pizza_Hut Pizzle_rot 0.06 67 | remainder difference 2.44 68 | preheat reheat 2.63 69 | disembodied spiritual 2.56 70 | crested_tit Amazon_rainforest 0.94 71 | afterworld purgatory 2.88 72 | screenshot screengrab 3.88 73 | practicable goal 1.13 74 | Skype_Lite ooVoo 2.75 75 | decomposition factorization 3.31 76 | LOL looool 3.88 77 | skateboard_deck halfpipe 2.63 78 | Breuil-Cervinia Val_Gardena 3.06 79 | inheritor hoarded_wealth 1.94 80 | appendage swimmeret 2.69 81 | passenger passepied 0.06 82 | tedious old-fashioned 1.00 83 | radionavigation frequency_band 1.75 84 | Tag_Heuer Jaeger-LeCoultre 3.13 85 | Followback Twitter 2.31 86 | weekend race 0.13 87 | septenary Pleiades 1.25 88 | monsignor assignor 0.25 89 | preoccupation prepossession 1.19 90 | spontaneousness returnability 0.31 91 | under-appreciated unnoticeable 1.69 92 | covfefe coverage 2.69 93 | devious untrustworthy 2.75 94 | comedian stand-up 2.44 95 | infant breastfeeder 2.19 96 | trusteeship traineeship 0.31 97 | human_face make-up 1.88 98 | backslash backsolving 0.19 99 | cr8 create 3.94 100 | fluoride monofluoride 3.31 101 | unforeseen unanticipated 3.88 102 | fancifully whimsically 3.88 103 | Winn-Dixie Winnipeg 0.13 104 | iPhone bendgate 1.94 105 | ultracompetitiveness overcompetitiveness 3.69 106 | 1/4 quarter 4.00 107 | modern-day futuristic 2.38 108 | yeahh yessss 3.94 109 | serious tongue-in-cheek 1.88 110 | New_Zealander enzedder 4.00 111 | circumnavigation baggage 1.44 112 | school intramural_program 
1.81 113 | iMac hairgate 0.94 114 | driver autoinstall 1.50 115 | credit borrower 2.00 116 | saving saver 2.44 117 | hearth_tax property_tax 3.44 118 | Britain_First racism 2.31 119 | fetishism fractionate 0.00 120 | carboxylesterase carbonara 0.06 121 | wandmaker Xanthosoma 0.00 122 | bootjack blackjack 0.19 123 | lecturership ERC_grant 1.31 124 | imaginativeness non-existent 1.81 125 | swimwear swimshorts 3.00 126 | grocery Wal-Mart 2.31 127 | office desklamp 2.00 128 | circumcision foreskin 2.13 129 | Phelps Philips 0.88 130 | reverse_engineering copy_protection 1.25 131 | heart-wrenching spanner 0.00 132 | unprecedented unexpected 2.75 133 | smallish dwarfish 3.06 134 | chasable purchasable 0.50 135 | highlander pathfinder 1.94 136 | borrowing loanword 3.19 137 | DHL_Express Deutsche_Bundespost 2.94 138 | value rate 1.81 139 | witch-hunt McCarthyism 2.81 140 | leggin legging 4.00 141 | run-of-the-mill ordinary 3.44 142 | ropewalker tightrope 2.13 143 | Ben-Hur Titanic 1.56 144 | 1st 2nd 2.88 145 | tonbak tombac_alloy 0.50 146 | twitcon twitch 0.06 147 | threadworm pinworm 3.69 148 | roller-coaster Disneyland_Park 2.13 149 | wrathful hesitant 0.38 150 | Ebola hyperbola 0.00 151 | promegakaryocyte precursor 2.69 152 | tasteless indelicate 3.06 153 | unwantedly unintendedly 3.38 154 | Head_tilt cervix 2.06 155 | relaxed nerveless 3.06 156 | 3rd third 4.00 157 | self-driving unmanned 3.13 158 | leadscrew leader 0.13 159 | constable metpoliceuk 2.44 160 | eye-candy mesomorphic 1.06 161 | real-world real-life 3.56 162 | bystrite strike 0.00 163 | union sum 2.81 164 | weaving tussah 1.56 165 | manpage helpsheet 3.25 166 | bureau subdepartment 2.56 167 | cover horsecloth 2.44 168 | non-refundable irredeemable 2.63 169 | hemorrhage 2morro 0.00 170 | knifemaking spoonworm 0.00 171 | boatmaster captain 3.44 172 | bloodloss radiotherapist 0.88 173 | copilot cockpit 1.94 174 | scam cybershopping 1.06 175 | screensaver FoodSaver 0.19 176 | headword syntax 1.75 177 | 
slam-bang suddenly 3.25 178 | pyelography urography 3.44 179 | sandglass hourglass 3.88 180 | sleepwalking somnambulists 3.88 181 | Colorado_Technical_University Colorado_Tech 4.00 182 | dishwasher-safe sturdy 1.69 183 | galvanize zinc 2.38 184 | banksia Banksy 0.06 185 | teary-eyed misty-eyed 3.63 186 | protrusion bewitchment 0.00 187 | preteen preadolescent 3.50 188 | bowling bowler 2.38 189 | aviation airscoop 1.75 190 | Golden_Delicious Aldi 0.75 191 | platelayer media_player 0.06 192 | casteless portrayer 0.25 193 | pretax_income gross_salary 3.31 194 | understood gotchu 3.81 195 | payment reward 2.63 196 | security immobilizer 2.25 197 | dysentery paratyphoid 2.56 198 | BitTorrent peer-to-peer 2.69 199 | beleaguering besieging 3.56 200 | Christiane_Amanpour Hala_Gorani 2.44 201 | infection inflection 0.25 202 | hard-boiled pugnacious 3.81 203 | flying_fox wurbagool 3.19 204 | bone pseudofracture 1.69 205 | intertitle title_card 3.75 206 | Athlete's_Foot tinea_pedis 3.63 207 | exchange replacement 2.63 208 | hyporesponsiveness hyponymy 0.13 209 | skimobile T-Mobile 0.06 210 | cannoli Haagen-Dazs 1.56 211 | white_horehound Marrubium_vulgare 4.00 212 | metapsychology schizophrenia 1.50 213 | mousse yellowcake 0.00 214 | schemer strategist 2.56 215 | first_milk colostrum 4.00 216 | genetics genethics 2.25 217 | FIVB AFib 0.06 218 | thing think 0.00 219 | AccuRay AccuWeather 0.25 220 | Anthony_Joshua Persepolis 0.69 221 | slipper daysleeper 0.25 222 | romance tweethearts 2.06 223 | matchlock arquebusier 1.25 224 | chickpea Chick-fil-A 0.44 225 | 3D_glasses one-dimensional 1.13 226 | AKG Mercedes-AMG 0.44 227 | minute_of_arc arcmin 3.88 228 | red_tape political_campaign 0.81 229 | postmortem_examination autopsy 4.00 230 | unformed uniformed 0.13 231 | Addison's_Disease corticosteroid 2.06 232 | roofball backyard 0.56 233 | Guasacaca gnocchi 1.25 234 | x-intercept inception 0.50 235 | Sotloff type-c 0.00 236 | Boko_Haram Taliban 2.81 237 | Nobelist novelist 0.44 238 
| prognosticator foreteller 3.75 239 | editor Redditor 1.25 240 | director cinematography 2.00 241 | pike spontoon 3.25 242 | autopilot autophyte 0.25 243 | surrounding ecclesiastic 0.00 244 | abetalipoproteinemia familial_dysbetalipoproteinemia 2.44 245 | do-over leftover 0.25 246 | perpend stipend 0.00 247 | placidity acidity 0.06 248 | IOCCC ACM-ICPC 2.81 249 | ISIS terrorism 2.75 250 | undersuit understanding 0.06 251 | saccharose sucrose 3.94 252 | DVB-H mobile_TV 2.75 253 | science-fiction sci-fi 4.00 254 | swan seaduck 2.81 255 | plane sheet 0.75 256 | amusement_park hypercoaster 2.25 257 | ATI Nvidia 3.06 258 | choice election 2.13 259 | little_bee-eater Merops_pusillus 4.00 260 | b-day present 2.00 261 | Kilroy-Silk Nick_Clegg 2.50 262 | orogeny progeny 0.19 263 | Yun-Fat fatness 0.00 264 | ballistic_missile combust 1.31 265 | slave hierodule 3.13 266 | combination union 2.88 267 | 2mro tomorrow 4.00 268 | pestis pestilence 4.00 269 | CUDA parallelize 2.13 270 | Kurdish Turkey 2.50 271 | syllable lemma 2.13 272 | Stephen_Hawking hawk 0.06 273 | Under_Armour armor 0.13 274 | DNA transactivator 1.56 275 | review IrfanView 0.19 276 | dialler modem 1.75 277 | lukewarmness unattractiveness 0.81 278 | thoughtful die-hard 0.13 279 | doorcase door_frame 3.94 280 | judge court 2.25 281 | boutta boat 0.00 282 | algebraist eigenvalue 1.69 283 | biting_point clutch 2.19 284 | Sam_Mraovich Ben_and_Arthur 2.00 285 | 0.5 half 4.00 286 | fast-forward tape 1.81 287 | 7-Zip C++ 1.38 288 | HD high-definition 4.00 289 | entity corporation 2.00 290 | flag tricolor 2.81 291 | collaborator cooperator 3.25 292 | Douglas Isle_of_Man 2.44 293 | farsightedness hyperopia 3.25 294 | halomethane oscilloscope 0.56 295 | coincineration waste_treatment 2.63 296 | NGC_4622 Ursa_Major 2.00 297 | fake_news Fox_News 1.56 298 | eye_of_the_storm cyclone 2.31 299 | astronomy finderscope 1.75 300 | phototransmutation photodisintegration 4.00 301 | nonmonogamy consanguinity 0.44 302 | microfiche 
photocopy 1.63 303 | Commodore_64 microcomputer 2.44 304 | billhook plough 2.13 305 | AirBake A-bike 0.13 306 | transplant autograft 2.56 307 | thylakoid photosynthesis 2.06 308 | gnuplot Plotinus 0.00 309 | delayingly slothfully 2.75 310 | LAMO lambda 0.00 311 | payment note 1.63 312 | EAGGF market 1.25 313 | Champions_League Champlain_Sea 0.00 314 | physiopathology medical_diagnosis 1.75 315 | for_sure fasho 4.00 316 | autocrime truck 1.06 317 | geometry cylindric 1.69 318 | 4sho surely 4.00 319 | disomy intragenic 1.81 320 | revelatory apocalyptical 3.88 321 | pain neckache 2.56 322 | imperfectness imperfection 3.69 323 | electrolytic_polishing electropolishing 3.94 324 | Brexit Fatuzzo 0.56 325 | Visual_C .NET_Framework 2.31 326 | microwave All-Clad 1.75 327 | stubbornly single-mindedly 2.94 328 | smdh smfh 3.75 329 | njoy killjoy 1.50 330 | cool swagg 2.56 331 | combat belligerence 2.25 332 | Bushihr Donald_Horne 0.00 333 | cheap low-budget 3.50 334 | Bitcoin exchange_rate 1.69 335 | impossible far-fetched 3.44 336 | nonnative nonnegative 0.06 337 | reduction overeducation 0.06 338 | vanilla pastrycook 1.69 339 | protein CIITA 1.94 340 | descensus prolapse 4.00 341 | microsyringe hyperfunction 0.25 342 | washbasin handbasin 3.88 343 | geneflow gene_migration 4.00 344 | knowingness cognizance 3.94 345 | predominance prepotency 3.75 346 | stellar_nucleosynthesis nitrogen-14 2.19 347 | Tefal T-Fal 4.00 348 | real_estate Millcraft 2.00 349 | snow_leopard Summer_snowflake 0.19 350 | flysheet tent 2.50 351 | feeing feeling 0.06 352 | Snowblood Japan 1.44 353 | infinity alligation 1.00 354 | wanna-be Kardashians 0.38 355 | film_production filmmaking 3.56 356 | cartshed bloodshed 0.06 357 | hypocrisy pretense 3.06 358 | zoonosis tepoxalin 1.56 359 | Shelby_GT350 Shelby_Farms 0.19 360 | drop-off rip-off 0.19 361 | mode fashion 3.38 362 | semivowel aspirate 2.44 363 | Ferguson Stamford_Bridge 1.44 364 | Seven_foot growth_hormone 1.94 365 | down-to-earth practical 3.50 
366 | textual_matter concordance 2.06 367 | Phenelzine XXXXX_syndrome 0.69 368 | sqrt square_root 4.00 369 | sunsuit beach 1.75 370 | epidiascope epidioscope 4.00 371 | Kile kite 0.06 372 | leaf_roller leaflet 0.31 373 | Cyber-Shot Coolpix 2.81 374 | voidness emptyness 3.63 375 | shot close-up 2.75 376 | Chad Yemen 2.13 377 | hygiene autoflush 1.31 378 | currency concurrency 0.13 379 | Deathstalker Wikipedia 0.00 380 | thaumasite silicate_mineral 3.06 381 | must-see interesting 3.06 382 | cycloheximide Streptomyces_bacteria 2.25 383 | 25 twenty-five 4.00 384 | ceramic pottery 2.75 385 | constipation constitutionalisation 0.00 386 | protagonist deuteragonist 2.81 387 | overt undisclosed 1.88 388 | Java_DB J2SE 2.63 389 | underclothes underlinen 2.81 390 | dentinogenesis_imperfecta itch 0.31 391 | Democracy_Player Miro 3.81 392 | handpan belieber 0.25 393 | Google_Earth 3D_imagery 2.00 394 | bigot 4got 0.13 395 | bankruptcy failure 1.94 396 | dreadnought battleship 3.06 397 | noisemonger electric_guitar 1.81 398 | leucosis Bird_flu 2.31 399 | Autodesk_Revit 3D_Studio_Max 2.88 400 | supremacy omnipotence 3.00 401 | letting rental 3.63 402 | entryphone door_phone 3.88 403 | inharmonious architecture 0.81 404 | hugger-mugger disorganized 3.75 405 | organelle endosymbiotic 1.44 406 | wrathfully self-righteously 1.38 407 | altimeter all-time 0.00 408 | WWII Fishbed 1.31 409 | Kobani Bale 0.00 410 | shit shxt 3.94 411 | fishplate template 0.06 412 | extraterrestrial_being hypothetical_creature 2.56 413 | applicant application 2.31 414 | lemmatization treebank 1.63 415 | imperishingness easygoingness 0.00 416 | Valentine's_Day lovee 2.00 417 | mozzarella pizzamaker 1.56 418 | cop copilot 0.38 419 | clinic triclinic 0.06 420 | isotope dioxygen 1.31 421 | Al-Jazeera Mehdi_Hasan 1.94 422 | bedgown bedrobe 2.63 423 | stepsize stepfather 0.06 424 | concise one-liner 2.44 425 | human_herpesvirus_4 Epstein-Barr 4.00 426 | leathercrafter leatherworker 3.69 427 | oceanic_trench 
sea_floor 2.50 428 | brainless sheepish 2.75 429 | missed_call so-called 0.13 430 | metamorphose transmute 3.81 431 | bood_clot agammaglobulinemia 1.63 432 | man-at-arms traitorous 0.19 433 | fringe_benefit bonus 3.00 434 | headcollar hair_color 0.00 435 | madrier plank 3.19 436 | Boeing_747 Stratojet 2.75 437 | teratospermia teratozoospermia 4.00 438 | tariqa tariqah 4.00 439 | seriously foreal 3.75 440 | insidiously harmfully 3.50 441 | pasteurization cowkeeper 0.81 442 | vindictively revengefully 3.94 443 | scornful contemptuous 3.94 444 | jawfish streamwater 1.19 445 | mollusc parapodia 1.63 446 | cheapjack amberjack 0.06 447 | dual_screen off-screen 1.00 448 | Regosol prioritarianism 0.00 449 | end tail 3.00 450 | DVD Blu-ray 3.19 451 | Gorilla_Glass scratch-resistant 2.31 452 | pacemaker lacemaker 0.19 453 | starless starkness 0.06 454 | foul-mouthed foul-spoken 4.00 455 | snow_lotus Saussurea 3.69 456 | inadvertence oversight 3.38 457 | mark grade 3.75 458 | Wristwatch smartwatch 2.88 459 | dessertspoon vanilla 1.38 460 | Christstollen flour 1.75 461 | massive astronomical 3.38 462 | sweeeet sweet 3.94 463 | Alcohol_withdrawal_syndrome Whiskey_fits 3.31 464 | kicksled sledge 2.88 465 | errbody embody 0.25 466 | bluethroat nightingale 3.06 467 | proportional_tax capitation 2.69 468 | children dollmaking 1.31 469 | about bouta 3.94 470 | fairness unbiasedness 3.69 471 | PiO President-in-Office 4.00 472 | greyishness oil_paint 0.75 473 | dunno idek 3.69 474 | In-N-Out_Burger Jamie_Oliver 1.81 475 | rail minecart 2.00 476 | parameter hyperparameter 3.00 477 | undercover self-indulgent 0.00 478 | expounding anlysis 2.75 479 | ontologize democratize 0.50 480 | Woods Phil_Mickelson 3.00 481 | Nutella Belgian_Congo 0.06 482 | transalpine pretense 0.00 483 | latewood fastfood 0.00 484 | skater pool_skating 2.31 485 | ctenophore comb_jelly 3.94 486 | actinomycin_D infertility 0.75 487 | oilbird beak 2.31 488 | magnet gyromagnetic 2.25 489 | jaw-dropping eyedropper 
0.06 490 | foot base 1.69 491 | coalitionist casern 0.38 492 | fundamentalism intergovernmentalism 0.63 493 | angle angleworm 0.25 494 | relaxedly cold-bloodedly 2.06 495 | thought-provoking provocative 1.63 496 | midquel sequel 3.13 497 | Orient_Heights Constitution_beach 2.31 498 | engine Shovelhead 2.56 499 | opopanax opobalsam 3.94 500 | agriculture monoculture 2.50 501 | cowberry lingonberry 3.75 502 | Schwartz-Jampel lathosterol 0.63 503 | movie must-see 1.13 504 | nephoscope meteorologist 1.63 505 | dripstone cave 1.56 506 | disinvestment subtropical 0.00 507 | promise promiscuous 0.06 508 | puncture tricycle 1.25 509 | AirDrop airdome 0.00 510 | univocal unquestionable 3.19 511 | maximization maxilliped 0.06 512 | Hindooism Hinduism 3.94 513 | co-prime Amazon_Prime 0.00 514 | liberty censorship 1.69 515 | seater fruiteater 0.00 516 | Smorgasburg food_market 2.75 517 | hydrohalic_acid dehydrohalogenation 1.94 518 | fibroplasia angioplasty 1.94 519 | prefinancing investment 2.50 520 | loool ctfu 2.94 521 | Brooklyn Brklyn 4.00 522 | decomposition biowaste 2.06 523 | snapshot clapshot 0.06 524 | Mohammed_Emwazi Jihadi_John 3.88 525 | gap opening 3.19 526 | extramarital unmentionable 0.75 527 | omniscience discovery 1.25 528 | VoIP interconnectedness 0.88 529 | offering contribution 2.88 530 | tweet retweet 3.25 531 | commodious hand-held 0.44 532 | lmbo jajajajaja 3.63 533 | autosuggestion autocomplete 3.44 534 | horsecart throatlatch 1.75 535 | coin coinsurance 0.31 536 | boatman logboat 1.75 537 | Roman_Church Inquisition 2.06 538 | puddingwife plumcake 0.25 539 | carbinolamine hemiaminal 3.88 540 | grimy imy 0.06 541 | defense_mechanism anxiety 1.44 542 | lymphocyte T-cell 3.00 543 | oneirocritic dream 2.06 544 | medallist Emirates_Stadium 0.81 545 | bottle can 2.50 546 | sowing plantlet 1.81 547 | Buxus defoliator 1.25 548 | Seoul Yo-jong 1.00 549 | strive foreclose 0.31 550 | IE4 Internet_Explorer_4 4.00 551 | blitz attack 3.19 552 | praecipe praecipuum 
0.13 553 | fundholder construction 0.44 554 | foodstuff feedingstuff 2.88 555 | filtrate first-rate 0.00 556 | ITV2 ITV_Two 4.00 557 | photophone homophone 0.25 558 | wrinkle Early_aging 1.81 559 | water_bearer Aquarius 3.81 560 | novelist sub-plot 1.63 561 | fruiterer fruit 2.00 562 | electrodiagnosis ecosocialist 0.06 563 | dukedom princedom 2.75 564 | thought_process thought-provoking 1.94 565 | draftswoman watercolor_painting 1.75 566 | census nosecount 4.00 567 | Mathematica differential_equation 2.06 568 | soldier footguard 2.94 569 | Fish_and_chips Nando's 1.38 570 | Scooby-Doo Shaun_the_Sheep 2.75 571 | post-apocalyptic postage 0.00 572 | pragmatics stemmatics 1.50 573 | locomotion zoospore 0.25 574 | transaction cyberbank 1.75 575 | iCloud cyberlocker 2.69 576 | Ramadan Al_Ramadi 0.44 577 | erythroleukemia blood_cancer 3.06 578 | vestibular_system Ototoxicity 1.81 579 | miniguide tourist 1.69 580 | miniskirt microskirt 3.31 581 | all-star legendary 2.44 582 | X-Files science_fiction_drama 2.56 583 | Adobe_AfterEffects Canon_EOS_6D 1.63 584 | responsibility respondee 0.50 585 | surgery therapeutical 1.56 586 | Fiat_500 venice 0.69 587 | K-PAX Kevin_Spacey 1.94 588 | overcurious inquisitive 3.06 589 | Top_Gear Isuzu_Vehicross 1.19 590 | European_Parliament Oomen-Ruijten 1.81 591 | June JunB 0.06 592 | PyeongChang_2018 Richard_Seymour 0.81 593 | Apple monopolist 0.94 594 | 2nite 2mrw 2.38 595 | biological_function teleosemantics 2.19 596 | single-handedly independently 2.69 597 | trail_and_error trialogue 0.06 598 | Fire_TV Snapchat 0.94 599 | trilogy mini-series 2.44 600 | Hale-Bopp Thomas_Bopp 2.00 601 | groundcrew airport 1.88 602 | roadster musclebike 3.06 603 | corncutter cornsilk 1.50 604 | greengrocery grocery 3.06 605 | Galaxy_S8 Samsung_S8 3.63 606 | early precociously 2.38 607 | PGA_Tour Steven_O'Hara 1.94 608 | all-nighter deadline 1.50 609 | favism broad_bean 1.69 610 | Blind_Freddy window_blind 0.06 611 | soon sooon 3.94 612 | religion secularist 
2.00 613 | down feather 2.88 614 | mini-USB minium 0.06 615 | anamorphosis replica 1.31 616 | wheel cyclometer 1.50 617 | macrocosmic Big_Bang 2.13 618 | raincoat rainjacket 3.88 619 | penthouse premises 1.88 620 | pigswill swill 3.81 621 | fifth top 0.81 622 | tubocurarine anesthetic 1.38 623 | C# C++ 3.25 624 | reburial tomb 2.19 625 | gluttonously voraciously 3.19 626 | Cambridge_University Anglia_Ruskin 2.88 627 | New_York Mnhttn 2.75 628 | stage_fright diffidence 2.44 629 | Chelsea_tractor Blvd 1.06 630 | offprint printer 1.44 631 | Inkscape vector_graphics 2.19 632 | hashtag insta 1.69 633 | English_Channel History_Channel 0.13 634 | hankering handling 0.25 635 | double-faults Wimbledon 1.75 636 | BMW omw 0.06 637 | nutmeg mace 2.56 638 | blithesome light-hearted 3.94 639 | spiritualist autographic 0.00 640 | frizzen flintlock_firearm 2.19 641 | work_permit Blue_Card 2.94 642 | slow-cooker crock-pot 3.81 643 | doggo invisible 2.94 644 | irritate antagonize 2.81 645 | archer pathfinder 1.38 646 | Ossessione Italian_neorealism 1.69 647 | piscary containership 0.50 648 | unhygienically lip-smackingly 0.38 649 | tissue abiotrophy 1.81 650 | swordmaker weapon 2.06 651 | hyperproliferation cell 2.19 652 | Milk_spots Chris_Milk 0.06 653 | wallhack Xbox 1.31 654 | Keras Titan_Xp 1.44 655 | Tandoori_chicken LeCreuset 1.00 656 | whole basically 0.25 657 | shapeless amorphous 3.94 658 | rule convention 3.00 659 | cotransfection siRNA 2.13 660 | multiplayer Supersonic_Warriors 1.88 661 | -------------------------------------------------------------------------------- /evaluation/wordsim/data/mturk_771.txt: -------------------------------------------------------------------------------- 1 | 0 access gateway 3.791666667 2 | 1 account explanation 2 3 | 2 account invoice 3.75 4 | 3 account statement 3.681818182 5 | 4 acoustic remedy 1.227272727 6 | 5 acrylic cloth 2.739130435 7 | 6 action adjustment 2 8 | 7 action entrance 1.583333333 9 | 8 activity event 4.083333333 10 | 9 
activity music 2.681818182 11 | 10 activity skiing 3.45 12 | 11 addition segment 2.5 13 | 12 adhesive glue 4.608695652 14 | 13 adult dentist 2.47826087 15 | 14 adult doctor 2.782608696 16 | 15 afternoon substance 1 17 | 16 age childhood 3.782608696 18 | 17 agency army 2.916666667 19 | 18 agency office 3.857142857 20 | 19 agency police 3.19047619 21 | 20 agent spy 4 22 | 21 agreement contract 4.476190476 23 | 22 aim purpose 4.363636364 24 | 23 aircraft balloon 2.869565217 25 | 24 aircraft yacht 2.434782609 26 | 25 alarm horn 3.458333333 27 | 26 alarm press 2 28 | 27 algorithm search 1.863636364 29 | 28 alien stranger 3.428571429 30 | 29 alloy metal 3.954545455 31 | 30 alphabet script 3.5 32 | 31 aluminum oxygen 1.608695652 33 | 32 amount distance 1.958333333 34 | 33 amount number 4.136363636 35 | 34 amount season 1.434782609 36 | 35 amusement athletics 2.6 37 | 36 amusement play 4.041666667 38 | 37 amusement procedure 1.454545455 39 | 38 anatomy creation 2.565217391 40 | 39 animal flora 2 41 | 40 animal worm 2.72 42 | 41 ankle joint 4.227272727 43 | 42 anniversary birthday 3.727272727 44 | 43 answer plea 2.44 45 | 44 apparel dress 4.227272727 46 | 45 appearance shadow 2.52173913 47 | 46 apple bank 1.125 48 | 47 apple orange 3.47826087 49 | 48 apple pod 2.043478261 50 | 49 appliance dryer 3.333333333 51 | 50 appliance refrigerator 4.227272727 52 | 51 approach swing 1.652173913 53 | 52 approval encouragement 2.863636364 54 | 53 approving interaction 2 55 | 54 arc rainbow 3.347826087 56 | 55 architecture engineering 3.25 57 | 56 area patio 2.545454545 58 | 57 area region 4.318181818 59 | 58 argument evidence 3.045454545 60 | 59 argument indication 1.772727273 61 | 60 arm arrow 2.230769231 62 | 61 arm missile 2.772727273 63 | 62 armor mail 1.913043478 64 | 63 army legion 3.285714286 65 | 64 aroma fragrance 4.681818182 66 | 65 aroma smell 4.19047619 67 | 66 arrangement blizzard 1.25 68 | 67 arrangement room 2.380952381 69 | 68 article girl 1.208333333 70 | 69 artillery 
gun 3.541666667 71 | 70 aspen maple 2.791666667 72 | 71 ass donkey 4.85 73 | 72 assembly crowd 3.363636364 74 | 73 assets capital 4.090909091 75 | 74 assets credit 3.47826087 76 | 75 assets income 4.038461538 77 | 76 association organization 4.362318841 78 | 77 athlete participant 3.458333333 79 | 78 athletics racing 3.826086957 80 | 79 athletics swimming 3.45 81 | 80 attitude notice 1.486486486 82 | 81 attitude study 1.88 83 | 82 attorney lawyer 4.681818182 84 | 83 attraction pressure 1.565217391 85 | 84 attraction quality 2.347826087 86 | 85 attribute condition 2.576923077 87 | 86 average time 2.380952381 88 | 87 baby computer 1.24 89 | 88 bail bond 3.086956522 90 | 89 bait instrument 2.090909091 91 | 90 bait pump 1.208333333 92 | 91 bakery work 2.541666667 93 | 92 bakery workplace 3 94 | 93 balance gauge 2.6 95 | 94 ball egg 1.727272727 96 | 95 ball nut 1.727272727 97 | 96 ball poker 2.165217391 98 | 97 ball sphere 4.142857143 99 | 98 band circle 2.954545455 100 | 99 bar needle 1.772727273 101 | 100 bar rod 4.047619048 102 | 101 barrel gallon 3.153846154 103 | 102 base club 1.619047619 104 | 103 base stock 2.476190476 105 | 104 baseball softball 3.230769231 106 | 105 basin vessel 4.076923077 107 | 106 basketball hockey 2.782608696 108 | 107 basketball squash 2.833333333 109 | 108 bathroom chamber 2.681818182 110 | 109 battle fight 4.583333333 111 | 110 bay mere 1.583333333 112 | 111 beach chain 1.047619048 113 | 112 beach ridge 2.260869565 114 | 113 beam column 2.961538462 115 | 114 beat meter 1.92 116 | 115 beat rhythm 4.363636364 117 | 116 bed layer 2.136363636 118 | 117 bedroom cell 2.136363636 119 | 118 bedroom construction 2.208333333 120 | 119 bee insect 4.043478261 121 | 120 beef cattle 4.173913043 122 | 121 beef meat 4.619047619 123 | 122 beginner novice 4.166666667 124 | 123 behavior purpose 2.304347826 125 | 124 belief magic 2.565217391 126 | 125 belief purpose 2.333333333 127 | 126 bench seat 4.428571429 128 | 127 bend curve 4.541666667 129 | 128 
berry citrus 3 130 | 129 bill invoice 4.588235294 131 | 130 billboard structure 3 132 | 131 bin box 3.956521739 133 | 132 bin cup 2.217391304 134 | 133 binary star 1.863636364 135 | 134 bird creature 3.458333333 136 | 135 bird solid 1.090909091 137 | 136 birth modification 1.434782609 138 | 137 bishop priest 4.269230769 139 | 138 bit tool 2.708333333 140 | 139 bite breakfast 2.869565217 141 | 140 bite taste 3.636363636 142 | 141 black juvenile 1.238095238 143 | 142 blade projector 1.434782609 144 | 143 blanket sleeve 2.111111111 145 | 144 blizzard rash 1.307692308 146 | 145 blow contact 1.695652174 147 | 146 blue red 3.272727273 148 | 147 board commission 3.173913043 149 | 148 boat ferry 4.083333333 150 | 149 boat vessel 3.208333333 151 | 150 body trunk 3.260869565 152 | 151 bond security 3.904761905 153 | 152 bone skull 4.272727273 154 | 153 book encyclopedia 3.904761905 155 | 154 booklet reference 3.380952381 156 | 155 boot kick 3.173913043 157 | 156 boot punch 1.75 158 | 157 bottom sole 3.347826087 159 | 158 boy male 4.52173913 160 | 159 boy rover 2 161 | 160 brace stand 2.913043478 162 | 161 brain head 4.173913043 163 | 162 brake click 1.761904762 164 | 163 branch department 4.043478261 165 | 164 brand knife 1.681818182 166 | 165 brand surname 2.318181818 167 | 166 brandy liquor 4.541666667 168 | 167 brass executive 1.913043478 169 | 168 bread bun 4.304347826 170 | 169 break dash 2.916666667 171 | 170 break insert 2.041666667 172 | 171 brick cement 3.625 173 | 172 brick strip 1.541666667 174 | 173 broadcast packet 1.727272727 175 | 174 brochure publication 3.875 176 | 175 brochure reference 2.95 177 | 176 brother member 2.583333333 178 | 177 brush implement 2.47826087 179 | 178 budget fund 4 180 | 179 buffer cache 2.409090909 181 | 180 bug child 1.276595745 182 | 181 build flesh 1.833333333 183 | 182 building cafe 3.142857143 184 | 183 bulb onion 2.92 185 | 184 bulletin news 4.666666667 186 | 185 bunny server 1.25 187 | 186 burn hurt 3.730769231 188 | 187 
burning flame 4.48 189 | 188 burning punishment 2.52 190 | 189 burst transformation 1.952380952 191 | 190 business disposition 1.583333333 192 | 191 business railway 2.541666667 193 | 192 butter stick 2.375 194 | 193 butterfly comma 1.130434783 195 | 194 cab taxi 4.476190476 196 | 195 cake pie 3.545454545 197 | 196 calendar circulation 1.695652174 198 | 197 calendar system 1.916666667 199 | 198 call meeting 2.727272727 200 | 199 call statement 2.125 201 | 200 campaign operation 2.826086957 202 | 201 candy sweet 4.510638298 203 | 202 cap covering 3.681818182 204 | 203 capital seat 2.32 205 | 204 captain officer 4 206 | 205 card plastic 2.391304348 207 | 206 carriage coach 3.708333333 208 | 207 cart wagon 4.375 209 | 208 cartoon wit 2.666666667 210 | 209 case grip 2.12 211 | 210 case luggage 3.68 212 | 211 case tin 2.6 213 | 212 cast fishing 2.6 214 | 213 cat vision 1.431818182 215 | 214 category flavor 2.428571429 216 | 215 cave formation 2.347826087 217 | 216 ceiling overhead 3.52 218 | 217 cement glue 3 219 | 218 center loss 1.304347826 220 | 219 century decade 3.434782609 221 | 220 century period 3.769230769 222 | 221 century temperature 1.130434783 223 | 222 certificate study 3.136363636 224 | 223 certificate wave 1.145833333 225 | 224 chair furniture 3.869565217 226 | 225 chair rocker 2.826086957 227 | 226 chance opportunity 4.590909091 228 | 227 chance probability 4.619047619 229 | 228 chandler retailer 2.130434783 230 | 229 change decrease 3 231 | 230 change move 3.434782609 232 | 231 channel sound 3.2 233 | 232 character vision 1.666666667 234 | 233 charge damage 2.375 235 | 234 charge tax 4.086956522 236 | 235 check draft 3.952380952 237 | 236 cheese food 4 238 | 237 chemical salt 3.590909091 239 | 238 chess duty 1.166666667 240 | 239 chick hen 3.863636364 241 | 240 chicken poultry 4.217391304 242 | 241 chief guru 3.904761905 243 | 242 child kid 4.857142857 244 | 243 chin feature 2.875 245 | 244 choice option 4.590909091 246 | 245 chuck jaw 2.090909091 247 
| 246 church temple 4.347826087 248 | 247 circle oval 3.347826087 249 | 248 citrus orange 4.208333333 250 | 249 climb mount 4.166666667 251 | 250 clock timer 4.416666667 252 | 251 cloth satin 3.857142857 253 | 252 cloud energy 1.727272727 254 | 253 club society 3.869565217 255 | 254 coach trainer 4.6 256 | 255 coat newspaper 1.088888889 257 | 256 coat roof 2.304347826 258 | 257 code software 3.52 259 | 258 coffee meeting 2.602941176 260 | 259 coin currency 4.571428571 261 | 260 collapse shock 3.043478261 262 | 261 collection packet 2.772727273 263 | 262 collision smash 3.904761905 264 | 263 color purple 4.090909091 265 | 264 color quality 2.407407407 266 | 265 coloring yellow 3.791666667 267 | 266 comfort relief 3.857142857 268 | 267 commander editor 2.041666667 269 | 268 commerce transport 2.739130435 270 | 269 commitment guarantee 3.791666667 271 | 270 communication message 4.083333333 272 | 271 communication statement 3.391304348 273 | 272 communication tune 2.409090909 274 | 273 community province 3.391304348 275 | 274 community territory 2.739130435 276 | 275 company distributor 3.523809524 277 | 276 company establishment 3.833333333 278 | 277 comparison scrutiny 2.727272727 279 | 278 compound salt 3.045454545 280 | 279 conclusion result 4.523809524 281 | 280 condition status 4.090909091 282 | 281 conditions weather 3.666666667 283 | 282 congress sex 1.818181818 284 | 283 connection keyboard 2.4 285 | 284 conservation traveling 1.717391304 286 | 285 construction window 2.761904762 287 | 286 continent ground 2.791666667 288 | 287 control driving 3.714285714 289 | 288 cook printer 1.347826087 290 | 289 cooking cuisine 4.25 291 | 290 copy image 3.25 292 | 291 copy work 3.125 293 | 292 cord pick 1.590909091 294 | 293 cord yarn 2.761904762 295 | 294 corridor hall 4.041666667 296 | 295 corruption house 1.255319149 297 | 296 cost postage 3.086956522 298 | 297 cost reward 2.80952381 299 | 298 couch lounge 3.347826087 300 | 299 count number 4.166666667 301 | 300 
counter furniture 2.3 302 | 301 country playground 1.791666667 303 | 302 course starter 2.666666667 304 | 303 court drawer 1.272727273 305 | 304 court tribunal 3.791666667 306 | 305 cousin relation 4.043478261 307 | 306 cousin relative 4.5 308 | 307 cover feather 2.44 309 | 308 cover hair 2.65 310 | 309 cover sleeve 3.260869565 311 | 310 covering skin 3.35 312 | 311 crack hole 3.04 313 | 312 craft trade 3.458333333 314 | 313 creation stitch 2.68 315 | 314 creativity vision 3.181818182 316 | 315 credit sum 3 317 | 316 creek stream 3.88 318 | 317 crew society 2 319 | 318 crew unit 3.590909091 320 | 319 crop plant 4.083333333 321 | 320 crow jay 2.217391304 322 | 321 crown place 1.541666667 323 | 322 crush push 2.181818182 324 | 323 cube dice 3.925925926 325 | 324 cup handbag 1.318181818 326 | 325 cup son 1.086956522 327 | 326 current flow 3.761904762 328 | 327 curve rainbow 3.44 329 | 328 customers mission 1.641304348 330 | 329 cut meat 2.904761905 331 | 330 cutter knife 4.458333333 332 | 331 cylinder pen 2.227272727 333 | 332 dad parent 4.545454545 334 | 333 daisy flower 4.5 335 | 334 damage terms 1.761904762 336 | 335 danger status 1.904761905 337 | 336 dash sprint 3.375 338 | 337 dashboard protection 2.173913043 339 | 338 database list 4.08 340 | 339 daughter girl 4.04 341 | 340 day shoes 1.288888889 342 | 341 deal hand 3.041666667 343 | 342 debt deficit 3.458333333 344 | 343 debt loan 4.347826087 345 | 344 debt possession 2.481481481 346 | 345 decision option 2.909090909 347 | 346 decrease increase 3.541666667 348 | 347 deep ocean 3.636363636 349 | 348 deficit trust 1.576923077 350 | 349 degree quantity 2.681818182 351 | 350 degree style 1.652173913 352 | 351 delivery distribution 3.590909091 353 | 352 department division 4.625 354 | 353 descent fall 3.304347826 355 | 354 desert tract 2.2 356 | 355 desire feeling 4.227272727 357 | 356 desk table 4.172413793 358 | 357 determination discovery 2.608695652 359 | 358 determination selection 2.75 360 | 359 development 
exploitation 2.2 361 | 360 device drum 2.583333333 362 | 361 devil satan 4.782608696 363 | 362 dialogue play 3.44 364 | 363 diamond parcel 1.217391304 365 | 364 digit toe 2.64 366 | 365 digit unit 3.681818182 367 | 366 dinner party 3.826086957 368 | 367 direction government 2.173913043 369 | 368 direction protocol 3.142857143 370 | 369 direction traveling 3.136363636 371 | 370 dirt sand 3.391304348 372 | 371 dirt soil 4 373 | 372 discharge spark 2.782608696 374 | 373 disease illness 4.739130435 375 | 374 display language 1.916666667 376 | 375 distance distribution 1.739130435 377 | 376 diversion skiing 1.826086957 378 | 377 dividend net 2.52 379 | 378 dividend profit 4.045454545 380 | 379 diving swim 3.96 381 | 380 dock herb 1.238095238 382 | 381 document report 3.826086957 383 | 382 dog fauna 2.666666667 384 | 383 domain land 2.523809524 385 | 384 door light 1.577777778 386 | 385 doubt ego 1.772727273 387 | 386 drama genre 2.714285714 388 | 387 draw finish 2.375 389 | 388 draw tie 2.958333333 390 | 389 drawer pan 1.818181818 391 | 390 dressing patch 2.523809524 392 | 391 dressing sauce 2.869565217 393 | 392 drill implement 2.666666667 394 | 393 drink slice 2.772727273 395 | 394 driver supporter 2.090909091 396 | 395 driver worker 3.1 397 | 396 drop serving 2.136363636 398 | 397 drug liquor 3.523809524 399 | 398 drug operator 1.416666667 400 | 399 drum piano 3.652173913 401 | 400 eagle hawk 3.833333333 402 | 401 ear organ 3.909090909 403 | 402 ease relaxation 4.5 404 | 403 ease rest 3.916666667 405 | 404 editing instrument 1.64 406 | 405 editor worker 3.125 407 | 406 eight movement 1.318181818 408 | 407 element iron 3.318181818 409 | 408 element mixture 3 410 | 409 element nickel 3.782608696 411 | 410 elevator lift 4.72 412 | 411 emission gum 1.25 413 | 412 employee server 3.318181818 414 | 413 endorsement signature 3.545454545 415 | 414 energy microwave 3.583333333 416 | 415 engineering technology 4.16 417 | 416 environment land 3.090909091 418 | 417 equipment 
recorder 3.391304348 419 | 418 equipment seat 2.136363636 420 | 419 establishment religion 2.173913043 421 | 420 event influence 1.76 422 | 421 event phenomenon 3.173913043 423 | 422 evidence format 1.476190476 424 | 423 evidence record 3.681818182 425 | 424 examination quiz 4.391304348 426 | 425 examiner tea 1.3 427 | 426 executive minister 3.095238095 428 | 427 executive organization 3.2 429 | 428 explanation theory 3.652173913 430 | 429 express mail 3.619047619 431 | 430 extract selection 3.086956522 432 | 431 eye organ 4.125 433 | 432 fabric lace 3.625 434 | 433 fabric sail 2.380952381 435 | 434 faith religion 4.227272727 436 | 435 fantasy recycling 1.125 437 | 436 farmer individual 2.545454545 438 | 437 fault mistake 4.541666667 439 | 438 fauna toy 1.304347826 440 | 439 feature side 1.833333333 441 | 440 feedback flow 2.666666667 442 | 441 feeling hope 3.48 443 | 442 female slave 2.125 444 | 443 female woman 4.96 445 | 444 fiction literature 3.590909091 446 | 445 fiction romance 2.476190476 447 | 446 field science 3 448 | 447 field yard 3.8 449 | 448 fight separation 2.681818182 450 | 449 figure stamp 1.88 451 | 450 film movie 4.912280702 452 | 451 find implementation 2.045454545 453 | 452 find occurrence 2.739130435 454 | 453 finger toe 3.76 455 | 454 flag iris 1.708333333 456 | 455 flame reaction 1.954545455 457 | 456 flash lightning 3.96 458 | 457 flat housing 3.96 459 | 458 flavor variety 3.318181818 460 | 459 flight trip 3.772727273 461 | 460 floor level 3.333333333 462 | 461 floor porch 2.739130435 463 | 462 flora plant 4.384615385 464 | 463 flora violet 3.347826087 465 | 464 flour garlic 2.4 466 | 465 flower pink 2.739130435 467 | 466 flute wind 2.826086957 468 | 467 flyer justice 1.181818182 469 | 468 food sausage 3.913043478 470 | 469 foot recognition 1.431372549 471 | 470 football rugby 3.208333333 472 | 471 forecast message 2.5 473 | 472 forest ground 2.681818182 474 | 473 form type 3.909090909 475 | 474 format packaging 2.47826087 476 | 475 format 
style 4.086956522 477 | 476 foundation support 3.913043478 478 | 477 fox wolf 3.090909091 479 | 478 framework grill 3.428571429 480 | 479 freeze frost 4 481 | 480 friend individual 2.791666667 482 | 481 front school 1.48 483 | 482 front surface 3 484 | 483 fruit seed 3.818181818 485 | 484 fuel gasoline 4.48 486 | 485 fuel nutrition 2.428571429 487 | 486 fund store 1.826086957 488 | 487 funds interest 3.375 489 | 488 furniture table 4.181818182 490 | 489 gamble kitty 1.636363636 491 | 490 gamble pyramid 1.380952381 492 | 491 game tennis 4.166666667 493 | 492 garbage rubbish 4.520833333 494 | 493 garden plantation 3.55 495 | 494 garlic meal 3.083333333 496 | 495 garment sweater 3.7 497 | 496 garment tie 3.636363636 498 | 497 gas hydrogen 4.090909091 499 | 498 gas neon 3.869565217 500 | 499 gathering parade 3.625 501 | 500 gauge meter 3.863636364 502 | 501 gear mechanism 4.090909091 503 | 502 gem quartz 3.227272727 504 | 503 gender sex 4.434782609 505 | 504 gender size 1.571428571 506 | 505 genre prose 2.363636364 507 | 506 glass tub 2.88 508 | 507 glove wear 3.416666667 509 | 508 goal objective 4.590909091 510 | 509 golf hockey 2.727272727 511 | 510 good sheet 1.458333333 512 | 511 governor mayor 3.666666667 513 | 512 governor politician 4.125 514 | 513 graphic image 4.318181818 515 | 514 grass universe 1.76 516 | 515 gray property 1.5 517 | 516 grip handle 4.047619048 518 | 517 grip hold 4.391304348 519 | 518 growth process 3.291666667 520 | 519 guarantee warranty 4.230769231 521 | 520 guess universe 1.243243243 522 | 521 gulf ocean 3.227272727 523 | 522 hack machine 2.458333333 524 | 523 hamburger nutrition 3.041666667 525 | 524 hand script 3.260869565 526 | 525 happening surprise 2.708333333 527 | 526 head question 1.24 528 | 527 head secretary 1.909090909 529 | 528 health welfare 3.5 530 | 529 hearing proceedings 3.130434783 531 | 530 heart space 1.583333333 532 | 531 heart ticker 3.181818182 533 | 532 heat temperature 4.25 534 | 533 height infinite 2.434782609 
535 | 534 helmet scale 1.380952381 536 | 535 help support 4.619047619 537 | 536 help supporter 3.833333333 538 | 537 heritage loss 1.681818182 539 | 538 highway street 3.545454545 540 | 539 highway trail 2.863636364 541 | 540 hit tourist 1.44 542 | 541 hole opening 3.76 543 | 542 holiday vacation 4.619047619 544 | 543 hood protection 2.913043478 545 | 544 hood shelter 2.571428571 546 | 545 horn tail 2.692307692 547 | 546 horse mount 2.875 548 | 547 hose pipe 4 549 | 548 housing vault 2 550 | 549 icon representation 3.380952381 551 | 550 implement stick 2.5 552 | 551 impulse motive 2.869565217 553 | 552 impulse urge 4.5 554 | 553 information target 1.95 555 | 554 ink liquid 3.64 556 | 555 installation zoo 1.44 557 | 556 institution prison 2.565217391 558 | 557 instruction lesson 3.8 559 | 558 instruction teaching 4.523809524 560 | 559 instrumentation perfume 1.208333333 561 | 560 instrumentation rod 2.863636364 562 | 561 intensity quiet 2.181818182 563 | 562 interest lien 2.541666667 564 | 563 intervention treatment 2.363636364 565 | 564 inventory listing 3.380952381 566 | 565 investment tomato 1.155555556 567 | 566 jail nick 1.782608696 568 | 567 jail prison 4.739130435 569 | 568 jaw lens 1.260869565 570 | 569 join union 4 571 | 570 joke message 2.636363636 572 | 571 journey travel 4.8 573 | 572 judgment sense 3.541666667 574 | 573 jumper sweater 3.347826087 575 | 574 jury school 1.476190476 576 | 575 justice official 3.043478261 577 | 576 kiss sweet 3 578 | 577 kitchen toilet 2.545454545 579 | 578 knight prince 3.375 580 | 579 knowledge revolution 2.2 581 | 580 knowledge taste 1.869565217 582 | 581 lake stream 4.130434783 583 | 582 lamb young 3.285714286 584 | 583 language tongue 3.652173913 585 | 584 latex rubber 3.913043478 586 | 585 law personnel 1.772727273 587 | 586 layer region 2.458333333 588 | 587 layer snow 2.434782609 589 | 588 leader politician 4.238095238 590 | 589 lesson teaching 4.136363636 591 | 590 letter text 3.84 592 | 591 level stage 4 593 | 592 
license permission 4.19047619 594 | 593 license security 3.130434783 595 | 594 lien share 2.136363636 596 | 595 life story 3 597 | 596 line occupation 2.260869565 598 | 597 line plane 2.833333333 599 | 598 line queue 4.541666667 600 | 599 lineup roll 2.958333333 601 | 600 link union 3.583333333 602 | 601 lion tiger 3.565217391 603 | 602 literature poem 3.76 604 | 603 load weight 4.043478261 605 | 604 location property 3.347826087 606 | 605 loss possession 2.954545455 607 | 606 low shoulder 1.387755102 608 | 607 lyric printer 1.244444444 609 | 608 magnolia maple 2.68 610 | 609 male man 4.619047619 611 | 610 man soldier 3.875 612 | 611 manager trainer 3.347826087 613 | 612 map representation 3.434782609 614 | 613 map sewing 1.347826087 615 | 614 map video 1.695652174 616 | 615 maple tree 3.833333333 617 | 616 mark print 2.954545455 618 | 617 mark slash 3.304347826 619 | 618 mask roof 1.782608696 620 | 619 mate relation 3.434782609 621 | 620 matter text 3.269230769 622 | 621 matter verse 1.913043478 623 | 622 mayor water 1.113636364 624 | 623 meal mixture 2.272727273 625 | 624 meal rice 4 626 | 625 measure money 2.565217391 627 | 626 measure twist 1.590909091 628 | 627 meat solid 2.5 629 | 628 melody music 4.5 630 | 629 memory operation 2.043478261 631 | 630 metal zinc 3.956521739 632 | 631 meter radar 2.954545455 633 | 632 microwave radiation 3.454545455 634 | 633 middle scene 1.4 635 | 634 minute quantity 2.608695652 636 | 635 mode scale 2.173913043 637 | 636 modification surprise 1.714285714 638 | 637 moment thief 1.242424242 639 | 638 mortal mother 2.25 640 | 639 mortal visitor 1.923076923 641 | 640 motion snowboarding 2.363636364 642 | 641 motion step 3.142857143 643 | 642 motive reason 3.909090909 644 | 643 motorcycle tank 2.043478261 645 | 644 mount volcano 3.238095238 646 | 645 mouth opening 3.304347826 647 | 646 mouth trap 2.333333333 648 | 647 murphy potato 1.173913043 649 | 648 museum store 2.695652174 650 | 649 music print 1.958333333 651 | 650 musician 
performer 3.952380952 652 | 651 needle sharp 3.954545455 653 | 652 noise trouble 3.15 654 | 653 note obligation 2.166666667 655 | 654 notebook product 2.217391304 656 | 655 notebook production 1.666666667 657 | 656 novel story 4.363636364 658 | 657 oak tree 4.576923077 659 | 658 objective target 4.086956522 660 | 659 occasion second 1.75 661 | 660 occupation place 1.956521739 662 | 661 occurrence tsunami 2.47826087 663 | 662 ocean pond 3.545454545 664 | 663 office outlet 2.142857143 665 | 664 office situation 1.24 666 | 665 onion topic 1.183673469 667 | 666 operation processing 3.409090909 668 | 667 operative spy 2.739130435 669 | 668 opinion papers 1.72 670 | 669 opinion sentiment 3.136363636 671 | 670 origin root 4.333333333 672 | 671 outlet shop 3.565217391 673 | 672 oxygen substance 2.565217391 674 | 673 package software 3.4 675 | 674 padding tower 1.863636364 676 | 675 painting picture 4.186046512 677 | 676 papers security 2.4 678 | 677 papers ticket 3.181818182 679 | 678 parcel region 2 680 | 679 park stadium 3.333333333 681 | 680 passage quotation 3.666666667 682 | 681 patch spot 3.181818182 683 | 682 payment spending 3.708333333 684 | 683 permission tolerance 2.04 685 | 684 person technician 3.458333333 686 | 685 phantom shadow 3.227272727 687 | 686 piazza square 2.2 688 | 687 piece sail 1.333333333 689 | 688 pinnacle tower 3.083333333 690 | 689 place position 4.230769231 691 | 690 plane sheet 2.130434783 692 | 691 plane tool 2.304347826 693 | 692 play turn 2.681818182 694 | 693 plot strategy 2.9 695 | 694 point second 1.826086957 696 | 695 point site 2.96 697 | 696 point spot 4.2 698 | 697 polyester textile 4.285714286 699 | 698 position view 3 700 | 699 postage rate 3.086956522 701 | 700 postage signal 1.48 702 | 701 power skill 3.16 703 | 702 prayer request 3.36 704 | 703 problem trouble 4.608695652 705 | 704 process rule 2.5 706 | 705 process tail 1.12 707 | 706 product wear 2.041666667 708 | 707 property texture 2 709 | 708 protection roof 3.708333333 
710 | 709 protocol rule 4.090909091 711 | 710 publication textbook 3.428571429 712 | 711 pumpkin vine 2.409090909 713 | 712 pupil student 4.523809524 714 | 713 pyramid speculation 1.7 715 | 714 query question 4.739130435 716 | 715 quiet silence 4.909090909 717 | 716 racer taxi 2.19047619 718 | 717 radio receiver 3.807692308 719 | 718 rain storm 3.958333333 720 | 719 ray shark 2.545454545 721 | 720 recreation skiing 3.090909091 722 | 721 red wine 3.4 723 | 722 report study 3.875 724 | 723 representative voice 2.875 725 | 724 ring water 1.333333333 726 | 725 rise travel 1.608695652 727 | 726 rock stone 4.476190476 728 | 727 roll toast 2.826086957 729 | 728 root stem 3.666666667 730 | 729 rub wipe 4.227272727 731 | 730 rubber stuff 2.1 732 | 731 rugby soccer 3.260869565 733 | 732 sail sheet 2.083333333 734 | 733 scandal week 1.086956522 735 | 734 science shelter 1.136363636 736 | 735 score success 3.208333333 737 | 736 season summer 4.045454545 738 | 737 season winter 4.347826087 739 | 738 second time 4.217391304 740 | 739 seminar sweet 1.202898551 741 | 740 sex stance 1.476190476 742 | 741 share stake 3.208333333 743 | 742 shelter tent 4.25 744 | 743 shelter wind 2.641791045 745 | 744 shirt tiger 1.038461538 746 | 745 side slope 3.08 747 | 746 sight vision 4.818181818 748 | 747 simulation theory 2.227272727 749 | 748 skull tooth 2.590909091 750 | 749 slash stroke 3.25 751 | 750 smash success 2.791666667 752 | 751 snap touch 2.038461538 753 | 752 software writing 2.347826087 754 | 753 song vocal 3.857142857 755 | 754 soup spaghetti 3 756 | 755 soup sweet 2.454545455 757 | 756 speech word 4.045454545 758 | 757 steel weapon 3.380952381 759 | 758 step travel 2.086956522 760 | 759 step walk 4.173913043 761 | 760 storm weather 4.083333333 762 | 761 straight stretch 3.291666667 763 | 762 sun toy 1.25 764 | 763 tank tub 3.52173913 765 | 764 taxpayer window 1.21875 766 | 765 throne toilet 1.956521739 767 | 766 ticket writing 2.375 768 | 767 victory watch 1.553191489 769 | 768 
washer worker 2.909090909 770 | 769 wife woman 3.884615385 771 | 770 workplace workshop 4.04 772 | -------------------------------------------------------------------------------- /evaluation/wordsim/data/rel353.txt: -------------------------------------------------------------------------------- 1 | 0 computer keyboard 7.62 2 | 1 Jerusalem Israel 8.46 3 | 2 planet galaxy 8.11 4 | 3 canyon landscape 7.53 5 | 4 OPEC country 5.63 6 | 5 day summer 3.94 7 | 6 day dawn 7.53 8 | 7 country citizen 7.31 9 | 8 planet people 5.75 10 | 9 environment ecology 8.81 11 | 10 Maradona football 8.62 12 | 11 OPEC oil 8.59 13 | 12 money bank 8.50 14 | 13 computer software 8.50 15 | 14 law lawyer 8.38 16 | 15 weather forecast 8.34 17 | 16 network hardware 8.31 18 | 17 nature environment 8.31 19 | 18 FBI investigation 8.31 20 | 19 money wealth 8.27 21 | 20 psychology Freud 8.21 22 | 21 news report 8.16 23 | 22 war troops 8.13 24 | 23 physics proton 8.12 25 | 24 bank money 8.12 26 | 25 stock market 8.08 27 | 26 planet constellation 8.06 28 | 27 credit card 8.06 29 | 28 hotel reservation 8.03 30 | 29 closet clothes 8.00 31 | 30 soap opera 7.94 32 | 31 planet astronomer 7.94 33 | 32 planet space 7.92 34 | 33 movie theater 7.92 35 | 34 treatment recovery 7.91 36 | 35 baby mother 7.85 37 | 36 money deposit 7.73 38 | 37 television film 7.72 39 | 38 psychology mind 7.69 40 | 39 game team 7.69 41 | 40 admission ticket 7.69 42 | 41 Jerusalem Palestinian 7.65 43 | 42 Arafat terror 7.65 44 | 43 boxing round 7.61 45 | 44 computer internet 7.58 46 | 45 money property 7.57 47 | 46 tennis racket 7.56 48 | 47 telephone communication 7.50 49 | 48 currency market 7.50 50 | 49 psychology cognition 7.48 51 | 50 seafood sea 7.47 52 | 51 book paper 7.46 53 | 52 book library 7.46 54 | 53 psychology depression 7.42 55 | 54 fighting defeating 7.41 56 | 55 movie star 7.38 57 | 56 hundred percent 7.38 58 | 57 dollar profit 7.38 59 | 58 money possession 7.29 60 | 59 cup drink 7.25 61 | 60 psychology health 7.23 62 
| 61 summer drought 7.16 63 | 62 investor earning 7.13 64 | 63 company stock 7.08 65 | 64 stroke hospital 7.03 66 | 65 liability insurance 7.03 67 | 66 game victory 7.03 68 | 67 psychology anxiety 7.00 69 | 68 game defeat 6.97 70 | 69 FBI fingerprint 6.94 71 | 70 money withdrawal 6.88 72 | 71 psychology fear 6.85 73 | 72 drug abuse 6.85 74 | 73 concert virtuoso 6.81 75 | 74 computer laboratory 6.78 76 | 75 love sex 6.77 77 | 76 problem challenge 6.75 78 | 77 movie critic 6.73 79 | 78 Arafat peace 6.73 80 | 79 bed closet 6.72 81 | 80 lawyer evidence 6.69 82 | 81 fertility egg 6.69 83 | 82 precedent law 6.65 84 | 83 minister party 6.63 85 | 84 psychology clinic 6.58 86 | 85 cup coffee 6.58 87 | 86 water seepage 6.56 88 | 87 government crisis 6.56 89 | 88 space world 6.53 90 | 89 dividend calculation 6.48 91 | 90 victim emergency 6.47 92 | 91 luxury car 6.47 93 | 92 tool implement 6.46 94 | 93 competition price 6.44 95 | 94 psychology doctor 6.42 96 | 95 gender equality 6.41 97 | 96 listing category 6.38 98 | 97 video archive 6.34 99 | 98 oil stock 6.34 100 | 99 governor office 6.34 101 | 100 discovery space 6.34 102 | 101 record number 6.31 103 | 102 brother monk 6.27 104 | 103 production crew 6.25 105 | 104 nature man 6.25 106 | 105 family planning 6.25 107 | 106 disaster area 6.25 108 | 107 food preparation 6.22 109 | 108 preservation world 6.19 110 | 109 movie popcorn 6.19 111 | 110 lover quarrel 6.19 112 | 111 game series 6.19 113 | 112 dollar loss 6.09 114 | 113 weapon secret 6.06 115 | 114 shower flood 6.03 116 | 115 registration arrangement 6.00 117 | 116 arrival hotel 6.00 118 | 117 announcement warning 6.00 119 | 118 game round 5.97 120 | 119 baseball season 5.97 121 | 120 drink mouth 5.96 122 | 121 life lesson 5.94 123 | 122 grocery money 5.94 124 | 123 energy crisis 5.94 125 | 124 reason criterion 5.91 126 | 125 equipment maker 5.91 127 | 126 cup liquid 5.90 128 | 127 deployment withdrawal 5.88 129 | 128 tiger zoo 5.87 130 | 129 journey car 5.85 131 | 130 
money laundering 5.65 132 | 131 summer nature 5.63 133 | 132 decoration valor 5.63 134 | 133 Mars scientist 5.63 135 | 134 alcohol chemistry 5.54 136 | 135 disability death 5.47 137 | 136 change attitude 5.44 138 | 137 arrangement accommodation 5.41 139 | 138 territory surface 5.34 140 | 139 size prominence 5.31 141 | 140 exhibit memorabilia 5.31 142 | 141 credit information 5.31 143 | 142 territory kilometer 5.28 144 | 143 death row 5.25 145 | 144 doctor liability 5.19 146 | 145 impartiality interest 5.16 147 | 146 energy laboratory 5.09 148 | 147 secretary senate 5.06 149 | 148 death inmate 5.03 150 | 149 monk oracle 5.00 151 | 150 cup food 5.00 152 | 151 journal association 4.97 153 | 152 street children 4.94 154 | 153 car flight 4.94 155 | 154 space chemistry 4.88 156 | 155 situation conclusion 4.81 157 | 156 word similarity 4.75 158 | 157 peace plan 4.75 159 | 158 consumer energy 4.75 160 | 159 ministry culture 4.69 161 | 160 smart student 4.62 162 | 161 investigation effort 4.59 163 | 162 image surface 4.56 164 | 163 life term 4.50 165 | 164 start match 4.47 166 | 165 computer news 4.47 167 | 166 board recommendation 4.47 168 | 167 lad brother 4.46 169 | 168 observation architecture 4.38 170 | 169 coast hill 4.38 171 | 170 deployment departure 4.25 172 | 171 benchmark index 4.25 173 | 172 attempt peace 4.25 174 | 173 consumer confidence 4.13 175 | 174 start year 4.06 176 | 175 focus life 4.06 177 | 176 development issue 3.97 178 | 177 theater history 3.91 179 | 178 situation isolation 3.88 180 | 179 profit warning 3.88 181 | 180 media trading 3.88 182 | 181 chance credibility 3.88 183 | 182 precedent information 3.85 184 | 183 architecture century 3.78 185 | 184 population development 3.75 186 | 185 stock live 3.73 187 | 186 peace atmosphere 3.69 188 | 187 morality marriage 3.69 189 | 188 minority peace 3.69 190 | 189 atmosphere landscape 3.69 191 | 190 report gain 3.63 192 | 191 music project 3.63 193 | 192 seven series 3.56 194 | 193 experience music 3.47 
195 | 194 school center 3.44 196 | 195 five month 3.38 197 | 196 announcement production 3.38 198 | 197 morality importance 3.31 199 | 198 money operation 3.31 200 | 199 delay news 3.31 201 | 200 governor interview 3.25 202 | 201 practice institution 3.19 203 | 202 century nation 3.16 204 | 203 coast forest 3.15 205 | 204 shore woodland 3.08 206 | 205 drink car 3.04 207 | 206 president medal 3.00 208 | 207 prejudice recognition 3.00 209 | 208 viewer serial 2.97 210 | 209 peace insurance 2.94 211 | 210 Mars water 2.94 212 | 211 media gain 2.88 213 | 212 precedent cognition 2.81 214 | 213 announcement effort 2.75 215 | 214 line insurance 2.69 216 | 215 crane implement 2.69 217 | 216 drink mother 2.65 218 | 217 opera industry 2.63 219 | 218 volunteer motto 2.56 220 | 219 listing proximity 2.56 221 | 220 precedent collection 2.50 222 | 221 cup article 2.40 223 | 222 sign recess 2.38 224 | 223 problem airport 2.38 225 | 224 reason hypertension 2.31 226 | 225 direction combination 2.25 227 | 226 Wednesday news 2.22 228 | 227 glass magician 2.08 229 | 228 cemetery woodland 2.08 230 | 229 possibility girl 1.94 231 | 230 cup substance 1.92 232 | 231 forest graveyard 1.85 233 | 232 stock egg 1.81 234 | 233 month hotel 1.81 235 | 234 energy secretary 1.81 236 | 235 precedent group 1.77 237 | 236 production hike 1.75 238 | 237 stock phone 1.62 239 | 238 holy sex 1.62 240 | 239 stock CD 1.31 241 | 240 drink ear 1.31 242 | 241 delay racism 1.19 243 | 242 stock life 0.92 244 | 243 stock jaguar 0.92 245 | 244 monk slave 0.92 246 | 245 lad wizard 0.92 247 | 246 sugar approach 0.88 248 | 247 rooster voyage 0.62 249 | 248 noon string 0.54 250 | 249 chord smile 0.54 251 | 250 professor cucumber 0.31 252 | 251 king cabbage 0.23 253 | -------------------------------------------------------------------------------- /evaluation/wordsim/data/simLex.txt: -------------------------------------------------------------------------------- 1 | 0 old new 0.0 2 | 1 smart intelligent 
9.76923076923077 3 | 2 hard difficult 9.692307692307692 4 | 3 happy cheerful 9.307692307692308 5 | 4 hard easy 0.0 6 | 5 fast rapid 9.846153846153847 7 | 6 happy glad 9.384615384615385 8 | 7 short long 0.0 9 | 8 stupid dumb 9.076923076923077 10 | 9 weird strange 9.692307692307692 11 | 10 wide narrow 0.0 12 | 11 bad awful 9.384615384615385 13 | 12 easy difficult 0.0 14 | 13 bad terrible 9.461538461538462 15 | 14 hard simple 0.0 16 | 15 smart dumb 0.0 17 | 16 insane crazy 9.615384615384615 18 | 17 happy mad 0.0 19 | 18 large huge 9.692307692307692 20 | 19 hard tough 9.153846153846153 21 | 20 new fresh 8.615384615384615 22 | 21 sharp dull 0.0 23 | 22 quick rapid 9.846153846153847 24 | 23 dumb foolish 8.923076923076923 25 | 24 wonderful terrific 9.615384615384615 26 | 25 strange odd 9.615384615384615 27 | 26 happy angry 0.0 28 | 27 narrow broad 0.0 29 | 28 simple easy 9.615384615384615 30 | 29 old fresh 0.0 31 | 30 apparent obvious 9.076923076923077 32 | 31 inexpensive cheap 9.692307692307692 33 | 32 nice generous 7.3076923076923075 34 | 33 weird normal 0.0 35 | 34 weird odd 9.76923076923077 36 | 35 bad immoral 7.769230769230769 37 | 36 sad funny 0.0 38 | 37 wonderful great 9.538461538461538 39 | 38 guilty ashamed 7.3076923076923075 40 | 39 beautiful wonderful 7.153846153846154 41 | 40 confident sure 9.384615384615385 42 | 41 dumb dense 7.0 43 | 42 large big 10.0 44 | 43 nice cruel 0.0 45 | 44 impatient anxious 8.23076923076923 46 | 45 big broad 8.384615384615385 47 | 46 strong proud 4.769230769230769 48 | 47 unnecessary necessary 0.0 49 | 48 restless young 0.38461538461538464 50 | 49 dumb intelligent 0.0 51 | 50 bad great 0.0 52 | 51 difficult simple 0.0 53 | 52 necessary important 9.076923076923077 54 | 53 bad terrific 0.0 55 | 54 mad glad 0.0 56 | 55 honest guilty 0.0 57 | 56 easy tough 0.0 58 | 57 easy flexible 3.6923076923076925 59 | 58 certain sure 9.692307692307692 60 | 59 essential necessary 9.846153846153847 61 | 60 different normal 0.0 62 | 61 sly clever 
8.307692307692308 63 | 62 crucial important 9.615384615384615 64 | 63 harsh cruel 8.461538461538462 65 | 64 childish foolish 6.461538461538462 66 | 65 scarce rare 9.615384615384615 67 | 66 friendly generous 4.538461538461538 68 | 67 fragile frigid 0.6153846153846154 69 | 68 long narrow 2.1538461538461537 70 | 69 big heavy 4.615384615384615 71 | 70 rough frigid 0.07692307692307693 72 | 71 bizarre strange 9.692307692307692 73 | 72 illegal immoral 4.6923076923076925 74 | 73 bad guilty 4.230769230769231 75 | 74 modern ancient 0.0 76 | 75 new ancient 0.0 77 | 76 dull funny 0.0 78 | 77 happy young 0.07692307692307693 79 | 78 easy big 0.0 80 | 79 great awful 0.0 81 | 80 tiny huge 0.0 82 | 81 polite proper 7.923076923076923 83 | 82 modest ashamed 1.9230769230769231 84 | 83 exotic rare 8.307692307692308 85 | 84 dumb clever 0.0 86 | 85 delightful wonderful 9.23076923076923 87 | 86 noticeable obvious 9.153846153846153 88 | 87 afraid anxious 6.923076923076923 89 | 88 formal proper 8.076923076923077 90 | 89 dreary dull 6.923076923076923 91 | 90 delightful cheerful 8.461538461538462 92 | 91 unhappy mad 6.384615384615385 93 | 92 sad terrible 6.0 94 | 93 sick crazy 2.3846153846153846 95 | 94 violent angry 5.923076923076923 96 | 95 laden heavy 8.0 97 | 96 dirty cheap 0.7692307692307693 98 | 97 elastic flexible 7.923076923076923 99 | 98 hard dense 5.3076923076923075 100 | 99 recent new 8.461538461538462 101 | 100 bold proud 1.3076923076923077 102 | 101 sly strange 0.38461538461538464 103 | 102 strange sly 0.15384615384615385 104 | 103 dumb rare 0.0 105 | 104 sly tough 0.0 106 | 105 terrific mad 0.0 107 | 106 modest flexible 0.0 108 | 107 fresh wide 0.0 109 | 108 huge dumb 0.0 110 | 109 large flexible 0.0 111 | 110 dirty narrow 0.0 112 | 111 wife husband 0.8461538461538461 113 | 112 book text 2.5384615384615383 114 | 113 groom bride 0.8461538461538461 115 | 114 night day 0.0 116 | 115 south north 0.07692307692307693 117 | 116 plane airport 1.6153846153846154 118 | 117 uncle aunt 
0.7692307692307693 119 | 118 horse mare 7.153846153846154 120 | 119 bottom top 0.07692307692307693 121 | 120 friend buddy 9.538461538461538 122 | 121 student pupil 9.615384615384615 123 | 122 world globe 6.3076923076923075 124 | 123 leg arm 0.15384615384615385 125 | 124 plane jet 6.076923076923077 126 | 125 woman man 0.0 127 | 126 horse colt 7.6923076923076925 128 | 127 actress actor 3.076923076923077 129 | 128 teacher instructor 9.538461538461538 130 | 129 movie film 9.23076923076923 131 | 130 bird hawk 6.769230769230769 132 | 131 word dictionary 2.0 133 | 132 money salary 4.615384615384615 134 | 133 dog cat 0.0 135 | 134 area region 9.0 136 | 135 navy army 1.6923076923076923 137 | 136 book literature 3.5384615384615383 138 | 137 clothes closet 1.1538461538461537 139 | 138 sunset sunrise 0.07692307692307693 140 | 139 child adult 0.0 141 | 140 cow cattle 6.769230769230769 142 | 141 book story 2.1538461538461537 143 | 142 winter summer 0.15384615384615385 144 | 143 taxi cab 9.615384615384615 145 | 144 tree maple 6.3076923076923075 146 | 145 bed bedroom 0.7692307692307693 147 | 146 roof ceiling 4.538461538461538 148 | 147 disease infection 7.076923076923077 149 | 148 arm shoulder 3.4615384615384617 150 | 149 sheep lamb 8.23076923076923 151 | 150 lady gentleman 1.6923076923076923 152 | 151 boat anchor 2.923076923076923 153 | 152 priest monk 5.3076923076923075 154 | 153 toe finger 3.3846153846153846 155 | 154 river stream 4.384615384615385 156 | 155 anger fury 7.923076923076923 157 | 156 date calendar 4.230769230769231 158 | 157 sea ocean 6.230769230769231 159 | 158 second minute 2.6923076923076925 160 | 159 hand thumb 3.3076923076923075 161 | 160 wood log 4.538461538461538 162 | 161 mud dirt 4.461538461538462 163 | 162 hallway corridor 8.538461538461538 164 | 163 way manner 6.230769230769231 165 | 164 mouse cat 1.7692307692307692 166 | 165 cop sheriff 7.461538461538462 167 | 166 death burial 4.846153846153846 168 | 167 music melody 6.461538461538462 169 | 168 beer 
alcohol 5.923076923076923 170 | 169 mouth lip 5.615384615384615 171 | 170 storm hurricane 6.846153846153846 172 | 171 tax income 2.769230769230769 173 | 172 flower violet 5.0 174 | 173 paper cardboard 3.5384615384615383 175 | 174 floor ceiling 1.6153846153846154 176 | 175 beach seashore 6.461538461538462 177 | 176 rod curtain 2.3076923076923075 178 | 177 hound fox 2.0 179 | 178 street alley 4.153846153846154 180 | 179 boat deck 2.230769230769231 181 | 180 car horn 2.3076923076923075 182 | 181 friend guest 3.8461538461538463 183 | 182 employer employee 3.4615384615384617 184 | 183 hand wrist 2.769230769230769 185 | 184 ball cannon 3.6153846153846154 186 | 185 alcohol brandy 5.153846153846154 187 | 186 victory triumph 7.615384615384615 188 | 187 telephone booth 1.9230769230769231 189 | 188 door doorway 5.538461538461538 190 | 189 motel inn 5.846153846153846 191 | 190 clothes cloth 3.6923076923076925 192 | 191 steak meat 6.846153846153846 193 | 192 nail thumb 2.6153846153846154 194 | 193 band orchestra 5.615384615384615 195 | 194 book bible 5.153846153846154 196 | 195 business industry 5.6923076923076925 197 | 196 winter season 4.0 198 | 197 decade century 2.6923076923076925 199 | 198 alcohol gin 5.538461538461538 200 | 199 hat coat 2.230769230769231 201 | 200 window door 1.5384615384615385 202 | 201 arm wrist 2.4615384615384617 203 | 202 house apartment 5.461538461538462 204 | 203 glass crystal 4.769230769230769 205 | 204 wine brandy 3.8461538461538463 206 | 205 creator maker 9.615384615384615 207 | 206 dinner breakfast 1.2307692307692308 208 | 207 arm muscle 2.076923076923077 209 | 208 bubble suds 5.3076923076923075 210 | 209 bread flour 1.6153846153846154 211 | 210 death tragedy 4.461538461538462 212 | 211 absence presence 0.07692307692307693 213 | 212 gun cannon 4.0 214 | 213 grass blade 1.6153846153846154 215 | 214 ball basket 1.4615384615384615 216 | 215 hose garden 0.7692307692307693 217 | 216 boy kid 5.0 218 | 217 church choir 1.4615384615384615 219 | 218 
clothes drawer 1.0 220 | 219 tower bell 1.0 221 | 220 father parent 6.384615384615385 222 | 221 school grade 2.1538461538461537 223 | 222 parent adult 4.230769230769231 224 | 223 bar jail 1.0 225 | 224 car highway 1.0769230769230769 226 | 225 dictionary definition 2.6923076923076925 227 | 226 door cellar 0.6153846153846154 228 | 227 army legion 6.230769230769231 229 | 228 metal aluminum 4.923076923076923 230 | 229 chair bench 4.769230769230769 231 | 230 cloud fog 4.846153846153846 232 | 231 boy son 5.230769230769231 233 | 232 water ice 3.8461538461538463 234 | 233 bed blanket 2.0 235 | 234 attorney lawyer 9.461538461538462 236 | 235 area zone 9.153846153846153 237 | 236 business company 8.153846153846153 238 | 237 clothes fabric 3.5384615384615383 239 | 238 sweater jacket 4.230769230769231 240 | 239 money capital 5.615384615384615 241 | 240 hand foot 0.6923076923076923 242 | 241 alcohol cocktail 5.615384615384615 243 | 242 yard inch 1.6923076923076923 244 | 243 molecule atom 3.923076923076923 245 | 244 lens camera 2.769230769230769 246 | 245 meal dinner 6.6923076923076925 247 | 246 eye tear 1.3076923076923077 248 | 247 god devil 0.23076923076923078 249 | 248 loop belt 1.8461538461538463 250 | 249 rat mouse 4.6923076923076925 251 | 250 motor engine 8.538461538461538 252 | 251 car cab 3.8461538461538463 253 | 252 cat lion 3.1538461538461537 254 | 253 size magnitude 6.0 255 | 254 reality fantasy 0.23076923076923078 256 | 255 door gate 7.230769230769231 257 | 256 cat pet 2.6923076923076925 258 | 257 tin aluminum 1.7692307692307692 259 | 258 bone jaw 1.9230769230769231 260 | 259 cereal wheat 0.6923076923076923 261 | 260 house key 0.3076923076923077 262 | 261 blood flesh 0.8461538461538461 263 | 262 door corridor 0.23076923076923078 264 | 263 god spirit 2.769230769230769 265 | 264 capability competence 4.769230769230769 266 | 265 abundance plenty 7.538461538461538 267 | 266 sofa chair 2.769230769230769 268 | 267 wall brick 0.7692307692307693 269 | 268 horn drum 
0.46153846153846156 270 | 269 organ liver 2.5384615384615383 271 | 270 strength might 3.6923076923076925 272 | 271 phrase word 0.6923076923076923 273 | 272 band parade 0.5384615384615384 274 | 273 stomach waist 1.3846153846153846 275 | 274 cloud storm 1.1538461538461537 276 | 275 joy pride 3.4615384615384617 277 | 276 noise rattle 2.769230769230769 278 | 277 rain mist 2.8461538461538463 279 | 278 beer beverage 4.384615384615385 280 | 279 man uncle 2.076923076923077 281 | 280 apple juice 0.38461538461538464 282 | 281 intelligence logic 3.6153846153846154 283 | 282 communication language 5.615384615384615 284 | 283 mink fur 0.46153846153846156 285 | 284 mob crowd 4.230769230769231 286 | 285 shore coast 6.384615384615385 287 | 286 wire cord 4.615384615384615 288 | 287 bird turkey 2.3076923076923075 289 | 288 bed crib 3.1538461538461537 290 | 289 competence ability 6.0 291 | 290 cloud haze 3.0 292 | 291 supper meal 3.923076923076923 293 | 292 bar cage 1.0 294 | 293 water salt 0.23076923076923078 295 | 294 sense intuition 6.3076923076923075 296 | 295 situation condition 5.538461538461538 297 | 296 crime theft 5.615384615384615 298 | 297 style fashion 5.538461538461538 299 | 298 boundary border 9.076923076923077 300 | 299 arm body 1.5384615384615385 301 | 300 boat car 1.3076923076923077 302 | 301 sandwich lunch 2.3846153846153846 303 | 302 bride princess 1.6923076923076923 304 | 303 heroine hero 6.846153846153846 305 | 304 car gauge 1.0769230769230769 306 | 305 insect bee 3.4615384615384617 307 | 306 crib cradle 6.769230769230769 308 | 307 animal person 0.9230769230769231 309 | 308 marijuana herb 3.076923076923077 310 | 309 bed hospital 1.8461538461538463 311 | 310 cheek tongue 1.6153846153846154 312 | 311 disc computer 1.6153846153846154 313 | 312 curve angle 2.3846153846153846 314 | 313 grass moss 2.5384615384615383 315 | 314 school law 1.6153846153846154 316 | 315 foot head 1.0769230769230769 317 | 316 mother guardian 4.076923076923077 318 | 317 orthodontist dentist 
4.3076923076923075 319 | 318 alcohol whiskey 5.461538461538462 320 | 319 mouth tooth 2.1538461538461537 321 | 320 breakfast bacon 2.230769230769231 322 | 321 bathroom bedroom 1.4615384615384615 323 | 322 plate bowl 2.076923076923077 324 | 323 meat bacon 3.3846153846153846 325 | 324 air helium 1.7692307692307692 326 | 325 worker employer 1.0769230769230769 327 | 326 body chest 1.6923076923076923 328 | 327 son father 1.3076923076923077 329 | 328 heart surgery 1.1538461538461537 330 | 329 woman secretary 1.2307692307692308 331 | 330 man father 3.769230769230769 332 | 331 beach island 2.0 333 | 332 story topic 1.6153846153846154 334 | 333 game fun 2.230769230769231 335 | 334 weekend week 3.0 336 | 335 couple pair 6.846153846153846 337 | 336 woman wife 2.8461538461538463 338 | 337 sheep cattle 1.3846153846153846 339 | 338 purse bag 4.461538461538462 340 | 339 ceiling cathedral 1.6923076923076923 341 | 340 bean coffee 1.0769230769230769 342 | 341 wood paper 1.1538461538461537 343 | 342 top side 2.0 344 | 343 crime fraud 4.6923076923076925 345 | 344 pain harm 3.923076923076923 346 | 345 lover companion 3.6153846153846154 347 | 346 evening dusk 4.769230769230769 348 | 347 father daughter 1.1538461538461537 349 | 348 wine liquor 5.846153846153846 350 | 349 cow goat 1.2307692307692308 351 | 350 belief opinion 5.3076923076923075 352 | 351 reality illusion 1.0769230769230769 353 | 352 pact agreement 7.615384615384615 354 | 353 wealth poverty 1.0769230769230769 355 | 354 accident emergency 2.6153846153846154 356 | 355 battle conquest 2.923076923076923 357 | 356 friend teacher 1.6923076923076923 358 | 357 illness infection 4.461538461538462 359 | 358 game trick 1.6923076923076923 360 | 359 brother son 3.0 361 | 360 aunt nephew 1.5384615384615385 362 | 361 worker mechanic 2.8461538461538463 363 | 362 doctor orthodontist 3.076923076923077 364 | 363 oak maple 2.923076923076923 365 | 364 bee queen 1.9230769230769231 366 | 365 car bicycle 1.6923076923076923 367 | 366 goal quest 
3.230769230769231 368 | 367 august month 3.769230769230769 369 | 368 army squad 4.0 370 | 369 cloud weather 3.1538461538461537 371 | 370 physician doctor 7.076923076923077 372 | 371 canyon valley 4.615384615384615 373 | 372 river valley 2.4615384615384617 374 | 373 sun sky 2.3076923076923075 375 | 374 target arrow 1.8461538461538463 376 | 375 chocolate pie 2.4615384615384617 377 | 376 circumstance situation 4.538461538461538 378 | 377 opinion choice 3.8461538461538463 379 | 378 rhythm melody 4.923076923076923 380 | 379 gut nerve 1.6923076923076923 381 | 380 day dawn 2.3076923076923075 382 | 381 cattle beef 3.3846153846153846 383 | 382 doctor professor 3.769230769230769 384 | 383 arm vein 2.3076923076923075 385 | 384 room bath 1.7692307692307692 386 | 385 corporation business 6.6923076923076925 387 | 386 fun football 2.5384615384615383 388 | 387 hill cliff 2.3076923076923075 389 | 388 bone ankle 2.3846153846153846 390 | 389 apple candy 0.9230769230769231 391 | 390 helper maid 2.923076923076923 392 | 391 leader manager 5.923076923076923 393 | 392 lemon tea 1.3846153846153846 394 | 393 bee ant 1.5384615384615385 395 | 394 basketball baseball 1.7692307692307692 396 | 395 rice bean 1.6923076923076923 397 | 396 bed furniture 2.3076923076923075 398 | 397 emotion passion 5.615384615384615 399 | 398 anarchy chaos 7.538461538461538 400 | 399 crime violation 5.076923076923077 401 | 400 machine engine 2.6153846153846154 402 | 401 beach sea 3.5384615384615383 403 | 402 alley bowl 0.6923076923076923 404 | 403 jar bottle 4.461538461538462 405 | 404 strength capability 3.769230769230769 406 | 405 seed mustard 0.7692307692307693 407 | 406 guitar drum 1.0 408 | 407 opinion idea 6.153846153846154 409 | 408 north west 0.3076923076923077 410 | 409 diet salad 0.9230769230769231 411 | 410 mother wife 2.230769230769231 412 | 411 dad mother 1.2307692307692308 413 | 412 captain sailor 4.6923076923076925 414 | 413 meter yard 3.4615384615384617 415 | 414 beer champagne 3.769230769230769 416 | 
415 motor boat 0.7692307692307693 417 | 416 card bridge 0.38461538461538464 418 | 417 science psychology 3.5384615384615383 419 | 418 sinner saint 0.46153846153846156 420 | 419 destruction construction 0.3076923076923077 421 | 420 crowd bunch 5.230769230769231 422 | 421 beach reef 1.3846153846153846 423 | 422 man child 1.1538461538461537 424 | 423 bread cheese 0.6923076923076923 425 | 424 champion winner 7.153846153846154 426 | 425 celebration ceremony 6.461538461538462 427 | 426 menu order 4.076923076923077 428 | 427 king princess 1.4615384615384615 429 | 428 wealth prestige 3.4615384615384617 430 | 429 endurance strength 5.230769230769231 431 | 430 danger threat 7.153846153846154 432 | 431 god priest 0.6153846153846154 433 | 432 men fraternity 2.3846153846153846 434 | 433 buddy companion 6.461538461538462 435 | 434 teacher helper 4.0 436 | 435 body stomach 1.4615384615384615 437 | 436 tongue throat 1.3076923076923077 438 | 437 house carpet 0.5384615384615384 439 | 438 intelligence skill 3.923076923076923 440 | 439 journey conquest 2.076923076923077 441 | 440 god prey 0.6923076923076923 442 | 441 brother soul 0.38461538461538464 443 | 442 adversary opponent 8.76923076923077 444 | 443 death catastrophe 3.0 445 | 444 monster demon 5.615384615384615 446 | 445 day morning 2.1538461538461537 447 | 446 man victor 0.6923076923076923 448 | 447 friend guy 2.6153846153846154 449 | 448 song story 0.9230769230769231 450 | 449 ray sunshine 2.4615384615384617 451 | 450 guy stud 4.230769230769231 452 | 451 chicken rice 0.6153846153846154 453 | 452 box elevator 1.3846153846153846 454 | 453 butter potato 0.46153846153846156 455 | 454 apartment furniture 1.4615384615384615 456 | 455 lake swamp 4.3076923076923075 457 | 456 salad vinegar 1.2307692307692308 458 | 457 flower bulb 3.6153846153846154 459 | 458 cloud mist 5.538461538461538 460 | 459 driver pilot 6.461538461538462 461 | 460 sugar honey 4.923076923076923 462 | 461 body shoulder 2.3846153846153846 463 | 462 idea image 
3.4615384615384617 464 | 463 father brother 1.3846153846153846 465 | 464 moon planet 3.0 466 | 465 ball costume 1.8461538461538463 467 | 466 rail fence 4.538461538461538 468 | 467 room bed 2.1538461538461537 469 | 468 flower bush 3.769230769230769 470 | 469 bone knee 3.1538461538461537 471 | 470 arm knee 1.1538461538461537 472 | 471 bottom side 0.6923076923076923 473 | 472 vessel vein 3.076923076923077 474 | 473 cat rabbit 0.46153846153846156 475 | 474 meat sandwich 2.5384615384615383 476 | 475 belief concept 5.3076923076923075 477 | 476 intelligence insight 5.923076923076923 478 | 477 attention interest 6.461538461538462 479 | 478 attitude confidence 4.3076923076923075 480 | 479 right justice 5.461538461538462 481 | 480 argument agreement 0.3076923076923077 482 | 481 depth magnitude 6.384615384615385 483 | 482 medium news 2.769230769230769 484 | 483 winner candidate 2.0 485 | 484 birthday date 3.076923076923077 486 | 485 fee payment 6.384615384615385 487 | 486 bible hymn 2.769230769230769 488 | 487 exit doorway 5.923076923076923 489 | 488 man sentry 2.4615384615384617 490 | 489 aisle hall 6.384615384615385 491 | 490 whiskey gin 4.615384615384615 492 | 491 blood marrow 1.2307692307692308 493 | 492 oil mink 1.0769230769230769 494 | 493 floor deck 6.461538461538462 495 | 494 roof floor 0.3076923076923077 496 | 495 door floor 0.3076923076923077 497 | 496 shoulder head 0.9230769230769231 498 | 497 wagon carriage 7.0 499 | 498 car carriage 5.0 500 | 499 elbow ankle 0.3076923076923077 501 | 500 wealth fame 3.769230769230769 502 | 501 sorrow shame 4.769230769230769 503 | 502 administration management 7.076923076923077 504 | 503 communication conversation 5.846153846153846 505 | 504 pollution atmosphere 2.5384615384615383 506 | 505 anatomy biology 4.461538461538462 507 | 506 college profession 2.4615384615384617 508 | 507 book topic 2.0 509 | 508 formula equation 6.923076923076923 510 | 509 book information 2.3076923076923075 511 | 510 boy partner 1.3076923076923077 512 | 
511 sky universe 2.923076923076923 513 | 512 population people 6.3076923076923075 514 | 513 college class 4.769230769230769 515 | 514 chief mayor 4.846153846153846 516 | 515 rabbi minister 3.769230769230769 517 | 516 meter inch 2.6153846153846154 518 | 517 polyester cotton 1.9230769230769231 519 | 518 lawyer banker 1.0 520 | 519 violin instrument 5.384615384615385 521 | 520 camp cabin 2.8461538461538463 522 | 521 pot appliance 2.769230769230769 523 | 522 linen fabric 6.153846153846154 524 | 523 whiskey champagne 3.1538461538461537 525 | 524 girl child 4.769230769230769 526 | 525 cottage cabin 5.538461538461538 527 | 526 bird hen 5.461538461538462 528 | 527 racket noise 4.538461538461538 529 | 528 sunset evening 5.384615384615385 530 | 529 drizzle rain 9.153846153846153 531 | 530 adult baby 0.7692307692307693 532 | 531 charcoal coal 7.384615384615385 533 | 532 body spine 2.3846153846153846 534 | 533 head nail 0.9230769230769231 535 | 534 log timber 4.846153846153846 536 | 535 spoon cup 1.1538461538461537 537 | 536 body nerve 1.9230769230769231 538 | 537 man husband 3.5384615384615383 539 | 538 bone neck 2.1538461538461537 540 | 539 frustration anger 5.923076923076923 541 | 540 river sea 3.3076923076923075 542 | 541 task job 7.0 543 | 542 club society 5.769230769230769 544 | 543 reflection image 4.769230769230769 545 | 544 prince king 3.8461538461538463 546 | 545 snow weather 4.384615384615385 547 | 546 people party 2.3076923076923075 548 | 547 boy brother 3.6153846153846154 549 | 548 root grass 2.6923076923076925 550 | 549 brow eye 2.230769230769231 551 | 550 money pearl 1.6153846153846154 552 | 551 money diamond 2.3076923076923075 553 | 552 vehicle bus 5.461538461538462 554 | 553 cab bus 3.1538461538461537 555 | 554 house barn 3.3076923076923075 556 | 555 finger palm 1.4615384615384615 557 | 556 car bridge 1.3076923076923077 558 | 557 effort difficulty 3.5384615384615383 559 | 558 fact insight 3.3846153846153846 560 | 559 job management 2.8461538461538463 561 | 560 
cancer sickness 5.0 562 | 561 word newspaper 3.230769230769231 563 | 562 composer writer 3.0 564 | 563 actor singer 2.769230769230769 565 | 564 shelter hut 4.846153846153846 566 | 565 bathroom kitchen 2.230769230769231 567 | 566 cabin hut 2.769230769230769 568 | 567 door kitchen 1.2307692307692308 569 | 568 value belief 4.0 570 | 569 wisdom intelligence 6.923076923076923 571 | 570 ignorance intelligence 1.0769230769230769 572 | 571 happiness luck 3.230769230769231 573 | 572 idea scheme 6.615384615384615 574 | 573 mood emotion 4.230769230769231 575 | 574 happiness peace 3.923076923076923 576 | 575 despair misery 5.3076923076923075 577 | 576 logic arithmetic 2.4615384615384617 578 | 577 denial confession 2.230769230769231 579 | 578 argument criticism 4.3076923076923075 580 | 579 aggression hostility 3.6923076923076925 581 | 580 hysteria confusion 2.923076923076923 582 | 581 chemistry theory 2.923076923076923 583 | 582 trial verdict 4.230769230769231 584 | 583 comfort safety 4.769230769230769 585 | 584 confidence self 3.769230769230769 586 | 585 vision perception 3.3846153846153846 587 | 586 era decade 4.076923076923077 588 | 587 biography fiction 2.5384615384615383 589 | 588 discussion argument 6.384615384615385 590 | 589 code symbol 3.4615384615384617 591 | 590 danger disease 3.076923076923077 592 | 591 accident catastrophe 6.3076923076923075 593 | 592 journey trip 6.153846153846154 594 | 593 activity movement 5.153846153846154 595 | 594 gossip news 3.0 596 | 595 father god 1.3076923076923077 597 | 596 action course 1.2307692307692308 598 | 597 fever illness 5.076923076923077 599 | 598 aviation flight 5.0 600 | 599 game action 2.769230769230769 601 | 600 molecule air 2.923076923076923 602 | 601 home state 2.1538461538461537 603 | 602 word literature 3.769230769230769 604 | 603 adult guardian 3.5384615384615383 605 | 604 newspaper information 3.076923076923077 606 | 605 communication television 2.769230769230769 607 | 606 cousin uncle 2.4615384615384617 608 | 607 
author reader 1.6923076923076923 609 | 608 guy partner 2.5384615384615383 610 | 609 area corner 3.3846153846153846 611 | 610 ballad song 6.538461538461538 612 | 611 wall decoration 1.4615384615384615 613 | 612 word page 1.6153846153846154 614 | 613 nurse scientist 1.7692307692307692 615 | 614 politician president 5.923076923076923 616 | 615 president mayor 3.8461538461538463 617 | 616 book essay 2.6923076923076925 618 | 617 man warrior 3.1538461538461537 619 | 618 article journal 4.846153846153846 620 | 619 breakfast supper 3.230769230769231 621 | 620 crowd parade 2.8461538461538463 622 | 621 aisle hallway 5.923076923076923 623 | 622 teacher rabbi 3.3846153846153846 624 | 623 hip lip 2.076923076923077 625 | 624 book article 3.3076923076923075 626 | 625 room cell 3.1538461538461537 627 | 626 box booth 3.4615384615384617 628 | 627 daughter kid 5.0 629 | 628 limb leg 4.538461538461538 630 | 629 liver lung 2.3846153846153846 631 | 630 classroom hallway 2.3846153846153846 632 | 631 mountain ledge 2.0 633 | 632 car elevator 0.6153846153846154 634 | 633 bed couch 1.8461538461538463 635 | 634 clothes button 2.230769230769231 636 | 635 clothes coat 4.923076923076923 637 | 636 kidney organ 4.153846153846154 638 | 637 apple sauce 2.0 639 | 638 chicken steak 1.7692307692307692 640 | 639 car hose 0.6923076923076923 641 | 640 tobacco cigarette 7.230769230769231 642 | 641 student professor 2.0 643 | 642 baby daughter 3.923076923076923 644 | 643 pipe cigar 3.6153846153846154 645 | 644 milk juice 1.8461538461538463 646 | 645 box cigar 1.3846153846153846 647 | 646 apartment hotel 4.3076923076923075 648 | 647 cup cone 1.5384615384615385 649 | 648 horse ox 1.0769230769230769 650 | 649 throat nose 1.8461538461538463 651 | 650 bone teeth 3.3846153846153846 652 | 651 bone elbow 2.1538461538461537 653 | 652 bacon bean 1.0769230769230769 654 | 653 cup jar 3.230769230769231 655 | 654 proof fact 4.846153846153846 656 | 655 appointment engagement 5.076923076923077 657 | 656 birthday year 2.0 
658 | 657 word clue 1.4615384615384615 659 | 658 author creator 6.461538461538462 660 | 659 atom carbon 2.6923076923076925 661 | 660 archbishop bishop 5.153846153846154 662 | 661 letter paragraph 2.076923076923077 663 | 662 page paragraph 1.8461538461538463 664 | 663 steeple chapel 4.769230769230769 665 | 664 muscle bone 2.1538461538461537 666 | 665 muscle tongue 2.5384615384615383 667 | 666 boy soldier 1.0 668 | 667 belly abdomen 7.230769230769231 669 | 668 guy girl 1.6923076923076923 670 | 669 bed chair 1.4615384615384615 671 | 670 clothes jacket 4.153846153846154 672 | 671 gun knife 1.8461538461538463 673 | 672 tin metal 3.6153846153846154 674 | 673 bottle container 3.1538461538461537 675 | 674 hen turkey 2.1538461538461537 676 | 675 meat bread 1.2307692307692308 677 | 676 arm bone 2.3846153846153846 678 | 677 neck spine 2.5384615384615383 679 | 678 apple lemon 1.3076923076923077 680 | 679 agony grief 4.923076923076923 681 | 680 assignment task 5.615384615384615 682 | 681 night dawn 1.0 683 | 682 dinner soup 1.6923076923076923 684 | 683 calf bull 4.0 685 | 684 snow storm 2.0 686 | 685 nail hand 2.923076923076923 687 | 686 dog horse 1.0 688 | 687 arm neck 1.2307692307692308 689 | 688 ball glove 1.7692307692307692 690 | 689 flu fever 4.076923076923077 691 | 690 fee salary 2.923076923076923 692 | 691 nerve brain 2.8461538461538463 693 | 692 beast animal 5.3076923076923075 694 | 693 dinner chicken 1.4615384615384615 695 | 694 girl maid 2.4615384615384617 696 | 695 child boy 5.0 697 | 696 alcohol wine 4.846153846153846 698 | 697 nose mouth 2.076923076923077 699 | 698 street car 1.6153846153846154 700 | 699 bell door 1.6153846153846154 701 | 700 box hat 1.0769230769230769 702 | 701 belief impression 3.3846153846153846 703 | 702 bias opinion 3.8461538461538463 704 | 703 attention awareness 5.923076923076923 705 | 704 anger mood 3.6923076923076925 706 | 705 elegance style 4.230769230769231 707 | 706 beauty age 2.5384615384615383 708 | 707 book theme 1.6153846153846154 
709 | 708 friend mother 1.9230769230769231 710 | 709 vitamin iron 3.076923076923077 711 | 710 car factory 1.8461538461538463 712 | 711 pact condition 3.0 713 | 712 chapter choice 1.3846153846153846 714 | 713 arithmetic rhythm 1.6923076923076923 715 | 714 winner presence 1.9230769230769231 716 | 715 belief flower 1.1538461538461537 717 | 716 winner goal 3.5384615384615383 718 | 717 trick size 0.9230769230769231 719 | 718 choice vein 1.3846153846153846 720 | 719 hymn conquest 1.2307692307692308 721 | 720 endurance band 1.2307692307692308 722 | 721 jail choice 1.3076923076923077 723 | 722 condition boy 0.8461538461538461 724 | 723 flower endurance 1.0769230769230769 725 | 724 hole agreement 1.2307692307692308 726 | 725 doctor temper 1.3846153846153846 727 | 726 fraternity door 1.4615384615384615 728 | 727 task woman 1.0769230769230769 729 | 728 fraternity baseball 1.5384615384615385 730 | 729 cent size 2.230769230769231 731 | 730 presence door 1.6153846153846154 732 | 731 mouse management 0.8461538461538461 733 | 732 task highway 1.0769230769230769 734 | 733 liquor century 0.9230769230769231 735 | 734 task straw 0.8461538461538461 736 | 735 island task 0.8461538461538461 737 | 736 night chapter 1.2307692307692308 738 | 737 pollution president 1.0 739 | 738 gun trick 1.0 740 | 739 bath trick 1.1538461538461537 741 | 740 diet apple 1.1538461538461537 742 | 741 cent wife 0.6153846153846154 743 | 742 chapter tail 1.3846153846153846 744 | 743 course stomach 1.0 745 | 744 hymn straw 1.0769230769230769 746 | 745 dentist colonel 0.9230769230769231 747 | 746 wife straw 1.0769230769230769 748 | 747 hole wife 1.0769230769230769 749 | 748 pupil president 1.3076923076923077 750 | 749 bath wife 1.0769230769230769 751 | 750 people cent 0.0 752 | 751 formula log 0.8461538461538461 753 | 752 woman fur 0.07692307692307693 754 | 753 apple sunshine 0.0 755 | 754 gun dawn 0.0 756 | 755 meal waist 0.6153846153846154 757 | 756 camera president 0.23076923076923078 758 | 757 liquor band 0.0 
759 | 758 stomach vein 0.7692307692307693 760 | 759 gun fur 0.15384615384615385 761 | 760 couch baseball 0.38461538461538464 762 | 761 worker camera 0.23076923076923078 763 | 762 deck mouse 0.07692307692307693 764 | 763 rice boy 0.0 765 | 764 people gun 0.07692307692307693 766 | 765 cliff tail 0.3076923076923077 767 | 766 ankle window 0.0 768 | 767 princess island 0.0 769 | 768 container mouse 0.0 770 | 769 wagon container 2.3846153846153846 771 | 770 people balloon 0.0 772 | 771 dollar people 0.0 773 | 772 bath balloon 0.0 774 | 773 stomach bedroom 0.0 775 | 774 bicycle bedroom 0.0 776 | 775 log bath 0.07692307692307693 777 | 776 bowl tail 0.07692307692307693 778 | 777 go come 1.1538461538461537 779 | 778 take steal 5.0 780 | 779 listen hear 8.307692307692308 781 | 780 think rationalize 6.230769230769231 782 | 781 occur happen 9.0 783 | 782 vanish disappear 9.76923076923077 784 | 783 multiply divide 1.8461538461538463 785 | 784 plead beg 7.6923076923076925 786 | 785 begin originate 8.307692307692308 787 | 786 protect defend 9.0 788 | 787 kill destroy 7.384615384615385 789 | 788 create make 8.384615384615385 790 | 789 accept reject 0.0 791 | 790 ignore avoid 7.923076923076923 792 | 791 carry bring 4.769230769230769 793 | 792 leave enter 0.6153846153846154 794 | 793 choose elect 8.153846153846153 795 | 794 lose fail 7.846153846153846 796 | 795 encourage discourage 0.0 797 | 796 achieve accomplish 8.384615384615385 798 | 797 make construct 8.0 799 | 798 listen obey 4.615384615384615 800 | 799 inform notify 9.153846153846153 801 | 800 receive give 1.5384615384615385 802 | 801 borrow beg 2.923076923076923 803 | 802 take obtain 4.0 804 | 803 advise recommend 5.153846153846154 805 | 804 imitate portray 3.3076923076923075 806 | 805 win succeed 6.769230769230769 807 | 806 think decide 2.769230769230769 808 | 807 greet meet 5.461538461538462 809 | 808 agree argue 0.9230769230769231 810 | 809 enjoy entertain 4.846153846153846 811 | 810 destroy make 0.9230769230769231 812 | 
811 save protect 6.384615384615385 813 | 812 give lend 3.076923076923077 814 | 813 understand know 5.6923076923076925 815 | 814 take receive 3.5384615384615383 816 | 815 accept acknowledge 4.076923076923077 817 | 816 decide choose 4.0 818 | 817 accept believe 4.230769230769231 819 | 818 keep possess 4.076923076923077 820 | 819 roam wander 5.0 821 | 820 succeed fail 1.0 822 | 821 spend save 1.2307692307692308 823 | 822 leave go 4.076923076923077 824 | 823 come attend 4.538461538461538 825 | 824 know believe 4.384615384615385 826 | 825 gather meet 4.384615384615385 827 | 826 make earn 3.923076923076923 828 | 827 forget ignore 4.3076923076923075 829 | 828 multiply add 2.5384615384615383 830 | 829 shrink grow 0.8461538461538461 831 | 830 arrive leave 1.2307692307692308 832 | 831 succeed try 2.3846153846153846 833 | 832 accept deny 1.0769230769230769 834 | 833 arrive come 4.3076923076923075 835 | 834 agree differ 1.0769230769230769 836 | 835 send receive 0.8461538461538461 837 | 836 win dominate 5.3076923076923075 838 | 837 add divide 1.6923076923076923 839 | 838 kill choke 3.1538461538461537 840 | 839 acquire get 5.0 841 | 840 participate join 5.923076923076923 842 | 841 leave remain 1.7692307692307692 843 | 842 go enter 2.3076923076923075 844 | 843 take carry 2.923076923076923 845 | 844 forget learn 1.0 846 | 845 appoint elect 4.0 847 | 846 engage marry 4.384615384615385 848 | 847 ask pray 2.6923076923076925 849 | 848 go send 2.3076923076923075 850 | 849 take deliver 2.3846153846153846 851 | 850 speak hear 0.38461538461538464 852 | 851 analyze evaluate 7.923076923076923 853 | 852 argue rationalize 4.583333333333333 854 | 853 lose keep 0.23076923076923078 855 | 854 compare analyze 5.461538461538462 856 | 855 disorganize organize 0.07692307692307693 857 | 856 go allow 0.9230769230769231 858 | 857 take possess 4.846153846153846 859 | 858 learn listen 2.3076923076923075 860 | 859 destroy construct 0.0 861 | 860 create build 6.538461538461538 862 | 861 steal buy 0.0 863 | 
862 kill hang 2.3846153846153846 864 | 863 forget know 0.0 865 | 864 create imagine 4.846153846153846 866 | 865 do happen 2.3076923076923075 867 | 866 win accomplish 6.230769230769231 868 | 867 give deny 0.07692307692307693 869 | 868 deserve earn 4.538461538461538 870 | 869 get put 0.5384615384615384 871 | 870 locate find 8.461538461538462 872 | 871 appear attend 3.4615384615384617 873 | 872 know comprehend 6.923076923076923 874 | 873 pretend imagine 6.6923076923076925 875 | 874 satisfy please 6.076923076923077 876 | 875 cherish keep 3.4615384615384617 877 | 876 argue differ 5.6923076923076925 878 | 877 overcome dominate 4.461538461538462 879 | 878 behave obey 7.0 880 | 879 cooperate participate 5.384615384615385 881 | 880 achieve try 1.8461538461538463 882 | 881 fail discourage 2.769230769230769 883 | 882 begin quit 0.0 884 | 883 say participate 1.6153846153846154 885 | 884 come bring 1.6153846153846154 886 | 885 declare announce 8.153846153846153 887 | 886 read comprehend 5.0 888 | 887 take leave 0.0 889 | 888 proclaim announce 8.076923076923077 890 | 889 acquire obtain 9.461538461538462 891 | 890 conclude decide 6.153846153846154 892 | 891 please plead 1.0769230769230769 893 | 892 argue prove 4.076923076923077 894 | 893 ask plead 6.076923076923077 895 | 894 find disappear 0.07692307692307693 896 | 895 inspect examine 9.153846153846153 897 | 896 verify justify 3.076923076923077 898 | 897 assume predict 3.5384615384615383 899 | 898 learn evaluate 3.3076923076923075 900 | 899 argue justify 4.230769230769231 901 | 900 make become 2.4615384615384617 902 | 901 discover originate 1.9230769230769231 903 | 902 achieve succeed 7.6923076923076925 904 | 903 give put 3.769230769230769 905 | 904 understand listen 4.384615384615385 906 | 905 expand grow 6.769230769230769 907 | 906 borrow sell 0.38461538461538464 908 | 907 keep protect 3.3076923076923075 909 | 908 explain prove 4.461538461538462 910 | 909 assume pretend 2.3846153846153846 911 | 910 agree please 
1.1538461538461537 912 | 911 forgive forget 3.6923076923076925 913 | 912 clarify explain 7.6923076923076925 914 | 913 understand forgive 2.230769230769231 915 | 914 remind forget 0.6153846153846154 916 | 915 get remain 2.0 917 | 916 realize discover 3.1538461538461537 918 | 917 require inquire 1.6923076923076923 919 | 918 ignore ask 1.5384615384615385 920 | 919 think inquire 2.076923076923077 921 | 920 reject avoid 5.923076923076923 922 | 921 argue persuade 4.076923076923077 923 | 922 pursue persuade 2.3076923076923075 924 | 923 accept forgive 3.3076923076923075 925 | 924 do quit 0.6153846153846154 926 | 925 investigate examine 6.769230769230769 927 | 926 discuss explain 5.230769230769231 928 | 927 owe lend 1.0 929 | 928 explore discover 6.384615384615385 930 | 929 complain argue 4.153846153846154 931 | 930 withdraw reject 5.230769230769231 932 | 931 keep borrow 0.7692307692307693 933 | 932 beg ask 2.769230769230769 934 | 933 arrange organize 4.846153846153846 935 | 934 reduce shrink 5.153846153846154 936 | 935 speak acknowledge 2.6153846153846154 937 | 936 give borrow 1.6153846153846154 938 | 937 kill defend 0.6923076923076923 939 | 938 disappear shrink 3.3076923076923075 940 | 939 deliver carry 1.4615384615384615 941 | 940 breathe choke 0.7692307692307693 942 | 941 acknowledge notify 4.0 943 | 942 become seem 3.3076923076923075 944 | 943 pretend seem 3.769230769230769 945 | 944 accomplish become 4.769230769230769 946 | 945 contemplate think 4.846153846153846 947 | 946 determine predict 3.230769230769231 948 | 947 please entertain 1.9230769230769231 949 | 948 remain retain 4.769230769230769 950 | 949 pretend portray 2.1538461538461537 951 | 950 forget retain 1.0769230769230769 952 | 951 want choose 3.6153846153846154 953 | 952 lose get 0.9230769230769231 954 | 953 try think 2.230769230769231 955 | 954 become appear 3.4615384615384617 956 | 955 leave ignore 4.461538461538462 957 | 956 accept recommend 2.1538461538461537 958 | 957 leave wander 2.5384615384615383 959 
| 958 keep give 1.3846153846153846 960 | 959 give allow 3.5384615384615383 961 | 960 bring send 1.0769230769230769 962 | 961 absorb learn 4.384615384615385 963 | 962 acquire find 5.076923076923077 964 | 963 leave appear 0.7692307692307693 965 | 964 create destroy 0.6153846153846154 966 | 965 begin go 5.461538461538462 967 | 966 get buy 4.846153846153846 968 | 967 collect save 4.384615384615385 969 | 968 replace restore 3.0 970 | 969 join add 6.384615384615385 971 | 970 join marry 5.6923076923076925 972 | 971 accept deliver 1.0 973 | 972 attach join 6.076923076923077 974 | 973 put hang 2.923076923076923 975 | 974 go sell 2.076923076923077 976 | 975 communicate pray 2.3846153846153846 977 | 976 give steal 1.3076923076923077 978 | 977 add build 2.8461538461538463 979 | 978 bring restore 2.769230769230769 980 | 979 comprehend satisfy 2.3076923076923075 981 | 980 portray decide 1.9230769230769231 982 | 981 organize become 1.8461538461538463 983 | 982 give know 0.7692307692307693 984 | 983 say verify 2.6923076923076925 985 | 984 cooperate join 4.230769230769231 986 | 985 arrange require 1.8461538461538463 987 | 986 borrow want 2.0 988 | 987 investigate pursue 5.3076923076923075 989 | 988 ignore explore 0.6923076923076923 990 | 989 bring complain 1.1538461538461537 991 | 990 enter owe 1.0 992 | 991 portray notify 1.6153846153846154 993 | 992 remind sell 0.6923076923076923 994 | 993 absorb possess 3.1538461538461537 995 | 994 join acquire 2.1538461538461537 996 | 995 send attend 2.076923076923077 997 | 996 gather attend 2.8461538461538463 998 | 997 absorb withdraw 1.0769230769230769 999 | 998 attend arrive 5.3076923076923075 1000 | -------------------------------------------------------------------------------- /evaluation/wordsim/tasks.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class WordSimTask: 6 | """ 7 | A WordSim task. 
8 | 9 | Attributes 10 | ---------- 11 | task: The name of the task. 12 | file: The file path to the dataset. 13 | index1: The index of the first word in the dataset. 14 | index2: The index of the second word in the dataset. 15 | target: The index of the target value in the dataset 16 | 17 | """ 18 | 19 | task: str 20 | file: str 21 | index1: int 22 | index2: int 23 | target: int 24 | 25 | 26 | wordsim_tasks: list[WordSimTask] = [ 27 | WordSimTask( 28 | task="RareWord", 29 | file="rw.txt", 30 | index1=0, 31 | index2=1, 32 | target=2, 33 | ), 34 | WordSimTask( 35 | task="MEN", 36 | file="men.txt", 37 | index1=0, 38 | index2=1, 39 | target=2, 40 | ), 41 | WordSimTask( 42 | task="SimLex", 43 | file="simLex.txt", 44 | index1=1, 45 | index2=2, 46 | target=3, 47 | ), 48 | WordSimTask( 49 | task="rel353", 50 | file="rel353.txt", 51 | index1=1, 52 | index2=2, 53 | target=3, 54 | ), 55 | WordSimTask( 56 | task="simverb", 57 | file="simverb_3500.txt", 58 | index1=2, 59 | index2=3, 60 | target=1, 61 | ), 62 | WordSimTask( 63 | task="muturk", 64 | file="mturk_771.txt", 65 | index1=1, 66 | index2=2, 67 | target=3, 68 | ), 69 | WordSimTask( 70 | task="Card660", 71 | file="card_660.txt", 72 | index1=0, 73 | index2=1, 74 | target=2, 75 | ), 76 | ] 77 | -------------------------------------------------------------------------------- /evaluation/wordsim/wordsim.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from importlib import resources 4 | from typing import Any 5 | 6 | import datasets 7 | from mteb import TaskMetadata 8 | from mteb.abstasks import AbsTaskSTS 9 | 10 | from evaluation.wordsim.tasks import wordsim_tasks 11 | 12 | 13 | class WordSim(AbsTaskSTS): 14 | def __init__(self, dataset_name: str | None = None, hf_subsets: Any = None, **kwargs: Any) -> None: 15 | """ 16 | Initialize a WordSim task with the given dataset name. 17 | 18 | :param dataset_name: The name of the dataset to use. 
19 | :param hf_subsets: The Hugging Face dataset splits to use. 20 | :param **kwargs: Additional keyword arguments. 21 | """ 22 | super().__init__(hf_subsets=hf_subsets, **kwargs) 23 | self.dataset_name = dataset_name 24 | self.metadata = TaskMetadata( 25 | name=dataset_name if dataset_name else "WordSim", 26 | description=f"Custom Word Similarity Task: {dataset_name}" 27 | if dataset_name 28 | else "Custom Word Similarity Task with Multiple Datasets.", 29 | reference=None, 30 | type="STS", 31 | category="s2s", 32 | modalities=["text"], 33 | eval_splits=["test"], 34 | eval_langs=["en"], 35 | main_score="spearman", 36 | dataset={ 37 | "path": "evaluation/wordsim/tasks.py", 38 | "revision": "1.0.0", 39 | }, 40 | ) 41 | self.dataset_splits: dict[str, dict] = {} 42 | 43 | @property 44 | def min_score(self) -> int: 45 | """Minimum score for the similarity task.""" 46 | return -1 47 | 48 | @property 49 | def max_score(self) -> int: 50 | """Maximum score for the similarity task.""" 51 | return 1 52 | 53 | def load_data(self, eval_splits: Any = None) -> None: 54 | """Load the WordSim datasets.""" 55 | # Load the data for each task 56 | for task in wordsim_tasks: 57 | sentence1 = [] 58 | sentence2 = [] 59 | scores = [] 60 | 61 | index1 = task.index1 62 | index2 = task.index2 63 | target = task.target 64 | 65 | with resources.open_text("evaluation.wordsim.data", task.file) as f: 66 | for line in f: 67 | parts = line.strip().split("\t") 68 | # Remove underscores from the words 69 | parts = [part.replace("_", " ") for part in parts] 70 | word1 = parts[index1] 71 | word2 = parts[index2] 72 | 73 | similarity = float(parts[target]) 74 | 75 | sentence1.append(word1) 76 | sentence2.append(word2) 77 | scores.append(similarity) 78 | 79 | dataset_name = task.task 80 | self.dataset_splits[dataset_name] = datasets.Dataset.from_dict( 81 | { 82 | "sentence1": sentence1, 83 | "sentence2": sentence2, 84 | "score": scores, 85 | } 86 | ) 87 | if self.dataset_name: 88 | self.dataset = 
datasets.DatasetDict( 89 | { 90 | "test": self.dataset_splits[self.dataset_name], 91 | } 92 | ) 93 | else: 94 | self.dataset = datasets.DatasetDict(self.dataset_splits) 95 | 96 | @classmethod 97 | def get_subtasks(cls) -> list[WordSim]: 98 | """Return a list of subtasks, one for each dataset.""" 99 | instance = cls() 100 | instance.load_data() 101 | return [cls(dataset_name=name) for name in instance.dataset_splits.keys()] 102 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "evaluation" 3 | description = "Evaluation of word embeddings" 4 | readme = "README.md" 5 | version = "0.1.0" 6 | requires-python = ">=3.10" 7 | 8 | dependencies = [ 9 | "autofj", 10 | "datasets", 11 | "huggingface-hub", 12 | "mteb==1.14.15", 13 | "numpy", 14 | "pandas", 15 | "pytorch_lightning", 16 | "reach", 17 | "scikit-learn", 18 | "scipy", 19 | "seaborn", 20 | "sentence_transformers", 21 | "torch", 22 | "plotnine", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | dev = [ 27 | "black", 28 | "ipython", 29 | "mypy", 30 | "pre-commit", 31 | "pytest", 32 | "pytest-coverage", 33 | "ruff", 34 | ] 35 | 36 | [build-system] 37 | requires = ["setuptools>=64", "setuptools_scm>=8"] 38 | build-backend = "setuptools.build_meta" 39 | 40 | [tool.setuptools] 41 | packages = ["evaluation"] 42 | include-package-data = true 43 | 44 | [tool.setuptools.package-data] 45 | "evaluation.wordsim" = ["data/*"] 46 | 47 | [tool.ruff] 48 | exclude = [".venv/"] 49 | line-length = 120 50 | target-version = "py310" 51 | 52 | [tool.ruff.lint] 53 | select = [ 54 | # Annotations: Enforce type annotations 55 | "ANN", 56 | # Complexity: Enforce a maximum cyclomatic complexity 57 | "C90", 58 | # Pydocstyle: Enforce docstrings 59 | "D", 60 | # Remove unused imports 61 | "F401", 62 | # Isort: Enforce import order 63 | "I", 64 | # Numpy: Enforce numpy style 65 | "NPY", 66 | 
# Print: Forbid print statements 67 | "T20", 68 | ] 69 | 70 | ignore = [ 71 | # Allow self and cls to be untyped, and allow Any type 72 | "ANN101", "ANN102", "ANN401", 73 | # Pydocstyle ignores 74 | "D100", "D101", "D104", "D203", "D204", "D212", "D401", 75 | # Allow use of f-strings in logging 76 | "G004" 77 | ] 78 | 79 | [tool.pydoclint] 80 | style = "sphinx" 81 | exclude = "test_" 82 | allow-init-docstring = true 83 | arg-type-hints-in-docstring = false 84 | check-return-types = false 85 | require-return-section-when-returning-nothing = false 86 | 87 | [tool.mypy] 88 | python_version = "3.10" 89 | warn_unused_configs = true 90 | ignore_missing_imports = true 91 | -------------------------------------------------------------------------------- /requirements/requirements-linux.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile pyproject.toml --python-platform linux --output-file=requirements/requirements-linux.txt --all-extras 3 | aiohappyeyeballs==2.4.0 4 | # via aiohttp 5 | aiohttp==3.10.5 6 | # via 7 | # datasets 8 | # fsspec 9 | aiosignal==1.3.1 10 | # via aiohttp 11 | annotated-types==0.7.0 12 | # via pydantic 13 | asttokens==2.4.1 14 | # via stack-data 15 | async-timeout==4.0.3 16 | # via aiohttp 17 | attrs==24.2.0 18 | # via aiohttp 19 | autofj==0.0.6 20 | # via evaluation (pyproject.toml) 21 | black==24.8.0 22 | # via evaluation (pyproject.toml) 23 | blis==0.7.11 24 | # via thinc 25 | catalogue==2.0.10 26 | # via 27 | # spacy 28 | # srsly 29 | # thinc 30 | certifi==2024.8.30 31 | # via requests 32 | cfgv==3.4.0 33 | # via pre-commit 34 | charset-normalizer==3.3.2 35 | # via requests 36 | click==8.1.7 37 | # via 38 | # black 39 | # nltk 40 | # typer 41 | cloudpathlib==0.19.0 42 | # via weasel 43 | confection==0.1.5 44 | # via 45 | # thinc 46 | # weasel 47 | coverage==7.6.1 48 | # via pytest-cov 49 | cymem==2.0.8 50 | # via 51 | # 
preshed 52 | # spacy 53 | # thinc 54 | datasets==2.21.0 55 | # via 56 | # evaluation (pyproject.toml) 57 | # mteb 58 | decorator==5.1.1 59 | # via ipython 60 | dill==0.3.8 61 | # via 62 | # datasets 63 | # multiprocess 64 | distlib==0.3.8 65 | # via virtualenv 66 | editdistance==0.8.1 67 | # via autofj 68 | eval-type-backport==0.2.0 69 | # via mteb 70 | exceptiongroup==1.2.2 71 | # via 72 | # ipython 73 | # pytest 74 | executing==2.1.0 75 | # via stack-data 76 | filelock==3.15.4 77 | # via 78 | # datasets 79 | # huggingface-hub 80 | # torch 81 | # transformers 82 | # triton 83 | # virtualenv 84 | frozenlist==1.4.1 85 | # via 86 | # aiohttp 87 | # aiosignal 88 | fsspec==2024.6.1 89 | # via 90 | # datasets 91 | # huggingface-hub 92 | # pytorch-lightning 93 | # torch 94 | huggingface-hub==0.24.6 95 | # via 96 | # evaluation (pyproject.toml) 97 | # datasets 98 | # sentence-transformers 99 | # tokenizers 100 | # transformers 101 | identify==2.6.0 102 | # via pre-commit 103 | idna==3.8 104 | # via 105 | # requests 106 | # yarl 107 | iniconfig==2.0.0 108 | # via pytest 109 | ipython==8.27.0 110 | # via evaluation (pyproject.toml) 111 | jedi==0.19.1 112 | # via ipython 113 | jellyfish==1.1.0 114 | # via autofj 115 | jinja2==3.1.4 116 | # via 117 | # spacy 118 | # torch 119 | joblib==1.4.2 120 | # via 121 | # nltk 122 | # scikit-learn 123 | langcodes==3.4.0 124 | # via spacy 125 | language-data==1.2.0 126 | # via langcodes 127 | lightning-utilities==0.11.6 128 | # via 129 | # pytorch-lightning 130 | # torchmetrics 131 | marisa-trie==1.2.0 132 | # via language-data 133 | markdown-it-py==3.0.0 134 | # via rich 135 | markupsafe==2.1.5 136 | # via jinja2 137 | matplotlib-inline==0.1.7 138 | # via ipython 139 | mdurl==0.1.2 140 | # via markdown-it-py 141 | mpmath==1.3.0 142 | # via sympy 143 | mteb==1.14.15 144 | # via evaluation (pyproject.toml) 145 | multidict==6.0.5 146 | # via 147 | # aiohttp 148 | # yarl 149 | multiprocess==0.70.16 150 | # via datasets 151 | 
murmurhash==1.0.10 152 | # via 153 | # preshed 154 | # spacy 155 | # thinc 156 | mypy==1.11.2 157 | # via evaluation (pyproject.toml) 158 | mypy-extensions==1.0.0 159 | # via 160 | # black 161 | # mypy 162 | networkx==3.3 163 | # via torch 164 | ngram==4.0.3 165 | # via autofj 166 | nltk==3.9.1 167 | # via autofj 168 | nodeenv==1.9.1 169 | # via pre-commit 170 | numpy==1.26.4 171 | # via 172 | # evaluation (pyproject.toml) 173 | # autofj 174 | # blis 175 | # datasets 176 | # mteb 177 | # pandas 178 | # pyarrow 179 | # reach 180 | # scikit-learn 181 | # scipy 182 | # sentence-transformers 183 | # spacy 184 | # thinc 185 | # torchmetrics 186 | # transformers 187 | nvidia-cublas-cu12==12.1.3.1 188 | # via 189 | # nvidia-cudnn-cu12 190 | # nvidia-cusolver-cu12 191 | # torch 192 | nvidia-cuda-cupti-cu12==12.1.105 193 | # via torch 194 | nvidia-cuda-nvrtc-cu12==12.1.105 195 | # via torch 196 | nvidia-cuda-runtime-cu12==12.1.105 197 | # via torch 198 | nvidia-cudnn-cu12==9.1.0.70 199 | # via torch 200 | nvidia-cufft-cu12==11.0.2.54 201 | # via torch 202 | nvidia-curand-cu12==10.3.2.106 203 | # via torch 204 | nvidia-cusolver-cu12==11.4.5.107 205 | # via torch 206 | nvidia-cusparse-cu12==12.1.0.106 207 | # via 208 | # nvidia-cusolver-cu12 209 | # torch 210 | nvidia-nccl-cu12==2.20.5 211 | # via torch 212 | nvidia-nvjitlink-cu12==12.6.68 213 | # via 214 | # nvidia-cusolver-cu12 215 | # nvidia-cusparse-cu12 216 | nvidia-nvtx-cu12==12.1.105 217 | # via torch 218 | packaging==24.1 219 | # via 220 | # black 221 | # datasets 222 | # huggingface-hub 223 | # lightning-utilities 224 | # pytest 225 | # pytorch-lightning 226 | # spacy 227 | # thinc 228 | # torchmetrics 229 | # transformers 230 | # weasel 231 | pandas==2.2.2 232 | # via 233 | # evaluation (pyproject.toml) 234 | # autofj 235 | # datasets 236 | parso==0.8.4 237 | # via jedi 238 | pathspec==0.12.1 239 | # via black 240 | pexpect==4.9.0 241 | # via ipython 242 | pillow==10.4.0 243 | # via sentence-transformers 244 | 
platformdirs==4.2.2 245 | # via 246 | # black 247 | # virtualenv 248 | pluggy==1.5.0 249 | # via pytest 250 | polars==1.6.0 251 | # via mteb 252 | pre-commit==3.8.0 253 | # via evaluation (pyproject.toml) 254 | preshed==3.0.9 255 | # via 256 | # spacy 257 | # thinc 258 | prompt-toolkit==3.0.47 259 | # via ipython 260 | ptyprocess==0.7.0 261 | # via pexpect 262 | pure-eval==0.2.3 263 | # via stack-data 264 | pyarrow==17.0.0 265 | # via datasets 266 | pydantic==2.8.2 267 | # via 268 | # confection 269 | # mteb 270 | # spacy 271 | # thinc 272 | # weasel 273 | pydantic-core==2.20.1 274 | # via pydantic 275 | pygments==2.18.0 276 | # via 277 | # ipython 278 | # rich 279 | pytest==8.3.2 280 | # via 281 | # evaluation (pyproject.toml) 282 | # pytest-cov 283 | pytest-cov==5.0.0 284 | # via pytest-cover 285 | pytest-cover==3.0.0 286 | # via pytest-coverage 287 | pytest-coverage==0.0 288 | # via evaluation (pyproject.toml) 289 | python-dateutil==2.9.0.post0 290 | # via pandas 291 | pytorch-lightning==2.4.0 292 | # via evaluation (pyproject.toml) 293 | pytrec-eval-terrier==0.5.6 294 | # via mteb 295 | pytz==2024.1 296 | # via pandas 297 | pyyaml==6.0.2 298 | # via 299 | # datasets 300 | # huggingface-hub 301 | # pre-commit 302 | # pytorch-lightning 303 | # transformers 304 | reach==4.1.1 305 | # via evaluation (pyproject.toml) 306 | regex==2024.7.24 307 | # via 308 | # nltk 309 | # transformers 310 | requests==2.32.3 311 | # via 312 | # datasets 313 | # huggingface-hub 314 | # mteb 315 | # spacy 316 | # transformers 317 | # weasel 318 | rich==13.8.0 319 | # via 320 | # mteb 321 | # typer 322 | ruff==0.6.3 323 | # via evaluation (pyproject.toml) 324 | safetensors==0.4.4 325 | # via transformers 326 | scikit-learn==1.5.1 327 | # via 328 | # evaluation (pyproject.toml) 329 | # mteb 330 | # sentence-transformers 331 | scipy==1.14.1 332 | # via 333 | # evaluation (pyproject.toml) 334 | # mteb 335 | # scikit-learn 336 | # sentence-transformers 337 | sentence-transformers==3.0.1 338 
| # via 339 | # evaluation (pyproject.toml) 340 | # mteb 341 | setuptools==74.0.0 342 | # via 343 | # lightning-utilities 344 | # marisa-trie 345 | # spacy 346 | # thinc 347 | shellingham==1.5.4 348 | # via typer 349 | six==1.16.0 350 | # via 351 | # asttokens 352 | # python-dateutil 353 | smart-open==7.0.4 354 | # via weasel 355 | spacy==3.7.6 356 | # via autofj 357 | spacy-legacy==3.0.12 358 | # via spacy 359 | spacy-loggers==1.0.5 360 | # via spacy 361 | srsly==2.4.8 362 | # via 363 | # confection 364 | # spacy 365 | # thinc 366 | # weasel 367 | stack-data==0.6.3 368 | # via ipython 369 | sympy==1.13.2 370 | # via torch 371 | thinc==8.2.5 372 | # via spacy 373 | threadpoolctl==3.5.0 374 | # via scikit-learn 375 | tokenizers==0.19.1 376 | # via transformers 377 | tomli==2.0.1 378 | # via 379 | # black 380 | # coverage 381 | # mypy 382 | # pytest 383 | torch==2.4.0 384 | # via 385 | # evaluation (pyproject.toml) 386 | # mteb 387 | # pytorch-lightning 388 | # sentence-transformers 389 | # torchmetrics 390 | torchmetrics==1.4.1 391 | # via pytorch-lightning 392 | tqdm==4.66.5 393 | # via 394 | # datasets 395 | # huggingface-hub 396 | # mteb 397 | # nltk 398 | # pytorch-lightning 399 | # reach 400 | # sentence-transformers 401 | # spacy 402 | # transformers 403 | traitlets==5.14.3 404 | # via 405 | # ipython 406 | # matplotlib-inline 407 | transformers==4.44.2 408 | # via sentence-transformers 409 | triton==3.0.0 410 | # via torch 411 | typer==0.12.5 412 | # via 413 | # spacy 414 | # weasel 415 | typing-extensions==4.12.2 416 | # via 417 | # black 418 | # cloudpathlib 419 | # huggingface-hub 420 | # ipython 421 | # lightning-utilities 422 | # mteb 423 | # mypy 424 | # pydantic 425 | # pydantic-core 426 | # pytorch-lightning 427 | # torch 428 | # typer 429 | tzdata==2024.1 430 | # via pandas 431 | urllib3==2.2.2 432 | # via requests 433 | virtualenv==20.26.3 434 | # via pre-commit 435 | wasabi==1.1.3 436 | # via 437 | # spacy 438 | # thinc 439 | # weasel 440 | 
wcwidth==0.2.13 441 | # via prompt-toolkit 442 | weasel==0.4.1 443 | # via spacy 444 | wrapt==1.16.0 445 | # via smart-open 446 | xxhash==3.5.0 447 | # via datasets 448 | yarl==1.9.7 449 | # via aiohttp 450 | -------------------------------------------------------------------------------- /requirements/requirements-macos.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile pyproject.toml --python-platform macos --output-file=requirements/requirements-macos.txt --all-extras 3 | aiohappyeyeballs==2.4.0 4 | # via aiohttp 5 | aiohttp==3.10.5 6 | # via 7 | # datasets 8 | # fsspec 9 | aiosignal==1.3.1 10 | # via aiohttp 11 | annotated-types==0.7.0 12 | # via pydantic 13 | asttokens==2.4.1 14 | # via stack-data 15 | async-timeout==4.0.3 16 | # via aiohttp 17 | attrs==24.2.0 18 | # via aiohttp 19 | autofj==0.0.6 20 | # via evaluation (pyproject.toml) 21 | black==24.8.0 22 | # via evaluation (pyproject.toml) 23 | blis==0.7.11 24 | # via thinc 25 | catalogue==2.0.10 26 | # via 27 | # spacy 28 | # srsly 29 | # thinc 30 | certifi==2024.8.30 31 | # via requests 32 | cfgv==3.4.0 33 | # via pre-commit 34 | charset-normalizer==3.3.2 35 | # via requests 36 | click==8.1.7 37 | # via 38 | # black 39 | # nltk 40 | # typer 41 | cloudpathlib==0.19.0 42 | # via weasel 43 | confection==0.1.5 44 | # via 45 | # thinc 46 | # weasel 47 | coverage==7.6.1 48 | # via pytest-cov 49 | cymem==2.0.8 50 | # via 51 | # preshed 52 | # spacy 53 | # thinc 54 | datasets==2.21.0 55 | # via 56 | # evaluation (pyproject.toml) 57 | # mteb 58 | decorator==5.1.1 59 | # via ipython 60 | dill==0.3.8 61 | # via 62 | # datasets 63 | # multiprocess 64 | distlib==0.3.8 65 | # via virtualenv 66 | editdistance==0.8.1 67 | # via autofj 68 | eval-type-backport==0.2.0 69 | # via mteb 70 | exceptiongroup==1.2.2 71 | # via 72 | # ipython 73 | # pytest 74 | executing==2.1.0 75 | # via stack-data 76 | 
filelock==3.15.4 77 | # via 78 | # datasets 79 | # huggingface-hub 80 | # torch 81 | # transformers 82 | # virtualenv 83 | frozenlist==1.4.1 84 | # via 85 | # aiohttp 86 | # aiosignal 87 | fsspec==2024.6.1 88 | # via 89 | # datasets 90 | # huggingface-hub 91 | # pytorch-lightning 92 | # torch 93 | huggingface-hub==0.24.6 94 | # via 95 | # evaluation (pyproject.toml) 96 | # datasets 97 | # sentence-transformers 98 | # tokenizers 99 | # transformers 100 | identify==2.6.0 101 | # via pre-commit 102 | idna==3.8 103 | # via 104 | # requests 105 | # yarl 106 | iniconfig==2.0.0 107 | # via pytest 108 | ipython==8.27.0 109 | # via evaluation (pyproject.toml) 110 | jedi==0.19.1 111 | # via ipython 112 | jellyfish==1.1.0 113 | # via autofj 114 | jinja2==3.1.4 115 | # via 116 | # spacy 117 | # torch 118 | joblib==1.4.2 119 | # via 120 | # nltk 121 | # scikit-learn 122 | langcodes==3.4.0 123 | # via spacy 124 | language-data==1.2.0 125 | # via langcodes 126 | lightning-utilities==0.11.6 127 | # via 128 | # pytorch-lightning 129 | # torchmetrics 130 | marisa-trie==1.2.0 131 | # via language-data 132 | markdown-it-py==3.0.0 133 | # via rich 134 | markupsafe==2.1.5 135 | # via jinja2 136 | matplotlib-inline==0.1.7 137 | # via ipython 138 | mdurl==0.1.2 139 | # via markdown-it-py 140 | mpmath==1.3.0 141 | # via sympy 142 | mteb==1.14.15 143 | # via evaluation (pyproject.toml) 144 | multidict==6.0.5 145 | # via 146 | # aiohttp 147 | # yarl 148 | multiprocess==0.70.16 149 | # via datasets 150 | murmurhash==1.0.10 151 | # via 152 | # preshed 153 | # spacy 154 | # thinc 155 | mypy==1.11.2 156 | # via evaluation (pyproject.toml) 157 | mypy-extensions==1.0.0 158 | # via 159 | # black 160 | # mypy 161 | networkx==3.3 162 | # via torch 163 | ngram==4.0.3 164 | # via autofj 165 | nltk==3.9.1 166 | # via autofj 167 | nodeenv==1.9.1 168 | # via pre-commit 169 | numpy==1.26.4 170 | # via 171 | # evaluation (pyproject.toml) 172 | # autofj 173 | # blis 174 | # datasets 175 | # mteb 176 | # 
pandas 177 | # pyarrow 178 | # reach 179 | # scikit-learn 180 | # scipy 181 | # sentence-transformers 182 | # spacy 183 | # thinc 184 | # torchmetrics 185 | # transformers 186 | packaging==24.1 187 | # via 188 | # black 189 | # datasets 190 | # huggingface-hub 191 | # lightning-utilities 192 | # pytest 193 | # pytorch-lightning 194 | # spacy 195 | # thinc 196 | # torchmetrics 197 | # transformers 198 | # weasel 199 | pandas==2.2.2 200 | # via 201 | # evaluation (pyproject.toml) 202 | # autofj 203 | # datasets 204 | parso==0.8.4 205 | # via jedi 206 | pathspec==0.12.1 207 | # via black 208 | pexpect==4.9.0 209 | # via ipython 210 | pillow==10.4.0 211 | # via sentence-transformers 212 | platformdirs==4.2.2 213 | # via 214 | # black 215 | # virtualenv 216 | pluggy==1.5.0 217 | # via pytest 218 | polars==1.6.0 219 | # via mteb 220 | pre-commit==3.8.0 221 | # via evaluation (pyproject.toml) 222 | preshed==3.0.9 223 | # via 224 | # spacy 225 | # thinc 226 | prompt-toolkit==3.0.47 227 | # via ipython 228 | ptyprocess==0.7.0 229 | # via pexpect 230 | pure-eval==0.2.3 231 | # via stack-data 232 | pyarrow==17.0.0 233 | # via datasets 234 | pydantic==2.8.2 235 | # via 236 | # confection 237 | # mteb 238 | # spacy 239 | # thinc 240 | # weasel 241 | pydantic-core==2.20.1 242 | # via pydantic 243 | pygments==2.18.0 244 | # via 245 | # ipython 246 | # rich 247 | pytest==8.3.2 248 | # via 249 | # evaluation (pyproject.toml) 250 | # pytest-cov 251 | pytest-cov==5.0.0 252 | # via pytest-cover 253 | pytest-cover==3.0.0 254 | # via pytest-coverage 255 | pytest-coverage==0.0 256 | # via evaluation (pyproject.toml) 257 | python-dateutil==2.9.0.post0 258 | # via pandas 259 | pytorch-lightning==2.4.0 260 | # via evaluation (pyproject.toml) 261 | pytrec-eval-terrier==0.5.6 262 | # via mteb 263 | pytz==2024.1 264 | # via pandas 265 | pyyaml==6.0.2 266 | # via 267 | # datasets 268 | # huggingface-hub 269 | # pre-commit 270 | # pytorch-lightning 271 | # transformers 272 | reach==4.1.1 273 | # 
via evaluation (pyproject.toml) 274 | regex==2024.7.24 275 | # via 276 | # nltk 277 | # transformers 278 | requests==2.32.3 279 | # via 280 | # datasets 281 | # huggingface-hub 282 | # mteb 283 | # spacy 284 | # transformers 285 | # weasel 286 | rich==13.8.0 287 | # via 288 | # mteb 289 | # typer 290 | ruff==0.6.3 291 | # via evaluation (pyproject.toml) 292 | safetensors==0.4.4 293 | # via transformers 294 | scikit-learn==1.5.1 295 | # via 296 | # evaluation (pyproject.toml) 297 | # mteb 298 | # sentence-transformers 299 | scipy==1.14.1 300 | # via 301 | # evaluation (pyproject.toml) 302 | # mteb 303 | # scikit-learn 304 | # sentence-transformers 305 | sentence-transformers==3.0.1 306 | # via 307 | # evaluation (pyproject.toml) 308 | # mteb 309 | setuptools==74.0.0 310 | # via 311 | # lightning-utilities 312 | # marisa-trie 313 | # spacy 314 | # thinc 315 | shellingham==1.5.4 316 | # via typer 317 | six==1.16.0 318 | # via 319 | # asttokens 320 | # python-dateutil 321 | smart-open==7.0.4 322 | # via weasel 323 | spacy==3.7.6 324 | # via autofj 325 | spacy-legacy==3.0.12 326 | # via spacy 327 | spacy-loggers==1.0.5 328 | # via spacy 329 | srsly==2.4.8 330 | # via 331 | # confection 332 | # spacy 333 | # thinc 334 | # weasel 335 | stack-data==0.6.3 336 | # via ipython 337 | sympy==1.13.2 338 | # via torch 339 | thinc==8.2.5 340 | # via spacy 341 | threadpoolctl==3.5.0 342 | # via scikit-learn 343 | tokenizers==0.19.1 344 | # via transformers 345 | tomli==2.0.1 346 | # via 347 | # black 348 | # coverage 349 | # mypy 350 | # pytest 351 | torch==2.4.0 352 | # via 353 | # evaluation (pyproject.toml) 354 | # mteb 355 | # pytorch-lightning 356 | # sentence-transformers 357 | # torchmetrics 358 | torchmetrics==1.4.1 359 | # via pytorch-lightning 360 | tqdm==4.66.5 361 | # via 362 | # datasets 363 | # huggingface-hub 364 | # mteb 365 | # nltk 366 | # pytorch-lightning 367 | # reach 368 | # sentence-transformers 369 | # spacy 370 | # transformers 371 | traitlets==5.14.3 372 | 
@pytest.fixture
def mock_encoder() -> Encoder:
    """Build an autospec'd stand-in for an MTEB ``Encoder``.

    The stand-in's ``encode`` produces a random tensor of shape
    ``(len(sentences), 300)`` and the object carries a ``ModelMeta`` with a
    fixed name and revision.
    """

    def _random_embeddings(sentences: Sequence[str], prompt_name: str | None = None, **kwargs: Any) -> torch.Tensor:
        """Produce one random 300-dimensional row for each input sentence."""
        n_rows = len(sentences)
        return torch.rand(n_rows, 300)

    meta = ModelMeta(
        name="mock_model_name",
        revision="mock_revision",
        release_date=None,
        languages=None,
    )

    encoder = create_autospec(Encoder, instance=True)
    # Route every encode() call on the mock through the random-embedding stub.
    encoder.encode.side_effect = _random_embeddings
    encoder.mteb_model_meta = meta
    return encoder
def test_evaluation(mock_encoder: Encoder, tmp_path: Path) -> None:
    """Run the WordSim tasks through CustomMTEB and check the result files written to disk."""
    tasks = get_tasks([TaskType.WORDSIM])
    evaluation = CustomMTEB(tasks)
    results = evaluation.run(mock_encoder, eval_splits=["test"], output_folder=tmp_path)

    # One result per task, and the output folder must still be there after the run.
    assert len(tasks) == len(results), "The number of tasks and results should be the same."
    assert tmp_path.exists(), "The results folder should exist."

    # Result files are looked up under <output_folder>/<model name>/<revision>/<task name>.json.
    task_names = [task.metadata.name for task in tasks]
    result_folder = tmp_path / mock_encoder.mteb_model_meta.name / mock_encoder.mteb_model_meta.revision

    assert all(
        (result_folder / f"{task_name}.json").exists() for task_name in task_names
    ), "All result files for the specified tasks should exist."

    # Ensure that get_tasks without any arguments works
    get_tasks()

    # Ensure that get_tasks with a string works
    get_tasks(["WordSim"])

    # Ensure that get_tasks with a non-existent task name raises an error
    with pytest.raises(ValueError):
        get_tasks(["non_existent_task"])


def _assert_complete_summary(model_scores, model_name: str, task_types) -> None:
    """Check that a summary covers the model and every task type, then render a leaderboard from it.

    Args:
        model_scores: The object returned by ``summarize_results``; it is indexed
            by model name and then by the ``"task_means"`` key.
        model_name: The key the model is expected to be stored under.
        task_types: The task type values that must appear in the model's task means.

    Raises:
        AssertionError: If the model or any of the task types is missing.
    """
    # Check membership first: indexing a missing model would raise KeyError and
    # make this assertion unreachable.
    assert model_name in model_scores, "The model should be present in the summarized scores."

    task_means = model_scores[model_name]["task_means"]
    assert all(task in task_means for task in task_types), "Every task type should have a task mean."

    # Ensure that make_leaderboard works on the summary
    make_leaderboard(model_scores)


def test_summarize(mock_encoder: Encoder, tmp_path: Path) -> None:
    """Test the summarization of the evaluation results for all three ways of obtaining them."""
    task_types = [task.value for task in TaskType]

    # Get the specified tasks and results
    tasks = get_tasks([TaskType.WORDSIM])
    evaluation = CustomMTEB(tasks)
    mteb_results = evaluation.run(mock_encoder, eval_splits=["test"], output_folder=tmp_path)

    # Set the model name
    model_name = f"{mock_encoder.mteb_model_meta.name}_{mock_encoder.mteb_model_meta.revision}"

    # Test option 1: Parse the in-memory results into a custom ResultSet format
    parsed_results = parse_mteb_results(mteb_results=mteb_results, model_name=model_name)
    _assert_complete_summary(summarize_results(parsed_results), model_name, task_types)

    # Test option 2: Load all results from the output folder
    all_results = load_results(tmp_path)
    _assert_complete_summary(summarize_results(all_results), model_name, task_types)

    # Test option 3: Load a specific model/revision folder
    result_folder = tmp_path / mock_encoder.mteb_model_meta.name / mock_encoder.mteb_model_meta.revision
    folder_results = load_results(result_folder)
    _assert_complete_summary(summarize_results(folder_results), model_name, task_types)