├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── Makefile
├── README.md
├── evaluation
    ├── __init__.py
    ├── classification_benchmark.py
    ├── evaluation.py
    ├── pearl
    │   ├── __init__.py
    │   ├── eval.py
    │   ├── pearl.py
    │   └── probing.py
    ├── utils.py
    └── wordsim
    │   ├── __init__.py
    │   ├── data
    │       ├── all_vocab.txt
    │       ├── card_660.txt
    │       ├── men.txt
    │       ├── mturk_771.txt
    │       ├── rel353.txt
    │       ├── rw.txt
    │       ├── simLex.txt
    │       └── simverb_3500.txt
    │   ├── tasks.py
    │   └── wordsim.py
├── pyproject.toml
├── requirements
    ├── requirements-linux.txt
    └── requirements-macos.txt
├── tests
    ├── conftest.py
    ├── test_evaluation.py
    └── test_summarize.py
└── uv.lock


/.gitignore:
--------------------------------------------------------------------------------
  1 | # User specific files
  2 | local/
  3 | lightning_logs/
  4 | results/
  5 | .vscode/
  6 | .DS_Store
  7 | 
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | share/python-wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | *.manifest
 40 | *.spec
 41 | 
 42 | # Installer logs
 43 | pip-log.txt
 44 | pip-delete-this-directory.txt
 45 | 
 46 | # Unit test / coverage reports
 47 | htmlcov/
 48 | .tox/
 49 | .nox/
 50 | .coverage
 51 | .coverage.*
 52 | .cache
 53 | nosetests.xml
 54 | coverage.xml
 55 | *.cover
 56 | *.py,cover
 57 | .hypothesis/
 58 | .pytest_cache/
 59 | cover/
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | db.sqlite3
 69 | db.sqlite3-journal
 70 | 
 71 | # Flask stuff:
 72 | instance/
 73 | .webassets-cache
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | .pybuilder/
 83 | target/
 84 | 
 85 | # Jupyter Notebook
 86 | .ipynb_checkpoints
 87 | 
 88 | # IPython
 89 | profile_default/
 90 | ipython_config.py
 91 | 
 92 | # pyenv
 93 | #   For a library or package, you might want to ignore these files since the code is
 94 | #   intended to run in multiple environments; otherwise, check them in:
 95 | # .python-version
 96 | 
 97 | # pipenv
 98 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 99 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | #   install all needed dependencies.
102 | #Pipfile.lock
103 | 
104 | # poetry
105 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
107 | #   commonly ignored for libraries.
108 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109 | #poetry.lock
110 | 
111 | # pdm
112 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | #pdm.lock
114 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
115 | #   in version control.
116 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
117 | .pdm.toml
118 | .pdm-python
119 | .pdm-build/
120 | 
121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
122 | __pypackages__/
123 | 
124 | # Celery stuff
125 | celerybeat-schedule
126 | celerybeat.pid
127 | 
128 | # SageMath parsed files
129 | *.sage.py
130 | 
131 | # Environments
132 | .env
133 | .venv
134 | env/
135 | venv/
136 | ENV/
137 | env.bak/
138 | venv.bak/
139 | 
140 | # Spyder project settings
141 | .spyderproject
142 | .spyproject
143 | 
144 | # Rope project settings
145 | .ropeproject
146 | 
147 | # mkdocs documentation
148 | /site
149 | 
150 | # mypy
151 | .mypy_cache/
152 | .dmypy.json
153 | dmypy.json
154 | 
155 | # Pyre type checker
156 | .pyre/
157 | 
158 | # pytype static type analyzer
159 | .pytype/
160 | 
161 | # Cython debug symbols
162 | cython_debug/
163 | 
164 | # PyCharm
165 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
166 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
167 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
168 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
169 | #.idea/
170 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |     rev: v4.4.0
 6 |     hooks:
 7 |       - id: check-ast
 8 |         description: Simply check whether files parse as valid python.
 9 |       - id: trailing-whitespace
10 |         description: Trims trailing whitespace
11 |       - id: end-of-file-fixer
12 |         description: Makes sure files end in a newline and only a newline.
13 |       - id: check-added-large-files
14 |         description: Prevent giant files from being committed.
15 |       - id: check-case-conflict
16 |         description: Check for files with names that would conflict on case-insensitive filesystems like MacOS/Windows.
17 |   - repo: https://github.com/astral-sh/ruff-pre-commit
18 |     rev: v0.4.10
19 |     hooks:
20 |       - id: ruff
21 |         args: [ --fix ]
22 |       - id: ruff-format
23 |   - repo: https://github.com/jsh9/pydoclint
24 |     rev: 0.5.3
25 |     hooks:
26 |       - id: pydoclint
27 |   - repo: local
28 |     hooks:
29 |       - id: mypy
30 |         name: mypy
31 |         entry: mypy
32 |         language: python
33 |         types: [python]
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 The Minish Lab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | install:
 2 | 	uv sync --all-extras
 3 | 	uv run pre-commit install
 4 | 
 5 | fix:
 6 | 	uv run pre-commit run --all-files
 7 | 
 8 | test:
 9 | 	uv run pytest --cov=model2vec --cov-report=term-missing
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Evaluation
  2 | 
  3 | This repository can be used to evaluate word embeddings on several tasks. All tasks are implemented as [MTEB](https://github.com/embeddings-benchmark/mteb) tasks and can be run using the same interface.
  4 | 
  5 | ## Usage
  6 | 
  7 | To run the evaluation on all available tasks and summarize the results, the following code can be used:
  8 | 
  9 | ```python
 10 | from sentence_transformers import SentenceTransformer
 11 | 
 12 | from evaluation import CustomMTEB, get_tasks, parse_mteb_results, make_leaderboard, summarize_results
 13 | 
 14 | # Define the model name
 15 | model_name = "average_word_embeddings_glove.6B.300d"
 16 | 
 17 | # Get all available tasks
 18 | tasks = get_tasks()
 19 | # Define the CustomMTEB object with the specified tasks
 20 | evaluation = CustomMTEB(tasks=tasks)
 21 | model = SentenceTransformer(model_name)
 22 | results = evaluation.run(model, eval_splits=["test"], output_folder=f"results/{model_name}")
 23 | 
 24 | # Parse the results and summarize them
 25 | parsed_results = parse_mteb_results(mteb_results=results, model_name=model_name)
 26 | task_scores = summarize_results(parsed_results)
 27 | # Print the results in a leaderboard format
 28 | print(make_leaderboard(task_scores))
 29 | ```
 30 | 
 31 | This will print a markdown table similar to the [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard), e.g.:
 32 | 
 33 | ```
 34 | | Model            |   Average (All) |   Average (MTEB) |   Classification |   Clustering |   PairClassification |   Reranking |   Retrieval |    STS |   Summarization |   PEARL |   WordSim |
 35 | |:-----------------|----------------:|-----------------:|-----------------:|-------------:|---------------------:|------------:|------------:|-------:|----------------:|--------:|----------:|
 36 | | GloVe_300d       |           42.84 |            42.36 |            57.31 |        27.66 |                72.48 |       43.3  |       22.78 |  61.9  |           28.81 |   45.65 |     43.05 |
 37 | ```
 38 | 
 39 | Alternatively, the evaluation can be run on a subset of tasks by specifying the task types:
 40 | 
 41 | ```python
 42 | from evaluation import CustomMTEB, get_tasks, TaskType
 43 | from sentence_transformers import SentenceTransformer
 44 | 
 45 | # Define the model name
 46 | model_name = "average_word_embeddings_glove.6B.300d"
 47 | 
 48 | # Get the specified tasks, in this case the classification and wordsim tasks
 49 | task_types = [TaskType.CLASSIFICATION, TaskType.WORDSIM]
 50 | tasks = get_tasks(task_types=task_types)
 51 | 
 52 | # Define the CustomMTEB object with the specified tasks
 53 | evaluation = CustomMTEB(tasks=tasks)
 54 | # Run the rest of the evaluation and summarization as before
 55 | ```
 56 | 
 57 | The following tasks are supported and can be used via the `TaskType` enum:
 58 | ```text
 59 | - CLASSIFICATION
 60 | - CLUSTERING
 61 | - PAIRCLASSIFICATION
 62 | - RERANKING
 63 | - RETRIEVAL
 64 | - STS
 65 | - SUMMARIZATION
 66 | - WORDSIM
 67 | - PEARL
 68 | ```
 69 | Alternatively, the task types can also be specified as a list of strings, e.g. `task_types=["Classification", "WordSim"]`.
 70 | 
 71 | Custom embedders can be used by implementing the [Encoder protocol](https://github.com/embeddings-benchmark/mteb/blob/main/mteb/encoder_interface.py#L12) from `MTEB`.
 72 | 
 73 | ### Summarizing results
 74 | 
 75 | The `summarize_results` function can be used to summarize results from an existing results folder, e.g.:
 76 | 
 77 | ```python
 78 | from evaluation import load_results, make_leaderboard, summarize_results
 79 | 
 80 | # To summarize all models in a results folder:
 81 | results = load_results("results/")
 82 | task_scores = summarize_results(results)
 83 | print(make_leaderboard(task_scores))
 84 | 
 85 | # To summarize a single model:
 86 | results = load_results("results/average_word_embeddings_glove.6B.300d/")
 87 | task_scores = summarize_results(results)
 88 | print(make_leaderboard(task_scores))
 89 | ```
 90 | 
 91 | 
 92 | ## Supported Tasks
 93 | All tasks from [MTEB](https://github.com/embeddings-benchmark/mteb) are supported:
 94 | - Classification
 95 | - Clustering
 96 | - PairClassification
 97 | - Reranking
 98 | - Retrieval
 99 | - STS
100 | - Summarization
101 | 
102 | ### PEARL
103 | All tasks from the [PEARL paper](https://arxiv.org/pdf/2401.10407) benchmark are supported (PEARL codebase [here](https://github.com/tigerchen52/PEARL)):
104 | - Paraphrase Classification
105 | - Phrase Similarity
106 | - Entity Retrieval
107 | - Entity Clustering
108 | - Fuzzy Join
109 | 
110 | ### WordSim
111 | A collection of single-word similarity datasets is supported:
112 | - RareWord
113 | - MEN
114 | - SimLex
115 | - rel353
116 | - simverb
117 | - mturk
118 | - Card660
119 | 


--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from importlib.metadata import version
 2 | 
 3 | from evaluation.evaluation import CustomMTEB, TaskType, get_tasks
 4 | from evaluation.utils import load_results, make_leaderboard, parse_mteb_results, summarize_results
 5 | 
 6 | __all__ = [
 7 |     "CustomMTEB",
 8 |     "TaskType",
 9 |     "get_tasks",
10 |     "load_results",
11 |     "parse_mteb_results",
12 |     "make_leaderboard",
13 |     "summarize_results",
14 | ]
15 | __version__ = version("evaluation")  # fetch version from install metadata
16 | 


--------------------------------------------------------------------------------
/evaluation/classification_benchmark.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import time
  3 | from logging import getLogger
  4 | from pathlib import Path
  5 | 
  6 | import pandas as pd
  7 | from datasets import Dataset, load_dataset
  8 | from mteb.encoder_interface import Encoder
  9 | from plotnine import aes, geom_point, ggplot, guides, scale_size, theme, theme_classic
 10 | from sklearn.linear_model import LogisticRegression
 11 | from sklearn.metrics import precision_recall_fscore_support
 12 | from sklearn.pipeline import make_pipeline
 13 | from sklearn.preprocessing import StandardScaler
 14 | 
 15 | logger = getLogger(__name__)
 16 | 
# Benchmark configuration: one entry per dataset that `ClassificationBenchmark.run` iterates over.
#   ds_name:    name passed to `load_dataset`
#   text_name:  column holding the input text that gets encoded
#   label_name: column holding the gold label
#   type:       task kind of the entry (not read by the code in this module)
datasets = [
    {"ds_name": "sst2", "text_name": "sentence", "label_name": "label", "type": "classification"},
    {"ds_name": "imdb", "text_name": "text", "label_name": "label", "type": "classification"},
    {"ds_name": "trec", "text_name": "text", "label_name": "coarse_label", "type": "classification"},
    {"ds_name": "ag_news", "text_name": "text", "label_name": "label", "type": "classification"},
]
 23 | 
 24 | 
 25 | class ClassificationBenchmark:
 26 |     def __init__(self, encoder: Encoder, save_path: str) -> None:
 27 |         """
 28 |         Initialize the classification benchmark.
 29 | 
 30 |         :param encoder: The encoder to use. Should be an implementation of an MTEB Encoder protocol.
 31 |         :param save_path: The path to save the results to.
 32 |         """
 33 |         self.encoder = encoder
 34 |         # First check if the encoder has the 'mteb_model_meta' attribute, and if it does, check for 'name'
 35 |         if hasattr(encoder, "mteb_model_meta") and hasattr(encoder.mteb_model_meta, "name"):
 36 |             model_name = encoder.mteb_model_meta.name
 37 |         else:
 38 |             model_name = "no_model_name_available"
 39 |             logger.warning(
 40 |                 "Encoder does not have a model name or mteb_model_meta attribute. Defaulting model name to 'no_model_name_available'."
 41 |             )
 42 | 
 43 |         self.model_name = model_name
 44 |         self.save_path = Path(save_path) / f"{model_name}_classification_results.json"
 45 |         # Make sure the save directory exists
 46 |         self.save_path.parent.mkdir(parents=True, exist_ok=True)
 47 |         self.results: dict[str, dict] = {self.model_name: {}}
 48 | 
 49 |     def train_test_classification(
 50 |         self, encoder: Encoder, dataset: Dataset, text_name: str, label_name: str
 51 |     ) -> tuple[list[str], list[str], float]:
 52 |         """
 53 |         Train and test a classification model for a specific encoder.
 54 | 
 55 |         :param encoder: The encoder to use.
 56 |         :param dataset: The dataset to use.
 57 |         :param text_name: The name of the text column in the dataset.
 58 |         :param label_name: The name of the label column in the dataset.
 59 |         :return: The predictions and labels.
 60 |         """
 61 |         encode_time = 0.0
 62 |         model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
 63 |         split = dataset["train"].train_test_split(test_size=0.1, seed=42)
 64 |         s = time.time()
 65 |         X_train = encoder.encode(split["train"][text_name], show_progress_bar=True)
 66 |         encode_time += time.time() - s
 67 |         y_train = split["train"][label_name]
 68 | 
 69 |         s = time.time()
 70 |         X_dev = encoder.encode(split["test"][text_name], show_progress_bar=True)
 71 |         encode_time += time.time() - s
 72 |         y_dev = split["test"][label_name]
 73 | 
 74 |         model.fit(X_train, y_train)
 75 |         pred = model.predict(X_dev)
 76 | 
 77 |         return pred, y_dev, encode_time
 78 | 
 79 |     def run(self) -> None:
 80 |         """Run the classification benchmark."""
 81 |         for dataset_config in datasets:
 82 |             ds_name = dataset_config["ds_name"]
 83 |             dataset = load_dataset(ds_name)
 84 | 
 85 |             logger.info(f"Evaluating {ds_name}")
 86 |             text_name = dataset_config["text_name"]
 87 |             label_name = dataset_config["label_name"]
 88 | 
 89 |             start_time = time.time()
 90 | 
 91 |             pred, gold, encode_time = self.train_test_classification(self.encoder, dataset, text_name, label_name)
 92 |             metrics = precision_recall_fscore_support(gold, pred, average="micro")
 93 |             runtime = time.time() - start_time
 94 | 
 95 |             self.results[self.model_name][ds_name] = {
 96 |                 "dataset": ds_name,
 97 |                 "main_score": metrics[2],  # Main score
 98 |                 "runtime": runtime,
 99 |                 "encode_time": encode_time,
100 |                 "dataset_length": len(dataset["train"]),
101 |                 "samples_second": len(dataset["train"]) / encode_time,
102 |             }
103 | 
104 |             # Save the results to a JSON file
105 |             self.save_results(self.save_path)
106 | 
107 |     def save_results(self, save_path: Path) -> None:
108 |         """Save the results to a JSON file."""
109 |         with open(save_path, "w") as file:
110 |             json.dump(self.results, file, indent=4)
111 | 
112 | 
def summarize_classification_results(results_path: str) -> pd.DataFrame:
    """
    Summarize classification benchmark results into one row of averages per model.

    Every ``*.json`` file directly inside ``results_path`` is expected to map a single model name to a dict of
    per-dataset metrics (``main_score``, ``encode_time``, ``dataset_length``) and, optionally, a ``params``
    entry holding the number of parameters of the model.

    :param results_path: Path to the directory containing the results JSON files.
    :return: A pandas DataFrame with the columns "Model", "Accuracy", "Samples per second" and
        "Params (Million)". Values that cannot be computed (no datasets, zero encode time, no ``params``
        entry) are NaN.
    """
    columns = ["Model", "Accuracy", "Samples per second", "Params (Million)"]
    # Display names for models whose stored name should be prettified in the summary.
    names = {"GloVe_300d": "GloVe 6B 300d"}
    nan = float("nan")

    model_averages = []
    # Sorted so the row order does not depend on the directory order of the file system.
    for file in sorted(Path(results_path).glob("*.json")):
        with open(file, "r", encoding="utf-8") as f:
            result_data = json.load(f)
        if not result_data:
            continue  # Nothing to summarize in an empty results file

        model_name = next(iter(result_data))  # The single top-level key is the model name
        model_info = result_data[model_name]

        # "params" is optional: the files written by ClassificationBenchmark do not contain it.
        params = model_info.get("params")
        dataset_metrics = [metrics for name, metrics in model_info.items() if name != "params"]

        total_score = sum(metrics["main_score"] for metrics in dataset_metrics)
        total_time = sum(metrics["encode_time"] for metrics in dataset_metrics)
        total_samples = sum(metrics["dataset_length"] for metrics in dataset_metrics)

        model_averages.append(
            {
                "Model": names.get(model_name, model_name),
                "Accuracy": total_score / len(dataset_metrics) if dataset_metrics else nan,
                "Samples per second": total_samples / total_time if total_time else nan,
                "Params (Million)": params / 1_000_000 if params is not None else nan,
            }
        )

    # Passing the columns explicitly keeps the schema stable even when no result files were found.
    return pd.DataFrame(model_averages, columns=columns)
176 | 
177 | 
def plot_avg_df(df: pd.DataFrame) -> ggplot:
    """
    Create a scatterplot of the average results returned by the summarization.

    Throughput is shown on the x-axis and accuracy on the y-axis; the point size encodes the parameter count
    and the color encodes the model.

    :param df: DataFrame with the columns "Samples per second", "Accuracy", "Params (Million)" and "Model".
    :return: The plot object.
    """
    plot = (
        ggplot(df, aes(x="Samples per second", y="Accuracy", size="Params (Million)", color="Model"))
        + geom_point()  # Plot points with variable size
        + scale_size(range=(5, 15))  # Adjust the range: min size = 5, max size = 15
        # The complete theme has to be added first: adding it after `theme(...)` replaces the whole theme
        # and would silently drop the figure size override.
        + theme_classic()
        + theme(figure_size=(10, 6))  # Adjust figure size (width, height) in inches
        + guides(None)
    )

    return plot
190 | 


--------------------------------------------------------------------------------
/evaluation/evaluation.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from enum import Enum
 3 | from typing import Any
 4 | 
 5 | import mteb
 6 | from mteb.abstasks import AbsTask
 7 | from mteb.evaluation import MTEB
 8 | 
 9 | from evaluation.pearl import PEARL
10 | from evaluation.wordsim import WordSim
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
class TaskType(str, Enum):
    """
    Enum for the different supported task types.

    Members are also ``str`` instances, so a member compares equal to its plain string value
    (e.g. ``TaskType.STS == "STS"``).
    """

    # Task types that `get_tasks` matches against `task.metadata.type` of the MTEB tasks.
    CLASSIFICATION = "Classification"
    CLUSTERING = "Clustering"
    PAIRCLASSIFICATION = "PairClassification"
    RERANKING = "Reranking"
    RETRIEVAL = "Retrieval"
    STS = "STS"
    SUMMARIZATION = "Summarization"
    # Task types that `get_tasks` resolves via `PEARL.get_subtasks()` / `WordSim.get_subtasks()`.
    PEARL = "PEARL"
    WORDSIM = "WordSim"
27 | 
28 | 
class CustomMTEB(MTEB):
    """MTEB runner that evaluates the task instances it was given instead of re-selecting tasks."""

    def select_tasks(self, *args: Any, **kwargs: Any) -> None:
        """
        Override select_tasks to directly use passed task instances.

        :param *args: Positional arguments; only forwarded to the parent class when no tasks were provided.
        :param **kwargs: Keyword arguments; only forwarded to the parent class when no tasks were provided.
        """
        if self._tasks is None:
            # If no tasks are passed, fall back to the original behavior
            super().select_tasks(*args, **kwargs)
            return

        # If any args or kwargs are passed, log a warning
        if args or kwargs:
            logger.warning("Ignoring passed arguments and using provided tasks directly.")

        # Use tasks directly without reinitializing; anything that is not a task instance is dropped
        self.tasks = [task for task in self._tasks if isinstance(task, AbsTask)]
        # Initialize tasks_cls with the classes of the provided tasks
        self.tasks_cls = [type(task) for task in self.tasks]

        # Report the entries that were dropped (not the ones that were kept) so the problem can be located.
        skipped = [task for task in self._tasks if not isinstance(task, AbsTask)]
        if skipped:
            logger.warning(f"Some tasks may not have been initialized correctly: {skipped}")

    @property
    def available_task_types(self) -> set[str]:
        """Override to ensure task types are gathered from the instances."""
        return {task.metadata.type for task in self.tasks}
51 | 
52 | 
def get_tasks(task_types: list[TaskType | str] | None = None) -> list[AbsTask]:
    """
    Get the MTEB tasks that match the provided task types.

    :param task_types: The task types to include, either as TaskType members or as their string values
        (e.g. "Classification"). If None, all task types are included.
    :return: The MTEB tasks that match the provided task types.
    :raises ValueError: If any task types are invalid.
    """
    all_task_types = list(TaskType)
    # If no task types are provided, default to all task types
    if task_types is None:
        valid_task_types = all_task_types
    else:
        # TaskType members are strings as well, so plain string values pass this membership check too
        invalid_types = [task for task in task_types if task not in all_task_types]
        if invalid_types:
            # List the values (not the member names): those are the strings that are actually accepted
            supported_types = ", ".join([t.value for t in TaskType])
            raise ValueError(
                f"Invalid task types: {invalid_types}. "
                f"Task types must be TaskType members or their string values. "
                f"Supported task types are: {supported_types}"
            )
        # Convert to a list of TaskType instances
        valid_task_types = [TaskType(task_type) for task_type in task_types]

    # Get the MTEB tasks that match the provided task types
    tasks = [
        task
        for task in (mteb.get_task(task_name, languages=["eng"]) for task_name in mteb.MTEB_MAIN_EN.tasks)
        if task.metadata.type in valid_task_types
    ]

    # If WordSim is in the task types, add the WordSim subtasks
    if TaskType.WORDSIM in valid_task_types:
        wordsim_subtasks = WordSim.get_subtasks()
        tasks.extend(wordsim_subtasks)

    # If PEARL is in the task types, add the PEARL subtasks
    if TaskType.PEARL in valid_task_types:
        pearl_subtasks = PEARL.get_subtasks()
        tasks.extend(pearl_subtasks)

    return tasks
96 | 


--------------------------------------------------------------------------------
/evaluation/pearl/__init__.py:
--------------------------------------------------------------------------------
1 | from evaluation.pearl.pearl import PEARL
2 | 
3 | __all__ = ["PEARL"]
4 | 


--------------------------------------------------------------------------------
/evaluation/pearl/eval.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Literal, cast
  3 | 
  4 | import numpy as np
  5 | from autofj.datasets import load_data
  6 | from datasets import Dataset
  7 | from mteb.encoder_interface import Encoder
  8 | from reach import Reach, normalize
  9 | from scipy.stats import pearsonr
 10 | from sklearn.cluster import KMeans
 11 | from sklearn.metrics.cluster import normalized_mutual_info_score
 12 | 
 13 | from evaluation.pearl.probing import run_probing_model
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def eval_bird(model: Encoder, dataset: Dataset) -> float:
 19 |     """
 20 |     Evaluate the BIRD dataset.
 21 | 
 22 |     :param model: The model to evaluate.
 23 |     :param dataset: The dataset to evaluate.
 24 |     :return: The accuracy of the model on the dataset.
 25 |     """
 26 |     input1 = normalize(model.encode(dataset["term1"]))
 27 |     input2 = normalize(model.encode(dataset["term2"]))
 28 | 
 29 |     sim = (input1 * input2).sum(1)
 30 |     sim = (sim + 1) / 2.0
 31 |     cor, _ = pearsonr(sim, dataset["relatedness score"])
 32 | 
 33 |     return cor
 34 | 
 35 | 
 36 | def eval_turney(model: Encoder, dataset: Dataset) -> float:
 37 |     """
 38 |     Evaluate the Turney dataset.
 39 | 
 40 |     :param model: The model to evaluate.
 41 |     :param dataset: The dataset to evaluate.
 42 |     :return: The accuracy of the model on the dataset.
 43 |     """
 44 |     data_list = []
 45 |     for row in dataset:
 46 |         data_list.append(
 47 |             list(
 48 |                 (
 49 |                     row["query"],
 50 |                     row["label"],
 51 |                     row["candidate_1"],
 52 |                     row["candidate_2"],
 53 |                     row["candidate_3"],
 54 |                     row["candidate_4"],
 55 |                 )
 56 |             )
 57 |         )
 58 | 
 59 |     num_correct = 0
 60 |     for components in data_list:
 61 |         emb = cast(np.ndarray, model.encode(components))
 62 |         query = emb[0, :]
 63 |         matrix = emb[1:, :]
 64 |         scores = np.dot(matrix, query)
 65 |         chosen = np.argmax(scores)
 66 | 
 67 |         if chosen == 0:
 68 |             num_correct += 1
 69 |     accuracy = num_correct / len(data_list)
 70 | 
 71 |     return accuracy
 72 | 
 73 | 
 74 | def eval_ppdb(model: Encoder, dataset: Dataset) -> float:
 75 |     """
 76 |     Evaluate the PPDB dataset.
 77 | 
 78 |     :param model: The model to evaluate.
 79 |     :param dataset: The dataset to evaluate.
 80 |     :return: The accuracy of the model on the dataset.
 81 |     """
 82 |     phrase1_emb = model.encode(dataset["phrase_1"])
 83 |     phrase2_emb = model.encode(dataset["phrase_2"])
 84 |     label_list = [1 if e == "pos" else 0 for e in dataset["label"]]
 85 | 
 86 |     score = run_probing_model(np.concatenate([phrase1_emb, phrase2_emb], axis=1), label_list)
 87 | 
 88 |     return score
 89 | 
 90 | 
 91 | def eval_clustering(model: Encoder, dataset: Dataset, name: Literal["conll", "bc5cdr"]) -> float:
 92 |     """
 93 |     Evaluate the clustering dataset.
 94 | 
 95 |     :param model: The model to evaluate.
 96 |     :param dataset: The dataset to evaluate.
 97 |     :param name: The name of the dataset. Can be "conll" or "bc5cdr".
 98 |     :return: The normalized mutual information score of the model on the dataset.
 99 |     :raises ValueError: If the dataset name is invalid.
100 |     """
101 |     label_dict = dict()
102 |     match name:
103 |         case "conll":
104 |             label_dict = {"PER": 0, "LOC": 1, "ORG": 2}
105 |         case "bc5cdr":
106 |             label_dict = {"Chemical": 0, "Disease": 1}
107 |         case _:
108 |             raise ValueError(f"Invalid dataset name: {name}")
109 | 
110 |     num_class = len(label_dict)
111 | 
112 |     phrases, labels = [], []
113 |     for row in dataset:
114 |         phrases.append(row["entity"] or "NA")
115 |         labels.append(row["label"])
116 | 
117 |     phrase_emb = model.encode(phrases)
118 |     kmeans = KMeans(n_clusters=num_class, random_state=0).fit(phrase_emb)
119 |     nmi_score = normalized_mutual_info_score(labels, kmeans.labels_)
120 | 
121 |     return nmi_score
122 | 
123 | 
def eval_retrieval(model: Encoder, kb_dataset: Dataset, test_dataset: Dataset) -> float:
    """
    Score a model on the retrieval dataset.

    The entity names of the knowledge base are encoded and indexed. Every query is encoded and answered with
    the name of its nearest neighbor in the index, which is compared to the gold label.

    :param model: The model to evaluate.
    :param kb_dataset: The dataset containing the knowledge base.
    :param test_dataset: The dataset to evaluate.
    :return: The accuracy of the model on the dataset.
    """
    # Knowledge base entries without a name cannot be encoded and are skipped
    entity_names = [name for name in kb_dataset["entity_name"] if name is not None]
    index = Reach(model.encode(entity_names), entity_names)

    query_embeddings = model.encode(test_dataset["query"])
    neighbors = index.nearest_neighbor(query_embeddings)
    # Take the first field of the first neighbor entry for every query
    # (presumably the entity name of the closest match; verify against the Reach API)
    predicted = [neighbor[0][0] for neighbor in neighbors]

    pairs = list(zip(test_dataset["label"], predicted))
    total = len(pairs)
    misses = sum(1 for gold, guess in pairs if guess != gold)

    return (total - misses) * 1.0 / total
153 | 
154 | 
def eval_single_autofj(dataset_name: str, model: Encoder) -> float:
    """
    Score fuzzy-join matching on one AutoFJ table pair.

    Every right-table title that has a ground-truth entry is matched to the left-table
    title with the highest dot product between the normalized embeddings.

    :param dataset_name: The name of the dataset to evaluate.
    :param model: The model used to encode the titles.
    :return: The fraction of evaluated right-table titles matched to the correct left title.
    """
    left_table, right_table, gt_table = load_data(dataset_name)
    left_titles: list[str] = list(left_table.title)
    right_titles: list[str] = list(right_table.title)

    gold_left = list(gt_table.title_l)
    gold_right = list(gt_table.title_r)
    # Ground truth lookup: right title -> expected left title.
    expected_match = dict(zip(gold_right, gold_left))

    left_matrix = normalize(model.encode(left_titles))
    right_matrix = normalize(model.encode(right_titles))

    n_correct = 0
    n_evaluated = 0
    for position, right_vector in enumerate(right_matrix):
        right_title = right_titles[position]
        # Titles without a ground-truth match are left out of the score.
        if right_title not in expected_match:
            continue

        similarities = right_vector @ left_matrix.T
        best_left_title = left_titles[np.argmax(similarities)]

        n_evaluated += 1
        if best_left_title == expected_match[right_title]:
            n_correct += 1

    return n_correct * 1.0 / n_evaluated
189 | 
190 | 
def eval_autofj(model: Encoder, dataset: Dataset) -> float:
    """
    Run the AutoFJ evaluation on every table listed in the dataset and average the results.

    :param model: The model to evaluate.
    :param dataset: Rows whose "Dataset" field names the AutoFJ table to evaluate.
    :return: The unweighted mean of the per-table accuracies.
    """
    # Collect all table names first, then evaluate them one by one.
    names: list[str] = []
    for row in dataset:
        names.append(row["Dataset"])

    accuracies = [eval_single_autofj(dataset_name=table, model=model) for table in names]

    return sum(accuracies) / len(accuracies)
205 | 


--------------------------------------------------------------------------------
/evaluation/pearl/pearl.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from typing import Any, Literal, cast
  4 | 
  5 | from datasets import DatasetDict, load_dataset
  6 | from mteb import TaskMetadata
  7 | from mteb.abstasks import AbsTask
  8 | from mteb.encoder_interface import Encoder
  9 | 
 10 | from evaluation.pearl.eval import eval_autofj, eval_bird, eval_clustering, eval_ppdb, eval_retrieval, eval_turney
 11 | 
 12 | 
 13 | class PEARL(AbsTask):
 14 |     DATASET_TASK_MAPPING = {
 15 |         "bird": "Classification",
 16 |         "turney": "Classification",
 17 |         "ppdb": "Classification",
 18 |         "ppdb_filtered": "Classification",
 19 |         "yago": "Retrieval",
 20 |         "umls": "Retrieval",
 21 |         "autofj": "Retrieval",
 22 |         "conll": "Clustering",
 23 |         "bc5cdr": "Clustering",
 24 |     }
 25 | 
 26 |     def __init__(self, dataset_name: str, hf_subsets: Any = None, **kwargs: Any) -> None:
 27 |         """
 28 |         Initialize a PEARL task with the given dataset name.
 29 | 
 30 |         :param dataset_name: The name of the dataset to use.
 31 |         :param hf_subsets: The Hugging Face dataset splits to use.
 32 |         :param **kwargs: Additional keyword arguments.
 33 |         :raises ValueError: If the dataset name is unknown.
 34 |         """
 35 |         # Use the mapping to get the task type
 36 |         try:
 37 |             task_type = self.DATASET_TASK_MAPPING[dataset_name]
 38 |         except KeyError:
 39 |             raise ValueError(f"Unknown dataset name: {dataset_name}")
 40 | 
 41 |         self.dataset_name = dataset_name
 42 |         self.metadata = TaskMetadata(
 43 |             name=dataset_name,
 44 |             description=f"PEARL Task: {dataset_name}",
 45 |             dataset={
 46 |                 "path": "Lihuchen/pearl_benchmark",
 47 |                 "revision": "1.0.0",
 48 |             },
 49 |             reference=None,
 50 |             type=task_type,
 51 |             modalities=["text"],
 52 |             eval_splits=["test"],
 53 |             eval_langs=["en"],
 54 |             main_score="accuracy",
 55 |         )
 56 | 
 57 |         # Initialize the parent class after setting the metadata
 58 |         super().__init__(hf_subsets=hf_subsets, **kwargs)
 59 | 
 60 |     def _calculate_metrics_from_split(self) -> None:
 61 |         """Calculate the metrics from the dataset split."""
 62 |         raise NotImplementedError("Method not implemented")
 63 | 
 64 |     def load_data(self, eval_splits: Any = None) -> None:
 65 |         """Load the appropriate dataset based on the task name."""
 66 |         if self.dataset_name == "umls":
 67 |             dataset = load_dataset("Lihuchen/pearl_benchmark", "umls", split="umls")
 68 |         else:
 69 |             dataset = load_dataset("Lihuchen/pearl_benchmark", self.dataset_name, split="test")
 70 |         self.dataset = DatasetDict(
 71 |             {
 72 |                 "test": dataset,
 73 |             }
 74 |         )
 75 | 
 76 |     def evaluate(
 77 |         self, model: Encoder, split: str = "test", output_folder: str | None = None, **kwargs: Any
 78 |     ) -> dict[str, dict[str, float]]:
 79 |         """Evaluate the given model on the specified dataset split."""
 80 |         dataset_split = self.dataset[split]
 81 |         result = self._evaluate_subset(model, dataset_split)
 82 | 
 83 |         return {"default": {"accuracy": result, "main_score": result}}
 84 | 
 85 |     def _evaluate_subset(self, model: Encoder, dataset_split: str, **kwargs: Any) -> float:
 86 |         """Evaluate the given model on the specified dataset split."""
 87 |         match self.dataset_name:
 88 |             case "bird":
 89 |                 return eval_bird(model, dataset_split)
 90 |             case "turney":
 91 |                 return eval_turney(model, dataset_split)
 92 |             case "ppdb" | "ppdb_filtered":
 93 |                 return eval_ppdb(model, dataset_split)
 94 |             case "yago" | "umls":
 95 |                 kb_dataset = load_dataset("Lihuchen/pearl_benchmark", "kb", split=self.dataset_name)
 96 |                 return eval_retrieval(model, kb_dataset, dataset_split)
 97 |             case "autofj":
 98 |                 return eval_autofj(model, dataset_split)
 99 |             case "conll" | "bc5cdr":
100 |                 return eval_clustering(model, dataset_split, name=cast(Literal["conll", "bc5cdr"], self.dataset_name))
101 |             case _:
102 |                 raise ValueError(f"Unknown dataset: {self.dataset_name}")
103 | 
104 |     @classmethod
105 |     def get_subtasks(cls) -> list[PEARL]:
106 |         """Return a list of subtasks, one for each dataset in the PEARL benchmark."""
107 |         return [cls(dataset_name=name) for name in cls.DATASET_TASK_MAPPING.keys()]
108 | 


--------------------------------------------------------------------------------
/evaluation/pearl/probing.py:
--------------------------------------------------------------------------------
  1 | from typing import Any
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | from pytorch_lightning import LightningModule, Trainer
  6 | from pytorch_lightning.callbacks import EarlyStopping
  7 | from sklearn.model_selection import train_test_split
  8 | from torch import LongTensor, Tensor, nn, optim
  9 | from torch.nn import functional as F
 10 | from torch.utils.data import DataLoader, Dataset
 11 | 
 12 | 
 13 | class ParaphraseDataset(Dataset):
 14 |     """Dataset for paraphrase probing task."""
 15 | 
 16 |     def __init__(self, X: Tensor, label_tensor: Tensor) -> None:
 17 |         """
 18 |         Initialize the dataset.
 19 | 
 20 |         :param X: The input data.
 21 |         :param label_tensor: The labels.
 22 |         """
 23 |         self.concat_input = X.float()
 24 |         self.label = label_tensor.float()
 25 | 
 26 |     def __getitem__(self, index: int) -> tuple[Tensor, Tensor]:
 27 |         """Get the item at the given index."""
 28 |         return self.concat_input[index], self.label[index]
 29 | 
 30 |     def __len__(self) -> int:
 31 |         """Get the length of the dataset."""
 32 |         return len(self.concat_input)
 33 | 
 34 | 
 35 | class ProbingModel(LightningModule):
 36 |     """Probing model for paraphrase detection."""
 37 | 
 38 |     def __init__(self, input_dim: int, train_dataset: Dataset, valid_dataset: Dataset, test_dataset: Dataset) -> None:
 39 |         """
 40 |         Initialize the probing model.
 41 | 
 42 |         :param input_dim: The input dimension.
 43 |         :param train_dataset: The training dataset.
 44 |         :param valid_dataset: The validation dataset.
 45 |         :param test_dataset: The test dataset.
 46 |         """
 47 |         super().__init__()
 48 |         self.input_dim = input_dim
 49 |         self.linear = nn.Linear(self.input_dim, 256)
 50 |         self.linear2 = nn.Linear(256, 1)
 51 |         self.output = nn.Sigmoid()
 52 | 
 53 |         # Hyper-parameters, that we will auto-tune using lightning.
 54 |         self.lr = 0.0001
 55 |         self.batch_size = 200
 56 | 
 57 |         # datasets
 58 |         self.train_dataset = train_dataset
 59 |         self.valid_dataset = valid_dataset
 60 |         self.test_dataset = test_dataset
 61 | 
 62 |         # Store validation and test outputs
 63 |         self.validation_outputs: list[dict[str, Tensor]] = []
 64 |         self.test_outputs: list[dict[str, Tensor]] = []
 65 | 
 66 |     def forward(self, x: Tensor) -> Tensor:
 67 |         """Forward pass of the model."""
 68 |         x1 = F.relu(self.linear(x))
 69 |         x2 = self.linear2(x1)
 70 |         output: Tensor = self.output(x2)
 71 |         return output.reshape((-1,))
 72 | 
 73 |     def configure_optimizers(self) -> optim.Adam:
 74 |         """Configure the optimizer."""
 75 |         return optim.Adam(self.parameters(), lr=self.lr)
 76 | 
 77 |     def train_dataloader(self) -> DataLoader:
 78 |         """Get the training dataloader."""
 79 |         return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
 80 | 
 81 |     def val_dataloader(self) -> DataLoader:
 82 |         """Get the validation dataloader."""
 83 |         return DataLoader(self.valid_dataset, batch_size=self.batch_size, shuffle=False)
 84 | 
 85 |     def test_dataloader(self) -> DataLoader:
 86 |         """Get the test dataloader."""
 87 |         return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
 88 | 
 89 |     def compute_accuracy(self, y_hat: Tensor, y: Tensor) -> Tensor:
 90 |         """Compute the accuracy of the model."""
 91 |         y_pred = (y_hat >= 0.5).long()
 92 |         num_correct = (y_pred == y).long().sum().item()
 93 |         accuracy = torch.as_tensor(num_correct / len(y_hat))
 94 |         return accuracy
 95 | 
 96 |     def training_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
 97 |         """Training step of the model."""
 98 |         mode = "train"
 99 |         x, y = batch
100 |         y_hat = self(x)
101 |         loss = F.binary_cross_entropy(y_hat, y)
102 |         accuracy = self.compute_accuracy(y_hat, y)
103 |         return {f"loss": loss, f"{mode}_accuracy": accuracy}
104 | 
105 |     def validation_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
106 |         """Validation step of the model."""
107 |         mode = "val"
108 |         x, y = batch
109 |         y_hat = self(x)
110 |         loss = F.binary_cross_entropy(y_hat, y)
111 |         accuracy = self.compute_accuracy(y_hat, y)
112 |         self.log(f"{mode}_loss", loss, on_epoch=True, on_step=False)
113 |         self.log(f"{mode}_accuracy", accuracy, on_epoch=True, on_step=False)
114 | 
115 |         # Store the outputs for aggregation later
116 |         self.validation_outputs.append({"val_loss": loss, "val_accuracy": accuracy})
117 |         return {"val_loss": loss, "val_accuracy": accuracy}
118 | 
119 |     def on_validation_epoch_end(self) -> None:
120 |         """Validation epoch end hook."""
121 |         mode = "val"
122 |         loss_mean = torch.stack([x["val_loss"] for x in self.validation_outputs]).mean()
123 |         accuracy_mean = torch.tensor([x["val_accuracy"] for x in self.validation_outputs]).mean()
124 |         self.log(f"epoch_{mode}_loss", loss_mean, on_epoch=True, on_step=False)
125 |         self.log(f"epoch_{mode}_accuracy", accuracy_mean, on_epoch=True, on_step=False)
126 | 
127 |         # Clear the outputs for the next epoch
128 |         self.validation_outputs.clear()
129 | 
130 |     def test_step(self, batch: tuple[Tensor, Tensor], batch_nb: int) -> dict[str, Any]:
131 |         """Test step of the model."""
132 |         mode = "test"
133 |         x, y = batch
134 |         y_hat = self(x)
135 |         loss = F.binary_cross_entropy(y_hat, y)
136 |         accuracy = self.compute_accuracy(y_hat, y)
137 |         self.log(f"{mode}_loss", loss, on_epoch=True, on_step=False)
138 |         self.log(f"{mode}_accuracy", accuracy, on_epoch=True, on_step=False)
139 | 
140 |         # Store the outputs for aggregation later
141 |         self.test_outputs.append({"test_loss": loss, "test_accuracy": accuracy})
142 |         return {"test_loss": loss, "test_accuracy": accuracy}
143 | 
144 |     def on_test_epoch_end(self) -> None:
145 |         """Test epoch end hook."""
146 |         mode = "test"
147 |         loss_mean = torch.stack([x["test_loss"] for x in self.test_outputs]).mean()
148 |         accuracy_mean = torch.tensor([x["test_accuracy"] for x in self.test_outputs]).mean()
149 |         self.log(f"epoch_{mode}_loss", loss_mean, on_epoch=True, on_step=False)
150 |         self.log(f"epoch_{mode}_accuracy", accuracy_mean, on_epoch=True, on_step=False)
151 | 
152 |         # Clear the outputs for the next epoch
153 |         self.test_outputs.clear()
154 | 
155 | 
def run_probing_model(X: np.ndarray, y: list[int]) -> float:
    """
    Train a probing model on an 80/10/10 split of the data and report its test accuracy.

    :param X: The input data.
    :param y: The labels.
    :return: The test accuracy.
    """
    # Hold out 20% of the data, then halve the held-out part into test and dev sets.
    X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.2, random_state=42)
    X_test, X_dev, y_test, y_dev = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=42)

    def as_dataset(features: np.ndarray, targets: list[int]) -> ParaphraseDataset:
        return ParaphraseDataset(torch.from_numpy(features), LongTensor(targets))

    probe = ProbingModel(
        input_dim=X.shape[1],
        train_dataset=as_dataset(X_train, y_train),
        valid_dataset=as_dataset(X_dev, y_dev),
        test_dataset=as_dataset(X_test, y_test),
    )

    # Stop once the epoch-level validation accuracy has not improved for 5 checks.
    stopper = EarlyStopping(monitor="epoch_val_accuracy", min_delta=0.00, patience=5, verbose=False, mode="max")

    trainer = Trainer(max_epochs=100, min_epochs=3, callbacks=[stopper])
    trainer.fit(probe)
    test_metrics = trainer.test(dataloaders=probe.test_dataloader())

    return test_metrics[0]["epoch_test_accuracy"]
187 | 


--------------------------------------------------------------------------------
/evaluation/utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | from collections import defaultdict
  4 | from dataclasses import dataclass, field
  5 | from pathlib import Path
  6 | from typing import Any
  7 | 
  8 | import click
  9 | import mteb
 10 | import numpy as np
 11 | import pandas as pd
 12 | from mteb.evaluation.LangMapping import LANG_MAPPING
 13 | from mteb.load_results import MTEBResults
 14 | from rich.logging import RichHandler
 15 | from scipy.stats._stats_py import SignificanceResult
 16 | 
 17 | from evaluation import TaskType, get_tasks
 18 | 
# Name of a JSON file that load_results skips when collecting result files.
_FORBIDDEN_JSON = "model_meta.json"
# hf_subset values whose main score is kept when reading results.
_SUPPORTED_LANGS = {"default", "en-en", "en"}.union(LANG_MAPPING["en"])

# Retrieval tasks that summarize_results folds into a single "CQADupstack" score.
_TASK_LIST_CQA = {
    "CQADupstackAndroidRetrieval",
    "CQADupstackEnglishRetrieval",
    "CQADupstackGamingRetrieval",
    "CQADupstackGisRetrieval",
    "CQADupstackMathematicaRetrieval",
    "CQADupstackPhysicsRetrieval",
    "CQADupstackProgrammersRetrieval",
    "CQADupstackStatsRetrieval",
    "CQADupstackTexRetrieval",
    "CQADupstackUnixRetrieval",
    "CQADupstackWebmastersRetrieval",
    "CQADupstackWordpressRetrieval",
}

# Module-level logger.
logger = logging.getLogger(__name__)
 38 | 
 39 | 
 40 | def setup_task_mappings() -> tuple[dict[str, list[str]], list[str]]:
 41 |     """
 42 |     Setup the task mappings for the evaluation.
 43 | 
 44 |     :return: A dictionary mapping task types to task names and a list of custom task names.
 45 |     """
 46 |     # Get all tasks
 47 |     all_tasks = get_tasks()
 48 |     # Create a dictionary mapping task types to task names
 49 |     task_type_to_tasks_mapping = defaultdict(list)
 50 | 
 51 |     # Get all WordSim tasks
 52 |     wordsim_tasks = get_tasks([TaskType.WORDSIM])
 53 |     wordsim_task_names = [task.metadata.name for task in wordsim_tasks]
 54 | 
 55 |     # Get all PEARL tasks
 56 |     pearl_tasks = get_tasks([TaskType.PEARL])
 57 |     pearl_task_names = [task.metadata.name for task in pearl_tasks]
 58 | 
 59 |     # Get all custom task names
 60 |     custom_task_names = wordsim_task_names + pearl_task_names
 61 | 
 62 |     # Populate the dictionary
 63 |     for task in all_tasks:
 64 |         if task.metadata.name in wordsim_task_names:
 65 |             task_type_to_tasks_mapping["WordSim"].append(task.metadata.name)
 66 |         elif task.metadata.name in pearl_task_names:
 67 |             task_type_to_tasks_mapping["PEARL"].append(task.metadata.name)
 68 |         else:
 69 |             task_type_to_tasks_mapping[task.metadata.type].append(task.metadata.name)
 70 | 
 71 |     return task_type_to_tasks_mapping, custom_task_names
 72 | 
 73 | 
# Computed once at import time: the task-type -> task-names mapping and the custom task names.
_task_type_to_tasks_mapping, _custom_task_names = setup_task_mappings()
 75 | 
 76 | 
 77 | def setup_logging() -> None:
 78 |     """Simple logging setup."""
 79 |     logging.basicConfig(
 80 |         level="INFO",
 81 |         format="%(name)s - %(message)s",
 82 |         datefmt="%Y-%m-%d %H:%M:%S",
 83 |         handlers=[RichHandler(rich_tracebacks=True, tracebacks_suppress=[click])],
 84 |     )
 85 | 
 86 | 
 87 | @dataclass
 88 | class DatasetResult:
 89 |     """
 90 |     Scores for a single dataset.
 91 | 
 92 |     Attributes
 93 |     ----------
 94 |         scores: The scores for the dataset.
 95 |         time: The time it took to evaluate the dataset.
 96 | 
 97 |     """
 98 | 
 99 |     scores: list[float]
100 |     time: float
101 | 
102 |     def mean(self) -> float:
103 |         """Calculate the mean of all scores."""
104 |         return float(np.mean(self.scores))
105 | 
106 | 
@dataclass
class ResultSet:
    """Results of one model, keyed by dataset name."""

    datasets: dict[str, DatasetResult] = field(default_factory=dict)

    def summarize(self, task_type: str) -> pd.Series:
        """Return the mean score of every dataset that belongs to the given task type."""
        summary = {}
        for name, result in self.datasets.items():
            # MTEB tasks are matched on the type stored in their metadata.
            mteb_match = name not in _custom_task_names and mteb.get_task(name).metadata.type == task_type
            # Custom tasks are matched through the precomputed WordSim / PEARL name lists.
            custom_match = task_type in ("WordSim", "PEARL") and name in _task_type_to_tasks_mapping[task_type]
            if mteb_match or custom_match:
                summary[name] = result.mean()

        return pd.Series(summary)

    def times(self) -> dict[str, float]:
        """Return the evaluation time of every dataset, keyed by dataset name."""
        durations = {}
        for name, result in self.datasets.items():
            durations[name] = result.time
        return durations
134 | 
135 | 
def load_results(results_dir: str | Path) -> dict[str, ResultSet]:
    """
    Collect the result JSON files below a directory into one ResultSet per model.

    :param results_dir: The root directory containing results for all models.
    :return: A dictionary of model names to ResultSet objects.
    """
    root = Path(results_dir).resolve()
    per_model: defaultdict = defaultdict(ResultSet)

    # Walk every JSON file in the directory and its subdirectories.
    for json_path in root.glob("**/*.json"):
        # Layout: <model_name>/<model_revision>/<dataset>.json
        revision_dir = json_path.parent
        model_revision = revision_dir.name
        model_name = revision_dir.parent.name

        if model_name == "no_model_name_available":
            logger.warning(f"Model name not available for {json_path}. Skipping.")
            continue

        if model_revision == "no_revision_available":
            full_model_name = model_name
        else:
            full_model_name = f"{model_name}_{model_revision}"

        # The model metadata file is not a result file.
        if json_path.name == _FORBIDDEN_JSON:
            continue

        with open(json_path) as handle:
            payload = json.load(handle)
        per_model[full_model_name].datasets[json_path.stem] = _process_result_data(payload)

    return dict(per_model)
167 | 
168 | 
def _process_result_data(data: dict[str, Any]) -> DatasetResult:
    """
    Extract the main test scores and the evaluation time from one result JSON.

    :param data: The data to process.
    :return: The processed data.
    """
    main_scores = []
    for entry in data["scores"]["test"]:
        # Only subsets in the supported language set count.
        if entry["hf_subset"] not in _SUPPORTED_LANGS:
            continue
        value = entry["main_score"]
        # A main score stored as a list is reduced to its first element.
        if isinstance(value, list):
            value = value[0]
        main_scores.append(value)

    return DatasetResult(scores=main_scores, time=data["evaluation_time"])
180 | 
181 | 
def parse_mteb_results(mteb_results: list[MTEBResults], model_name: str) -> dict[str, ResultSet]:
    """
    Parse MTEBResults into a dictionary of ResultSet objects.

    Results without test scores, or without a test score for a supported subset, are skipped.

    :param mteb_results: The results to parse.
    :param model_name: The name under which the parsed results are stored.
    :return: A dictionary with a single entry mapping the model name to its ResultSet.
    """
    dataset_results = {}

    for result in mteb_results:
        task_name = result.task_name
        test_scores = result.scores.get("test", [])
        if not test_scores:
            continue

        supported_scores = [score["main_score"] for score in test_scores if score["hf_subset"] in _SUPPORTED_LANGS]
        if not supported_scores:
            # Indexing an empty list here would raise IndexError; skip the task instead.
            logger.warning(f"No test scores for a supported subset in task {task_name}. Skipping.")
            continue
        main_score = supported_scores[0]

        # Check if the main score is a SignificanceResult. If so, extract the statistic
        if isinstance(main_score, SignificanceResult):
            main_score = main_score.statistic

        # Populate the DatasetResult
        dataset_results[task_name] = DatasetResult(scores=[main_score], time=result.evaluation_time)

    return {model_name: ResultSet(datasets=dataset_results)}
202 | 
203 | 
def summarize_results(
    results: dict[str, ResultSet],
) -> dict[str, pd.DataFrame]:
    """
    Summarize the results for all models and tasks.

    For every task type the per-dataset means are averaged into a task mean. A task type
    other than Retrieval only gets a mean when the model has results for exactly the
    expected datasets; otherwise its mean is NaN. For Retrieval, all CQADupstack datasets
    are first merged into a single "CQADupstack" score.

    :param results: The results to summarize.
    :return: A dictionary mapping model names to a dictionary with the task means ("task_means")
        and the individual dataset scores ("dataset_scores").
    """
    model_scores = {}
    task_types = [task.value for task in TaskType]

    for model_name, result_set in results.items():
        dataset_scores = {}
        task_summaries = {}

        for task_type in task_types:
            # Summarize the results for the specific task type
            task_summary = result_set.summarize(task_type=task_type)
            if task_type == "Retrieval":
                # Retrieval task is a special case, as it has multiple datasets for CQA
                scores = {}
                scores_cqa = []
                for name, score in task_summary.items():
                    if name in _TASK_LIST_CQA:
                        scores_cqa.append(score)
                    else:
                        scores[name] = score
                # Merge the CQA scores once, and only when there are any: the mean of an
                # empty list is NaN, which would otherwise end up in the dataset scores.
                if scores_cqa:
                    scores["CQADupstack"] = np.mean(scores_cqa)
                task_summary = pd.Series(scores, dtype=float)
            # Get the expected datasets for this task type
            expected_datasets = _task_type_to_tasks_mapping[task_type]
            # Check if the model has results for all required datasets, or the Retrieval task
            if set(task_summary.index) == set(expected_datasets) or task_type == "Retrieval":
                task_summaries[task_type] = task_summary.mean()
                for dataset, score in task_summary.items():
                    dataset_scores[dataset] = score
            else:
                task_summaries[task_type] = np.nan
                logger.warning(f"Model {model_name} is missing results for some datasets in task type {task_type}.")

        # Store task means but also collect all individual dataset scores for macro averaging
        model_scores[model_name] = {
            "task_means": pd.Series(task_summaries),
            "dataset_scores": dataset_scores,  # Collecting all dataset scores for macro averaging
        }

    return model_scores
253 | 
254 | 
def make_leaderboard(model_scores: dict[str, dict]) -> pd.DataFrame:
    """
    Make the leaderboard with the mean scores for each task and compute macro scores.

    :param model_scores: Per model, a dictionary holding the task means ("task_means") and the
        individual dataset scores ("dataset_scores").
    :return: A DataFrame with one row per model: the model name, the two macro averages and the
        task means. Scores are percentages formatted with two decimals; missing scores are "N/A".
    """

    def _format_score(value: Any) -> Any:
        """Format a numeric score as a percentage string; missing scores become "N/A"."""
        if isinstance(value, (int, float)):
            # NaN is a float too, so it has to be caught before formatting;
            # otherwise it is rendered as the string "nan" and never replaced.
            return "N/A" if pd.isna(value) else f"{value * 100:.2f}"
        return value

    # Extract task means and dataset scores
    task_means = {model: scores["task_means"] for model, scores in model_scores.items()}
    dataset_scores = {model: scores["dataset_scores"] for model, scores in model_scores.items()}

    # Convert the task_means dictionary to a DataFrame for task-wise averaging
    leaderboard = pd.DataFrame(task_means)

    # Calculate the overall macro score for each model (mean of all datasets across all tasks)
    leaderboard.loc["Average (All)"] = {
        model: np.mean(list(scores.values())) if task_means[model].notna().all() else np.nan
        for model, scores in dataset_scores.items()
    }
    # Filter out the custom task names from dataset_scores
    mteb_dataset_scores = {
        model: {dataset: score for dataset, score in scores.items() if dataset not in _custom_task_names}
        for model, scores in dataset_scores.items()
    }

    # Calculate the overall mean for MTEB tasks (excluding custom task names)
    leaderboard.loc["Average (MTEB)"] = {
        model: np.mean(list(scores.values()))
        if task_means[model].notna().all() and pd.Series(mteb_dataset_scores[model]).notna().all()
        else np.nan
        for model, scores in mteb_dataset_scores.items()
    }

    # Multiply all values by 100 and format to 2 decimal places. Mapping column by column
    # avoids the deprecated DataFrame.applymap.
    leaderboard = leaderboard.apply(lambda column: column.map(_format_score))

    # Replace any remaining missing values with "N/A"
    leaderboard = leaderboard.fillna("N/A")

    # Transpose the DataFrame so models are in rows and task types in columns
    leaderboard = leaderboard.transpose().reset_index()

    # Rename the index column to "Model"
    leaderboard.rename(columns={"index": "Model"}, inplace=True)

    # Reorder columns to place "Average (All)" and "Average (MTEB)" right after "Model"
    leading_columns = ["Model", "Average (All)", "Average (MTEB)"]
    columns = leading_columns + [col for col in leaderboard.columns if col not in leading_columns]
    leaderboard = leaderboard[columns]

    return leaderboard
302 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/__init__.py:
--------------------------------------------------------------------------------
1 | from evaluation.wordsim.wordsim import WordSim
2 | 
3 | __all__ = ["WordSim"]
4 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/data/card_660.txt:
--------------------------------------------------------------------------------
  1 | Pokemon	Pocket_Monsters	3.81
  2 | prejudice	chauvinist	2.25
  3 | formic_acid	arachnology	1.19
  4 | NetMeeting	Marwar_Hall	0.00
  5 | kingfish	kingship	0.31
  6 | iight	ok	3.94
  7 | ACL	EMNLP	3.13
  8 | Qintex	Allwaste	2.06
  9 | Australian_Open	mixed_doubles	1.88
 10 | Curry_powder	pumpkin_spice	2.75
 11 | full-HD	1080p	4.00
 12 | cheddah	cheddar	0.25
 13 | convocation	gathering	3.56
 14 | random_seed	BiLSTM	1.56
 15 | heater	convector	3.50
 16 | half-life	ratemeter	1.81
 17 | 3D	black-and-white	1.13
 18 | Hero's_engine	aeolipile	4.00
 19 | Josef_Albers	Richard_Anuszkiewicz	2.06
 20 | MacBook	ZenBook	3.13
 21 | microwaving	pesto	0.75
 22 | primality	mathematics	2.00
 23 | Park_Ji-sung	Yosemite_Park	0.00
 24 | router	D-Link	2.13
 25 | Winamp	VLC_media_player	3.19
 26 | gown	pelerine	2.38
 27 | Malva_parviflora	cheeseweed	4.00
 28 | care	caution	3.00
 29 | rope-a-dope	WWE	1.81
 30 | oldster	dotard	3.31
 31 | navaid	HIV/Aids	0.00
 32 | excellent	top-notch	3.81
 33 | MIDlet	Oracle_Java	2.25
 34 | fakelore	photostimulation	0.06
 35 | black_hole	blackmail	0.06
 36 | night_sky	skyglow	1.94
 37 | neuropore	nervous_system	2.31
 38 | underspecification	incompleteness	2.94
 39 | exequatur	equator	0.00
 40 | Rotary_International	Rota_Island	0.06
 41 | TorPark	parkour	0.00
 42 | yellow_dwarf	yellow_pages	0.06
 43 | Mercedes-Benz	BMW	3.13
 44 | circus	ropedancer	2.00
 45 | Hoover_hog	armadillo	3.94
 46 | irresistibleness	illiterateness	0.13
 47 | metolazone	blazonry	0.00
 48 | vasocongestion	engorgement	2.88
 49 | salicylic_acid	carbonate	2.31
 50 | Kepler-11	red_giant	2.56
 51 | baby	cutee	1.56
 52 | Rubik's_Cube	RuBisCO	0.06
 53 | goatsbeard	tragopogon	4.00
 54 | fundraiser	event	2.00
 55 | going	really	0.00
 56 | transmigration	residence_permit	2.06
 57 | prospector	sourdough	3.56
 58 | sorry	srry	4.00
 59 | avionics	aeronautics	2.50
 60 | rallentando	slowly	2.88
 61 | retweeting	RTing	4.00
 62 | Apple	Applebees	0.38
 63 | exponential	logx	2.38
 64 | Zeta-Jones	Catherine_Zeta-Jones	3.88
 65 | Mosul	Mawsil	4.00
 66 | Pizza_Hut	Pizzle_rot	0.06
 67 | remainder	difference	2.44
 68 | preheat	reheat	2.63
 69 | disembodied	spiritual	2.56
 70 | crested_tit	Amazon_rainforest	0.94
 71 | afterworld	purgatory	2.88
 72 | screenshot	screengrab	3.88
 73 | practicable	goal	1.13
 74 | Skype_Lite	ooVoo	2.75
 75 | decomposition	factorization	3.31
 76 | LOL	looool	3.88
 77 | skateboard_deck	halfpipe	2.63
 78 | Breuil-Cervinia	Val_Gardena	3.06
 79 | inheritor	hoarded_wealth	1.94
 80 | appendage	swimmeret	2.69
 81 | passenger	passepied	0.06
 82 | tedious	old-fashioned	1.00
 83 | radionavigation	frequency_band	1.75
 84 | Tag_Heuer	Jaeger-LeCoultre	3.13
 85 | Followback	Twitter	2.31
 86 | weekend	race	0.13
 87 | septenary	Pleiades	1.25
 88 | monsignor	assignor	0.25
 89 | preoccupation	prepossession	1.19
 90 | spontaneousness	returnability	0.31
 91 | under-appreciated	unnoticeable	1.69
 92 | covfefe	coverage	2.69
 93 | devious	untrustworthy	2.75
 94 | comedian	stand-up	2.44
 95 | infant	breastfeeder	2.19
 96 | trusteeship	traineeship	0.31
 97 | human_face	make-up	1.88
 98 | backslash	backsolving	0.19
 99 | cr8	create	3.94
100 | fluoride	monofluoride	3.31
101 | unforeseen	unanticipated	3.88
102 | fancifully	whimsically	3.88
103 | Winn-Dixie	Winnipeg	0.13
104 | iPhone	bendgate	1.94
105 | ultracompetitiveness	overcompetitiveness	3.69
106 | 1/4	quarter	4.00
107 | modern-day	futuristic	2.38
108 | yeahh	yessss	3.94
109 | serious	tongue-in-cheek	1.88
110 | New_Zealander	enzedder	4.00
111 | circumnavigation	baggage	1.44
112 | school	intramural_program	1.81
113 | iMac	hairgate	0.94
114 | driver	autoinstall	1.50
115 | credit	borrower	2.00
116 | saving	saver	2.44
117 | hearth_tax	property_tax	3.44
118 | Britain_First	racism	2.31
119 | fetishism	fractionate	0.00
120 | carboxylesterase	carbonara	0.06
121 | wandmaker	Xanthosoma	0.00
122 | bootjack	blackjack	0.19
123 | lecturership	ERC_grant	1.31
124 | imaginativeness	non-existent	1.81
125 | swimwear	swimshorts	3.00
126 | grocery	Wal-Mart	2.31
127 | office	desklamp	2.00
128 | circumcision	foreskin	2.13
129 | Phelps	Philips	0.88
130 | reverse_engineering	copy_protection	1.25
131 | heart-wrenching	spanner	0.00
132 | unprecedented	unexpected	2.75
133 | smallish	dwarfish	3.06
134 | chasable	purchasable	0.50
135 | highlander	pathfinder	1.94
136 | borrowing	loanword	3.19
137 | DHL_Express	Deutsche_Bundespost	2.94
138 | value	rate	1.81
139 | witch-hunt	McCarthyism	2.81
140 | leggin	legging	4.00
141 | run-of-the-mill	ordinary	3.44
142 | ropewalker	tightrope	2.13
143 | Ben-Hur	Titanic	1.56
144 | 1st	2nd	2.88
145 | tonbak	tombac_alloy	0.50
146 | twitcon	twitch	0.06
147 | threadworm	pinworm	3.69
148 | roller-coaster	Disneyland_Park	2.13
149 | wrathful	hesitant	0.38
150 | Ebola	hyperbola	0.00
151 | promegakaryocyte	precursor	2.69
152 | tasteless	indelicate	3.06
153 | unwantedly	unintendedly	3.38
154 | Head_tilt	cervix	2.06
155 | relaxed	nerveless	3.06
156 | 3rd	third	4.00
157 | self-driving	unmanned	3.13
158 | leadscrew	leader	0.13
159 | constable	metpoliceuk	2.44
160 | eye-candy	mesomorphic	1.06
161 | real-world	real-life	3.56
162 | bystrite	strike	0.00
163 | union	sum	2.81
164 | weaving	tussah	1.56
165 | manpage	helpsheet	3.25
166 | bureau	subdepartment	2.56
167 | cover	horsecloth	2.44
168 | non-refundable	irredeemable	2.63
169 | hemorrhage	2morro	0.00
170 | knifemaking	spoonworm	0.00
171 | boatmaster	captain	3.44
172 | bloodloss	radiotherapist	0.88
173 | copilot	cockpit	1.94
174 | scam	cybershopping	1.06
175 | screensaver	FoodSaver	0.19
176 | headword	syntax	1.75
177 | slam-bang	suddenly	3.25
178 | pyelography	urography	3.44
179 | sandglass	hourglass	3.88
180 | sleepwalking	somnambulists	3.88
181 | Colorado_Technical_University	Colorado_Tech	4.00
182 | dishwasher-safe	sturdy	1.69
183 | galvanize	zinc	2.38
184 | banksia	Banksy	0.06
185 | teary-eyed	misty-eyed	3.63
186 | protrusion	bewitchment	0.00
187 | preteen	preadolescent	3.50
188 | bowling	bowler	2.38
189 | aviation	airscoop	1.75
190 | Golden_Delicious	Aldi	0.75
191 | platelayer	media_player	0.06
192 | casteless	portrayer	0.25
193 | pretax_income	gross_salary	3.31
194 | understood	gotchu	3.81
195 | payment	reward	2.63
196 | security	immobilizer	2.25
197 | dysentery	paratyphoid	2.56
198 | BitTorrent	peer-to-peer	2.69
199 | beleaguering	besieging	3.56
200 | Christiane_Amanpour	Hala_Gorani	2.44
201 | infection	inflection	0.25
202 | hard-boiled	pugnacious	3.81
203 | flying_fox	wurbagool	3.19
204 | bone	pseudofracture	1.69
205 | intertitle	title_card	3.75
206 | Athlete's_Foot	tinea_pedis	3.63
207 | exchange	replacement	2.63
208 | hyporesponsiveness	hyponymy	0.13
209 | skimobile	T-Mobile	0.06
210 | cannoli	Haagen-Dazs	1.56
211 | white_horehound	Marrubium_vulgare	4.00
212 | metapsychology	schizophrenia	1.50
213 | mousse	yellowcake	0.00
214 | schemer	strategist	2.56
215 | first_milk	colostrum	4.00
216 | genetics	genethics	2.25
217 | FIVB	AFib	0.06
218 | thing	think	0.00
219 | AccuRay	AccuWeather	0.25
220 | Anthony_Joshua	Persepolis	0.69
221 | slipper	daysleeper	0.25
222 | romance	tweethearts	2.06
223 | matchlock	arquebusier	1.25
224 | chickpea	Chick-fil-A	0.44
225 | 3D_glasses	one-dimensional	1.13
226 | AKG	Mercedes-AMG	0.44
227 | minute_of_arc	arcmin	3.88
228 | red_tape	political_campaign	0.81
229 | postmortem_examination	autopsy	4.00
230 | unformed	uniformed	0.13
231 | Addison's_Disease	corticosteroid	2.06
232 | roofball	backyard	0.56
233 | Guasacaca	gnocchi	1.25
234 | x-intercept	inception	0.50
235 | Sotloff	type-c	0.00
236 | Boko_Haram	Taliban	2.81
237 | Nobelist	novelist	0.44
238 | prognosticator	foreteller	3.75
239 | editor	Redditor	1.25
240 | director	cinematography	2.00
241 | pike	spontoon	3.25
242 | autopilot	autophyte	0.25
243 | surrounding	ecclesiastic	0.00
244 | abetalipoproteinemia	familial_dysbetalipoproteinemia	2.44
245 | do-over	leftover	0.25
246 | perpend	stipend	0.00
247 | placidity	acidity	0.06
248 | IOCCC	ACM-ICPC	2.81
249 | ISIS	terrorism	2.75
250 | undersuit	understanding	0.06
251 | saccharose	sucrose	3.94
252 | DVB-H	mobile_TV	2.75
253 | science-fiction	sci-fi	4.00
254 | swan	seaduck	2.81
255 | plane	sheet	0.75
256 | amusement_park	hypercoaster	2.25
257 | ATI	Nvidia	3.06
258 | choice	election	2.13
259 | little_bee-eater	Merops_pusillus	4.00
260 | b-day	present	2.00
261 | Kilroy-Silk	Nick_Clegg	2.50
262 | orogeny	progeny	0.19
263 | Yun-Fat	fatness	0.00
264 | ballistic_missile	combust	1.31
265 | slave	hierodule	3.13
266 | combination	union	2.88
267 | 2mro	tomorrow	4.00
268 | pestis	pestilence	4.00
269 | CUDA	parallelize	2.13
270 | Kurdish	Turkey	2.50
271 | syllable	lemma	2.13
272 | Stephen_Hawking	hawk	0.06
273 | Under_Armour	armor	0.13
274 | DNA	transactivator	1.56
275 | review	IrfanView	0.19
276 | dialler	modem	1.75
277 | lukewarmness	unattractiveness	0.81
278 | thoughtful	die-hard	0.13
279 | doorcase	door_frame	3.94
280 | judge	court	2.25
281 | boutta	boat	0.00
282 | algebraist	eigenvalue	1.69
283 | biting_point	clutch	2.19
284 | Sam_Mraovich	Ben_and_Arthur	2.00
285 | 0.5	half	4.00
286 | fast-forward	tape	1.81
287 | 7-Zip	C++	1.38
288 | HD	high-definition	4.00
289 | entity	corporation	2.00
290 | flag	tricolor	2.81
291 | collaborator	cooperator	3.25
292 | Douglas	Isle_of_Man	2.44
293 | farsightedness	hyperopia	3.25
294 | halomethane	oscilloscope	0.56
295 | coincineration	waste_treatment	2.63
296 | NGC_4622	Ursa_Major	2.00
297 | fake_news	Fox_News	1.56
298 | eye_of_the_storm	cyclone	2.31
299 | astronomy	finderscope	1.75
300 | phototransmutation	photodisintegration	4.00
301 | nonmonogamy	consanguinity	0.44
302 | microfiche	photocopy	1.63
303 | Commodore_64	microcomputer	2.44
304 | billhook	plough	2.13
305 | AirBake	A-bike	0.13
306 | transplant	autograft	2.56
307 | thylakoid	photosynthesis	2.06
308 | gnuplot	Plotinus	0.00
309 | delayingly	slothfully	2.75
310 | LAMO	lambda	0.00
311 | payment	note	1.63
312 | EAGGF	market	1.25
313 | Champions_League	Champlain_Sea	0.00
314 | physiopathology	medical_diagnosis	1.75
315 | for_sure	fasho	4.00
316 | autocrime	truck	1.06
317 | geometry	cylindric	1.69
318 | 4sho	surely	4.00
319 | disomy	intragenic	1.81
320 | revelatory	apocalyptical	3.88
321 | pain	neckache	2.56
322 | imperfectness	imperfection	3.69
323 | electrolytic_polishing	electropolishing	3.94
324 | Brexit	Fatuzzo	0.56
325 | Visual_C	.NET_Framework	2.31
326 | microwave	All-Clad	1.75
327 | stubbornly	single-mindedly	2.94
328 | smdh	smfh	3.75
329 | njoy	killjoy	1.50
330 | cool	swagg	2.56
331 | combat	belligerence	2.25
332 | Bushihr	Donald_Horne	0.00
333 | cheap	low-budget	3.50
334 | Bitcoin	exchange_rate	1.69
335 | impossible	far-fetched	3.44
336 | nonnative	nonnegative	0.06
337 | reduction	overeducation	0.06
338 | vanilla	pastrycook	1.69
339 | protein	CIITA	1.94
340 | descensus	prolapse	4.00
341 | microsyringe	hyperfunction	0.25
342 | washbasin	handbasin	3.88
343 | geneflow	gene_migration	4.00
344 | knowingness	cognizance	3.94
345 | predominance	prepotency	3.75
346 | stellar_nucleosynthesis	nitrogen-14	2.19
347 | Tefal	T-Fal	4.00
348 | real_estate	Millcraft	2.00
349 | snow_leopard	Summer_snowflake	0.19
350 | flysheet	tent	2.50
351 | feeing	feeling	0.06
352 | Snowblood	Japan	1.44
353 | infinity	alligation	1.00
354 | wanna-be	Kardashians	0.38
355 | film_production	filmmaking	3.56
356 | cartshed	bloodshed	0.06
357 | hypocrisy	pretense	3.06
358 | zoonosis	tepoxalin	1.56
359 | Shelby_GT350	Shelby_Farms	0.19
360 | drop-off	rip-off	0.19
361 | mode	fashion	3.38
362 | semivowel	aspirate	2.44
363 | Ferguson	Stamford_Bridge	1.44
364 | Seven_foot	growth_hormone	1.94
365 | down-to-earth	practical	3.50
366 | textual_matter	concordance	2.06
367 | Phenelzine	XXXXX_syndrome	0.69
368 | sqrt	square_root	4.00
369 | sunsuit	beach	1.75
370 | epidiascope	epidioscope	4.00
371 | Kile	kite	0.06
372 | leaf_roller	leaflet	0.31
373 | Cyber-Shot	Coolpix	2.81
374 | voidness	emptyness	3.63
375 | shot	close-up	2.75
376 | Chad	Yemen	2.13
377 | hygiene	autoflush	1.31
378 | currency	concurrency	0.13
379 | Deathstalker	Wikipedia	0.00
380 | thaumasite	silicate_mineral	3.06
381 | must-see	interesting	3.06
382 | cycloheximide	Streptomyces_bacteria	2.25
383 | 25	twenty-five	4.00
384 | ceramic	pottery	2.75
385 | constipation	constitutionalisation	0.00
386 | protagonist	deuteragonist	2.81
387 | overt	undisclosed	1.88
388 | Java_DB	J2SE	2.63
389 | underclothes	underlinen	2.81
390 | dentinogenesis_imperfecta	itch	0.31
391 | Democracy_Player	Miro	3.81
392 | handpan	belieber	0.25
393 | Google_Earth	3D_imagery	2.00
394 | bigot	4got	0.13
395 | bankruptcy	failure	1.94
396 | dreadnought	battleship	3.06
397 | noisemonger	electric_guitar	1.81
398 | leucosis	Bird_flu	2.31
399 | Autodesk_Revit	3D_Studio_Max	2.88
400 | supremacy	omnipotence	3.00
401 | letting	rental	3.63
402 | entryphone	door_phone	3.88
403 | inharmonious	architecture	0.81
404 | hugger-mugger	disorganized	3.75
405 | organelle	endosymbiotic	1.44
406 | wrathfully	self-righteously	1.38
407 | altimeter	all-time	0.00
408 | WWII	Fishbed	1.31
409 | Kobani	Bale	0.00
410 | shit	shxt	3.94
411 | fishplate	template	0.06
412 | extraterrestrial_being	hypothetical_creature	2.56
413 | applicant	application	2.31
414 | lemmatization	treebank	1.63
415 | imperishingness	easygoingness	0.00
416 | Valentine's_Day	lovee	2.00
417 | mozzarella	pizzamaker	1.56
418 | cop	copilot	0.38
419 | clinic	triclinic	0.06
420 | isotope	dioxygen	1.31
421 | Al-Jazeera	Mehdi_Hasan	1.94
422 | bedgown	bedrobe	2.63
423 | stepsize	stepfather	0.06
424 | concise	one-liner	2.44
425 | human_herpesvirus_4	Epstein-Barr	4.00
426 | leathercrafter	leatherworker	3.69
427 | oceanic_trench	sea_floor	2.50
428 | brainless	sheepish	2.75
429 | missed_call	so-called	0.13
430 | metamorphose	transmute	3.81
431 | bood_clot	agammaglobulinemia	1.63
432 | man-at-arms	traitorous	0.19
433 | fringe_benefit	bonus	3.00
434 | headcollar	hair_color	0.00
435 | madrier	plank	3.19
436 | Boeing_747	Stratojet	2.75
437 | teratospermia	teratozoospermia	4.00
438 | tariqa	tariqah	4.00
439 | seriously	foreal	3.75
440 | insidiously	harmfully	3.50
441 | pasteurization	cowkeeper	0.81
442 | vindictively	revengefully	3.94
443 | scornful	contemptuous	3.94
444 | jawfish	streamwater	1.19
445 | mollusc	parapodia	1.63
446 | cheapjack	amberjack	0.06
447 | dual_screen	off-screen	1.00
448 | Regosol	prioritarianism	0.00
449 | end	tail	3.00
450 | DVD	Blu-ray	3.19
451 | Gorilla_Glass	scratch-resistant	2.31
452 | pacemaker	lacemaker	0.19
453 | starless	starkness	0.06
454 | foul-mouthed	foul-spoken	4.00
455 | snow_lotus	Saussurea	3.69
456 | inadvertence	oversight	3.38
457 | mark	grade	3.75
458 | Wristwatch	smartwatch	2.88
459 | dessertspoon	vanilla	1.38
460 | Christstollen	flour	1.75
461 | massive	astronomical	3.38
462 | sweeeet	sweet	3.94
463 | Alcohol_withdrawal_syndrome	Whiskey_fits	3.31
464 | kicksled	sledge	2.88
465 | errbody	embody	0.25
466 | bluethroat	nightingale	3.06
467 | proportional_tax	capitation	2.69
468 | children	dollmaking	1.31
469 | about	bouta	3.94
470 | fairness	unbiasedness	3.69
471 | PiO	President-in-Office	4.00
472 | greyishness	oil_paint	0.75
473 | dunno	idek	3.69
474 | In-N-Out_Burger	Jamie_Oliver	1.81
475 | rail	minecart	2.00
476 | parameter	hyperparameter	3.00
477 | undercover	self-indulgent	0.00
478 | expounding	anlysis	2.75
479 | ontologize	democratize	0.50
480 | Woods	Phil_Mickelson	3.00
481 | Nutella	Belgian_Congo	0.06
482 | transalpine	pretense	0.00
483 | latewood	fastfood	0.00
484 | skater	pool_skating	2.31
485 | ctenophore	comb_jelly	3.94
486 | actinomycin_D	infertility	0.75
487 | oilbird	beak	2.31
488 | magnet	gyromagnetic	2.25
489 | jaw-dropping	eyedropper	0.06
490 | foot	base	1.69
491 | coalitionist	casern	0.38
492 | fundamentalism	intergovernmentalism	0.63
493 | angle	angleworm	0.25
494 | relaxedly	cold-bloodedly	2.06
495 | thought-provoking	provocative	1.63
496 | midquel	sequel	3.13
497 | Orient_Heights	Constitution_beach	2.31
498 | engine	Shovelhead	2.56
499 | opopanax	opobalsam	3.94
500 | agriculture	monoculture	2.50
501 | cowberry	lingonberry	3.75
502 | Schwartz-Jampel	lathosterol	0.63
503 | movie	must-see	1.13
504 | nephoscope	meteorologist	1.63
505 | dripstone	cave	1.56
506 | disinvestment	subtropical	0.00
507 | promise	promiscuous	0.06
508 | puncture	tricycle	1.25
509 | AirDrop	airdome	0.00
510 | univocal	unquestionable	3.19
511 | maximization	maxilliped	0.06
512 | Hindooism	Hinduism	3.94
513 | co-prime	Amazon_Prime	0.00
514 | liberty	censorship	1.69
515 | seater	fruiteater	0.00
516 | Smorgasburg	food_market	2.75
517 | hydrohalic_acid	dehydrohalogenation	1.94
518 | fibroplasia	angioplasty	1.94
519 | prefinancing	investment	2.50
520 | loool	ctfu	2.94
521 | Brooklyn	Brklyn	4.00
522 | decomposition	biowaste	2.06
523 | snapshot	clapshot	0.06
524 | Mohammed_Emwazi	Jihadi_John	3.88
525 | gap	opening	3.19
526 | extramarital	unmentionable	0.75
527 | omniscience	discovery	1.25
528 | VoIP	interconnectedness	0.88
529 | offering	contribution	2.88
530 | tweet	retweet	3.25
531 | commodious	hand-held	0.44
532 | lmbo	jajajajaja	3.63
533 | autosuggestion	autocomplete	3.44
534 | horsecart	throatlatch	1.75
535 | coin	coinsurance	0.31
536 | boatman	logboat	1.75
537 | Roman_Church	Inquisition	2.06
538 | puddingwife	plumcake	0.25
539 | carbinolamine	hemiaminal	3.88
540 | grimy	imy	0.06
541 | defense_mechanism	anxiety	1.44
542 | lymphocyte	T-cell	3.00
543 | oneirocritic	dream	2.06
544 | medallist	Emirates_Stadium	0.81
545 | bottle	can	2.50
546 | sowing	plantlet	1.81
547 | Buxus	defoliator	1.25
548 | Seoul	Yo-jong	1.00
549 | strive	foreclose	0.31
550 | IE4	Internet_Explorer_4	4.00
551 | blitz	attack	3.19
552 | praecipe	praecipuum	0.13
553 | fundholder	construction	0.44
554 | foodstuff	feedingstuff	2.88
555 | filtrate	first-rate	0.00
556 | ITV2	ITV_Two	4.00
557 | photophone	homophone	0.25
558 | wrinkle	Early_aging	1.81
559 | water_bearer	Aquarius	3.81
560 | novelist	sub-plot	1.63
561 | fruiterer	fruit	2.00
562 | electrodiagnosis	ecosocialist	0.06
563 | dukedom	princedom	2.75
564 | thought_process	thought-provoking	1.94
565 | draftswoman	watercolor_painting	1.75
566 | census	nosecount	4.00
567 | Mathematica	differential_equation	2.06
568 | soldier	footguard	2.94
569 | Fish_and_chips	Nando's	1.38
570 | Scooby-Doo	Shaun_the_Sheep	2.75
571 | post-apocalyptic	postage	0.00
572 | pragmatics	stemmatics	1.50
573 | locomotion	zoospore	0.25
574 | transaction	cyberbank	1.75
575 | iCloud	cyberlocker	2.69
576 | Ramadan	Al_Ramadi	0.44
577 | erythroleukemia	blood_cancer	3.06
578 | vestibular_system	Ototoxicity	1.81
579 | miniguide	tourist	1.69
580 | miniskirt	microskirt	3.31
581 | all-star	legendary	2.44
582 | X-Files	science_fiction_drama	2.56
583 | Adobe_AfterEffects	Canon_EOS_6D	1.63
584 | responsibility	respondee	0.50
585 | surgery	therapeutical	1.56
586 | Fiat_500	venice	0.69
587 | K-PAX	Kevin_Spacey	1.94
588 | overcurious	inquisitive	3.06
589 | Top_Gear	Isuzu_Vehicross	1.19
590 | European_Parliament	Oomen-Ruijten	1.81
591 | June	JunB	0.06
592 | PyeongChang_2018	Richard_Seymour	0.81
593 | Apple	monopolist	0.94
594 | 2nite	2mrw	2.38
595 | biological_function	teleosemantics	2.19
596 | single-handedly	independently	2.69
597 | trail_and_error	trialogue	0.06
598 | Fire_TV	Snapchat	0.94
599 | trilogy	mini-series	2.44
600 | Hale-Bopp	Thomas_Bopp	2.00
601 | groundcrew	airport	1.88
602 | roadster	musclebike	3.06
603 | corncutter	cornsilk	1.50
604 | greengrocery	grocery	3.06
605 | Galaxy_S8	Samsung_S8	3.63
606 | early	precociously	2.38
607 | PGA_Tour	Steven_O'Hara	1.94
608 | all-nighter	deadline	1.50
609 | favism	broad_bean	1.69
610 | Blind_Freddy	window_blind	0.06
611 | soon	sooon	3.94
612 | religion	secularist	2.00
613 | down	feather	2.88
614 | mini-USB	minium	0.06
615 | anamorphosis	replica	1.31
616 | wheel	cyclometer	1.50
617 | macrocosmic	Big_Bang	2.13
618 | raincoat	rainjacket	3.88
619 | penthouse	premises	1.88
620 | pigswill	swill	3.81
621 | fifth	top	0.81
622 | tubocurarine	anesthetic	1.38
623 | C#	C++	3.25
624 | reburial	tomb	2.19
625 | gluttonously	voraciously	3.19
626 | Cambridge_University	Anglia_Ruskin	2.88
627 | New_York	Mnhttn	2.75
628 | stage_fright	diffidence	2.44
629 | Chelsea_tractor	Blvd	1.06
630 | offprint	printer	1.44
631 | Inkscape	vector_graphics	2.19
632 | hashtag	insta	1.69
633 | English_Channel	History_Channel	0.13
634 | hankering	handling	0.25
635 | double-faults	Wimbledon	1.75
636 | BMW	omw	0.06
637 | nutmeg	mace	2.56
638 | blithesome	light-hearted	3.94
639 | spiritualist	autographic	0.00
640 | frizzen	flintlock_firearm	2.19
641 | work_permit	Blue_Card	2.94
642 | slow-cooker	crock-pot	3.81
643 | doggo	invisible	2.94
644 | irritate	antagonize	2.81
645 | archer	pathfinder	1.38
646 | Ossessione	Italian_neorealism	1.69
647 | piscary	containership	0.50
648 | unhygienically	lip-smackingly	0.38
649 | tissue	abiotrophy	1.81
650 | swordmaker	weapon	2.06
651 | hyperproliferation	cell	2.19
652 | Milk_spots	Chris_Milk	0.06
653 | wallhack	Xbox	1.31
654 | Keras	Titan_Xp	1.44
655 | Tandoori_chicken	LeCreuset	1.00
656 | whole	basically	0.25
657 | shapeless	amorphous	3.94
658 | rule	convention	3.00
659 | cotransfection	siRNA	2.13
660 | multiplayer	Supersonic_Warriors	1.88
661 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/data/mturk_771.txt:
--------------------------------------------------------------------------------
  1 | 0	access	gateway	3.791666667
  2 | 1	account	explanation	2
  3 | 2	account	invoice	3.75
  4 | 3	account	statement	3.681818182
  5 | 4	acoustic	remedy	1.227272727
  6 | 5	acrylic	cloth	2.739130435
  7 | 6	action	adjustment	2
  8 | 7	action	entrance	1.583333333
  9 | 8	activity	event	4.083333333
 10 | 9	activity	music	2.681818182
 11 | 10	activity	skiing	3.45
 12 | 11	addition	segment	2.5
 13 | 12	adhesive	glue	4.608695652
 14 | 13	adult	dentist	2.47826087
 15 | 14	adult	doctor	2.782608696
 16 | 15	afternoon	substance	1
 17 | 16	age	childhood	3.782608696
 18 | 17	agency	army	2.916666667
 19 | 18	agency	office	3.857142857
 20 | 19	agency	police	3.19047619
 21 | 20	agent	spy	4
 22 | 21	agreement	contract	4.476190476
 23 | 22	aim	purpose	4.363636364
 24 | 23	aircraft	balloon	2.869565217
 25 | 24	aircraft	yacht	2.434782609
 26 | 25	alarm	horn	3.458333333
 27 | 26	alarm	press	2
 28 | 27	algorithm	search	1.863636364
 29 | 28	alien	stranger	3.428571429
 30 | 29	alloy	metal	3.954545455
 31 | 30	alphabet	script	3.5
 32 | 31	aluminum	oxygen	1.608695652
 33 | 32	amount	distance	1.958333333
 34 | 33	amount	number	4.136363636
 35 | 34	amount	season	1.434782609
 36 | 35	amusement	athletics	2.6
 37 | 36	amusement	play	4.041666667
 38 | 37	amusement	procedure	1.454545455
 39 | 38	anatomy	creation	2.565217391
 40 | 39	animal	flora	2
 41 | 40	animal	worm	2.72
 42 | 41	ankle	joint	4.227272727
 43 | 42	anniversary	birthday	3.727272727
 44 | 43	answer	plea	2.44
 45 | 44	apparel	dress	4.227272727
 46 | 45	appearance	shadow	2.52173913
 47 | 46	apple	bank	1.125
 48 | 47	apple	orange	3.47826087
 49 | 48	apple	pod	2.043478261
 50 | 49	appliance	dryer	3.333333333
 51 | 50	appliance	refrigerator	4.227272727
 52 | 51	approach	swing	1.652173913
 53 | 52	approval	encouragement	2.863636364
 54 | 53	approving	interaction	2
 55 | 54	arc	rainbow	3.347826087
 56 | 55	architecture	engineering	3.25
 57 | 56	area	patio	2.545454545
 58 | 57	area	region	4.318181818
 59 | 58	argument	evidence	3.045454545
 60 | 59	argument	indication	1.772727273
 61 | 60	arm	arrow	2.230769231
 62 | 61	arm	missile	2.772727273
 63 | 62	armor	mail	1.913043478
 64 | 63	army	legion	3.285714286
 65 | 64	aroma	fragrance	4.681818182
 66 | 65	aroma	smell	4.19047619
 67 | 66	arrangement	blizzard	1.25
 68 | 67	arrangement	room	2.380952381
 69 | 68	article	girl	1.208333333
 70 | 69	artillery	gun	3.541666667
 71 | 70	aspen	maple	2.791666667
 72 | 71	ass	donkey	4.85
 73 | 72	assembly	crowd	3.363636364
 74 | 73	assets	capital	4.090909091
 75 | 74	assets	credit	3.47826087
 76 | 75	assets	income	4.038461538
 77 | 76	association	organization	4.362318841
 78 | 77	athlete	participant	3.458333333
 79 | 78	athletics	racing	3.826086957
 80 | 79	athletics	swimming	3.45
 81 | 80	attitude	notice	1.486486486
 82 | 81	attitude	study	1.88
 83 | 82	attorney	lawyer	4.681818182
 84 | 83	attraction	pressure	1.565217391
 85 | 84	attraction	quality	2.347826087
 86 | 85	attribute	condition	2.576923077
 87 | 86	average	time	2.380952381
 88 | 87	baby	computer	1.24
 89 | 88	bail	bond	3.086956522
 90 | 89	bait	instrument	2.090909091
 91 | 90	bait	pump	1.208333333
 92 | 91	bakery	work	2.541666667
 93 | 92	bakery	workplace	3
 94 | 93	balance	gauge	2.6
 95 | 94	ball	egg	1.727272727
 96 | 95	ball	nut	1.727272727
 97 | 96	ball	poker	2.165217391
 98 | 97	ball	sphere	4.142857143
 99 | 98	band	circle	2.954545455
100 | 99	bar	needle	1.772727273
101 | 100	bar	rod	4.047619048
102 | 101	barrel	gallon	3.153846154
103 | 102	base	club	1.619047619
104 | 103	base	stock	2.476190476
105 | 104	baseball	softball	3.230769231
106 | 105	basin	vessel	4.076923077
107 | 106	basketball	hockey	2.782608696
108 | 107	basketball	squash	2.833333333
109 | 108	bathroom	chamber	2.681818182
110 | 109	battle	fight	4.583333333
111 | 110	bay	mere	1.583333333
112 | 111	beach	chain	1.047619048
113 | 112	beach	ridge	2.260869565
114 | 113	beam	column	2.961538462
115 | 114	beat	meter	1.92
116 | 115	beat	rhythm	4.363636364
117 | 116	bed	layer	2.136363636
118 | 117	bedroom	cell	2.136363636
119 | 118	bedroom	construction	2.208333333
120 | 119	bee	insect	4.043478261
121 | 120	beef	cattle	4.173913043
122 | 121	beef	meat	4.619047619
123 | 122	beginner	novice	4.166666667
124 | 123	behavior	purpose	2.304347826
125 | 124	belief	magic	2.565217391
126 | 125	belief	purpose	2.333333333
127 | 126	bench	seat	4.428571429
128 | 127	bend	curve	4.541666667
129 | 128	berry	citrus	3
130 | 129	bill	invoice	4.588235294
131 | 130	billboard	structure	3
132 | 131	bin	box	3.956521739
133 | 132	bin	cup	2.217391304
134 | 133	binary	star	1.863636364
135 | 134	bird	creature	3.458333333
136 | 135	bird	solid	1.090909091
137 | 136	birth	modification	1.434782609
138 | 137	bishop	priest	4.269230769
139 | 138	bit	tool	2.708333333
140 | 139	bite	breakfast	2.869565217
141 | 140	bite	taste	3.636363636
142 | 141	black	juvenile	1.238095238
143 | 142	blade	projector	1.434782609
144 | 143	blanket	sleeve	2.111111111
145 | 144	blizzard	rash	1.307692308
146 | 145	blow	contact	1.695652174
147 | 146	blue	red	3.272727273
148 | 147	board	commission	3.173913043
149 | 148	boat	ferry	4.083333333
150 | 149	boat	vessel	3.208333333
151 | 150	body	trunk	3.260869565
152 | 151	bond	security	3.904761905
153 | 152	bone	skull	4.272727273
154 | 153	book	encyclopedia	3.904761905
155 | 154	booklet	reference	3.380952381
156 | 155	boot	kick	3.173913043
157 | 156	boot	punch	1.75
158 | 157	bottom	sole	3.347826087
159 | 158	boy	male	4.52173913
160 | 159	boy	rover	2
161 | 160	brace	stand	2.913043478
162 | 161	brain	head	4.173913043
163 | 162	brake	click	1.761904762
164 | 163	branch	department	4.043478261
165 | 164	brand	knife	1.681818182
166 | 165	brand	surname	2.318181818
167 | 166	brandy	liquor	4.541666667
168 | 167	brass	executive	1.913043478
169 | 168	bread	bun	4.304347826
170 | 169	break	dash	2.916666667
171 | 170	break	insert	2.041666667
172 | 171	brick	cement	3.625
173 | 172	brick	strip	1.541666667
174 | 173	broadcast	packet	1.727272727
175 | 174	brochure	publication	3.875
176 | 175	brochure	reference	2.95
177 | 176	brother	member	2.583333333
178 | 177	brush	implement	2.47826087
179 | 178	budget	fund	4
180 | 179	buffer	cache	2.409090909
181 | 180	bug	child	1.276595745
182 | 181	build	flesh	1.833333333
183 | 182	building	cafe	3.142857143
184 | 183	bulb	onion	2.92
185 | 184	bulletin	news	4.666666667
186 | 185	bunny	server	1.25
187 | 186	burn	hurt	3.730769231
188 | 187	burning	flame	4.48
189 | 188	burning	punishment	2.52
190 | 189	burst	transformation	1.952380952
191 | 190	business	disposition	1.583333333
192 | 191	business	railway	2.541666667
193 | 192	butter	stick	2.375
194 | 193	butterfly	comma	1.130434783
195 | 194	cab	taxi	4.476190476
196 | 195	cake	pie	3.545454545
197 | 196	calendar	circulation	1.695652174
198 | 197	calendar	system	1.916666667
199 | 198	call	meeting	2.727272727
200 | 199	call	statement	2.125
201 | 200	campaign	operation	2.826086957
202 | 201	candy	sweet	4.510638298
203 | 202	cap	covering	3.681818182
204 | 203	capital	seat	2.32
205 | 204	captain	officer	4
206 | 205	card	plastic	2.391304348
207 | 206	carriage	coach	3.708333333
208 | 207	cart	wagon	4.375
209 | 208	cartoon	wit	2.666666667
210 | 209	case	grip	2.12
211 | 210	case	luggage	3.68
212 | 211	case	tin	2.6
213 | 212	cast	fishing	2.6
214 | 213	cat	vision	1.431818182
215 | 214	category	flavor	2.428571429
216 | 215	cave	formation	2.347826087
217 | 216	ceiling	overhead	3.52
218 | 217	cement	glue	3
219 | 218	center	loss	1.304347826
220 | 219	century	decade	3.434782609
221 | 220	century	period	3.769230769
222 | 221	century	temperature	1.130434783
223 | 222	certificate	study	3.136363636
224 | 223	certificate	wave	1.145833333
225 | 224	chair	furniture	3.869565217
226 | 225	chair	rocker	2.826086957
227 | 226	chance	opportunity	4.590909091
228 | 227	chance	probability	4.619047619
229 | 228	chandler	retailer	2.130434783
230 | 229	change	decrease	3
231 | 230	change	move	3.434782609
232 | 231	channel	sound	3.2
233 | 232	character	vision	1.666666667
234 | 233	charge	damage	2.375
235 | 234	charge	tax	4.086956522
236 | 235	check	draft	3.952380952
237 | 236	cheese	food	4
238 | 237	chemical	salt	3.590909091
239 | 238	chess	duty	1.166666667
240 | 239	chick	hen	3.863636364
241 | 240	chicken	poultry	4.217391304
242 | 241	chief	guru	3.904761905
243 | 242	child	kid	4.857142857
244 | 243	chin	feature	2.875
245 | 244	choice	option	4.590909091
246 | 245	chuck	jaw	2.090909091
247 | 246	church	temple	4.347826087
248 | 247	circle	oval	3.347826087
249 | 248	citrus	orange	4.208333333
250 | 249	climb	mount	4.166666667
251 | 250	clock	timer	4.416666667
252 | 251	cloth	satin	3.857142857
253 | 252	cloud	energy	1.727272727
254 | 253	club	society	3.869565217
255 | 254	coach	trainer	4.6
256 | 255	coat	newspaper	1.088888889
257 | 256	coat	roof	2.304347826
258 | 257	code	software	3.52
259 | 258	coffee	meeting	2.602941176
260 | 259	coin	currency	4.571428571
261 | 260	collapse	shock	3.043478261
262 | 261	collection	packet	2.772727273
263 | 262	collision	smash	3.904761905
264 | 263	color	purple	4.090909091
265 | 264	color	quality	2.407407407
266 | 265	coloring	yellow	3.791666667
267 | 266	comfort	relief	3.857142857
268 | 267	commander	editor	2.041666667
269 | 268	commerce	transport	2.739130435
270 | 269	commitment	guarantee	3.791666667
271 | 270	communication	message	4.083333333
272 | 271	communication	statement	3.391304348
273 | 272	communication	tune	2.409090909
274 | 273	community	province	3.391304348
275 | 274	community	territory	2.739130435
276 | 275	company	distributor	3.523809524
277 | 276	company	establishment	3.833333333
278 | 277	comparison	scrutiny	2.727272727
279 | 278	compound	salt	3.045454545
280 | 279	conclusion	result	4.523809524
281 | 280	condition	status	4.090909091
282 | 281	conditions	weather	3.666666667
283 | 282	congress	sex	1.818181818
284 | 283	connection	keyboard	2.4
285 | 284	conservation	traveling	1.717391304
286 | 285	construction	window	2.761904762
287 | 286	continent	ground	2.791666667
288 | 287	control	driving	3.714285714
289 | 288	cook	printer	1.347826087
290 | 289	cooking	cuisine	4.25
291 | 290	copy	image	3.25
292 | 291	copy	work	3.125
293 | 292	cord	pick	1.590909091
294 | 293	cord	yarn	2.761904762
295 | 294	corridor	hall	4.041666667
296 | 295	corruption	house	1.255319149
297 | 296	cost	postage	3.086956522
298 | 297	cost	reward	2.80952381
299 | 298	couch	lounge	3.347826087
300 | 299	count	number	4.166666667
301 | 300	counter	furniture	2.3
302 | 301	country	playground	1.791666667
303 | 302	course	starter	2.666666667
304 | 303	court	drawer	1.272727273
305 | 304	court	tribunal	3.791666667
306 | 305	cousin	relation	4.043478261
307 | 306	cousin	relative	4.5
308 | 307	cover	feather	2.44
309 | 308	cover	hair	2.65
310 | 309	cover	sleeve	3.260869565
311 | 310	covering	skin	3.35
312 | 311	crack	hole	3.04
313 | 312	craft	trade	3.458333333
314 | 313	creation	stitch	2.68
315 | 314	creativity	vision	3.181818182
316 | 315	credit	sum	3
317 | 316	creek	stream	3.88
318 | 317	crew	society	2
319 | 318	crew	unit	3.590909091
320 | 319	crop	plant	4.083333333
321 | 320	crow	jay	2.217391304
322 | 321	crown	place	1.541666667
323 | 322	crush	push	2.181818182
324 | 323	cube	dice	3.925925926
325 | 324	cup	handbag	1.318181818
326 | 325	cup	son	1.086956522
327 | 326	current	flow	3.761904762
328 | 327	curve	rainbow	3.44
329 | 328	customers	mission	1.641304348
330 | 329	cut	meat	2.904761905
331 | 330	cutter	knife	4.458333333
332 | 331	cylinder	pen	2.227272727
333 | 332	dad	parent	4.545454545
334 | 333	daisy	flower	4.5
335 | 334	damage	terms	1.761904762
336 | 335	danger	status	1.904761905
337 | 336	dash	sprint	3.375
338 | 337	dashboard	protection	2.173913043
339 | 338	database	list	4.08
340 | 339	daughter	girl	4.04
341 | 340	day	shoes	1.288888889
342 | 341	deal	hand	3.041666667
343 | 342	debt	deficit	3.458333333
344 | 343	debt	loan	4.347826087
345 | 344	debt	possession	2.481481481
346 | 345	decision	option	2.909090909
347 | 346	decrease	increase	3.541666667
348 | 347	deep	ocean	3.636363636
349 | 348	deficit	trust	1.576923077
350 | 349	degree	quantity	2.681818182
351 | 350	degree	style	1.652173913
352 | 351	delivery	distribution	3.590909091
353 | 352	department	division	4.625
354 | 353	descent	fall	3.304347826
355 | 354	desert	tract	2.2
356 | 355	desire	feeling	4.227272727
357 | 356	desk	table	4.172413793
358 | 357	determination	discovery	2.608695652
359 | 358	determination	selection	2.75
360 | 359	development	exploitation	2.2
361 | 360	device	drum	2.583333333
362 | 361	devil	satan	4.782608696
363 | 362	dialogue	play	3.44
364 | 363	diamond	parcel	1.217391304
365 | 364	digit	toe	2.64
366 | 365	digit	unit	3.681818182
367 | 366	dinner	party	3.826086957
368 | 367	direction	government	2.173913043
369 | 368	direction	protocol	3.142857143
370 | 369	direction	traveling	3.136363636
371 | 370	dirt	sand	3.391304348
372 | 371	dirt	soil	4
373 | 372	discharge	spark	2.782608696
374 | 373	disease	illness	4.739130435
375 | 374	display	language	1.916666667
376 | 375	distance	distribution	1.739130435
377 | 376	diversion	skiing	1.826086957
378 | 377	dividend	net	2.52
379 | 378	dividend	profit	4.045454545
380 | 379	diving	swim	3.96
381 | 380	dock	herb	1.238095238
382 | 381	document	report	3.826086957
383 | 382	dog	fauna	2.666666667
384 | 383	domain	land	2.523809524
385 | 384	door	light	1.577777778
386 | 385	doubt	ego	1.772727273
387 | 386	drama	genre	2.714285714
388 | 387	draw	finish	2.375
389 | 388	draw	tie	2.958333333
390 | 389	drawer	pan	1.818181818
391 | 390	dressing	patch	2.523809524
392 | 391	dressing	sauce	2.869565217
393 | 392	drill	implement	2.666666667
394 | 393	drink	slice	2.772727273
395 | 394	driver	supporter	2.090909091
396 | 395	driver	worker	3.1
397 | 396	drop	serving	2.136363636
398 | 397	drug	liquor	3.523809524
399 | 398	drug	operator	1.416666667
400 | 399	drum	piano	3.652173913
401 | 400	eagle	hawk	3.833333333
402 | 401	ear	organ	3.909090909
403 | 402	ease	relaxation	4.5
404 | 403	ease	rest	3.916666667
405 | 404	editing	instrument	1.64
406 | 405	editor	worker	3.125
407 | 406	eight	movement	1.318181818
408 | 407	element	iron	3.318181818
409 | 408	element	mixture	3
410 | 409	element	nickel	3.782608696
411 | 410	elevator	lift	4.72
412 | 411	emission	gum	1.25
413 | 412	employee	server	3.318181818
414 | 413	endorsement	signature	3.545454545
415 | 414	energy	microwave	3.583333333
416 | 415	engineering	technology	4.16
417 | 416	environment	land	3.090909091
418 | 417	equipment	recorder	3.391304348
419 | 418	equipment	seat	2.136363636
420 | 419	establishment	religion	2.173913043
421 | 420	event	influence	1.76
422 | 421	event	phenomenon	3.173913043
423 | 422	evidence	format	1.476190476
424 | 423	evidence	record	3.681818182
425 | 424	examination	quiz	4.391304348
426 | 425	examiner	tea	1.3
427 | 426	executive	minister	3.095238095
428 | 427	executive	organization	3.2
429 | 428	explanation	theory	3.652173913
430 | 429	express	mail	3.619047619
431 | 430	extract	selection	3.086956522
432 | 431	eye	organ	4.125
433 | 432	fabric	lace	3.625
434 | 433	fabric	sail	2.380952381
435 | 434	faith	religion	4.227272727
436 | 435	fantasy	recycling	1.125
437 | 436	farmer	individual	2.545454545
438 | 437	fault	mistake	4.541666667
439 | 438	fauna	toy	1.304347826
440 | 439	feature	side	1.833333333
441 | 440	feedback	flow	2.666666667
442 | 441	feeling	hope	3.48
443 | 442	female	slave	2.125
444 | 443	female	woman	4.96
445 | 444	fiction	literature	3.590909091
446 | 445	fiction	romance	2.476190476
447 | 446	field	science	3
448 | 447	field	yard	3.8
449 | 448	fight	separation	2.681818182
450 | 449	figure	stamp	1.88
451 | 450	film	movie	4.912280702
452 | 451	find	implementation	2.045454545
453 | 452	find	occurrence	2.739130435
454 | 453	finger	toe	3.76
455 | 454	flag	iris	1.708333333
456 | 455	flame	reaction	1.954545455
457 | 456	flash	lightning	3.96
458 | 457	flat	housing	3.96
459 | 458	flavor	variety	3.318181818
460 | 459	flight	trip	3.772727273
461 | 460	floor	level	3.333333333
462 | 461	floor	porch	2.739130435
463 | 462	flora	plant	4.384615385
464 | 463	flora	violet	3.347826087
465 | 464	flour	garlic	2.4
466 | 465	flower	pink	2.739130435
467 | 466	flute	wind	2.826086957
468 | 467	flyer	justice	1.181818182
469 | 468	food	sausage	3.913043478
470 | 469	foot	recognition	1.431372549
471 | 470	football	rugby	3.208333333
472 | 471	forecast	message	2.5
473 | 472	forest	ground	2.681818182
474 | 473	form	type	3.909090909
475 | 474	format	packaging	2.47826087
476 | 475	format	style	4.086956522
477 | 476	foundation	support	3.913043478
478 | 477	fox	wolf	3.090909091
479 | 478	framework	grill	3.428571429
480 | 479	freeze	frost	4
481 | 480	friend	individual	2.791666667
482 | 481	front	school	1.48
483 | 482	front	surface	3
484 | 483	fruit	seed	3.818181818
485 | 484	fuel	gasoline	4.48
486 | 485	fuel	nutrition	2.428571429
487 | 486	fund	store	1.826086957
488 | 487	funds	interest	3.375
489 | 488	furniture	table	4.181818182
490 | 489	gamble	kitty	1.636363636
491 | 490	gamble	pyramid	1.380952381
492 | 491	game	tennis	4.166666667
493 | 492	garbage	rubbish	4.520833333
494 | 493	garden	plantation	3.55
495 | 494	garlic	meal	3.083333333
496 | 495	garment	sweater	3.7
497 | 496	garment	tie	3.636363636
498 | 497	gas	hydrogen	4.090909091
499 | 498	gas	neon	3.869565217
500 | 499	gathering	parade	3.625
501 | 500	gauge	meter	3.863636364
502 | 501	gear	mechanism	4.090909091
503 | 502	gem	quartz	3.227272727
504 | 503	gender	sex	4.434782609
505 | 504	gender	size	1.571428571
506 | 505	genre	prose	2.363636364
507 | 506	glass	tub	2.88
508 | 507	glove	wear	3.416666667
509 | 508	goal	objective	4.590909091
510 | 509	golf	hockey	2.727272727
511 | 510	good	sheet	1.458333333
512 | 511	governor	mayor	3.666666667
513 | 512	governor	politician	4.125
514 | 513	graphic	image	4.318181818
515 | 514	grass	universe	1.76
516 | 515	gray	property	1.5
517 | 516	grip	handle	4.047619048
518 | 517	grip	hold	4.391304348
519 | 518	growth	process	3.291666667
520 | 519	guarantee	warranty	4.230769231
521 | 520	guess	universe	1.243243243
522 | 521	gulf	ocean	3.227272727
523 | 522	hack	machine	2.458333333
524 | 523	hamburger	nutrition	3.041666667
525 | 524	hand	script	3.260869565
526 | 525	happening	surprise	2.708333333
527 | 526	head	question	1.24
528 | 527	head	secretary	1.909090909
529 | 528	health	welfare	3.5
530 | 529	hearing	proceedings	3.130434783
531 | 530	heart	space	1.583333333
532 | 531	heart	ticker	3.181818182
533 | 532	heat	temperature	4.25
534 | 533	height	infinite	2.434782609
535 | 534	helmet	scale	1.380952381
536 | 535	help	support	4.619047619
537 | 536	help	supporter	3.833333333
538 | 537	heritage	loss	1.681818182
539 | 538	highway	street	3.545454545
540 | 539	highway	trail	2.863636364
541 | 540	hit	tourist	1.44
542 | 541	hole	opening	3.76
543 | 542	holiday	vacation	4.619047619
544 | 543	hood	protection	2.913043478
545 | 544	hood	shelter	2.571428571
546 | 545	horn	tail	2.692307692
547 | 546	horse	mount	2.875
548 | 547	hose	pipe	4
549 | 548	housing	vault	2
550 | 549	icon	representation	3.380952381
551 | 550	implement	stick	2.5
552 | 551	impulse	motive	2.869565217
553 | 552	impulse	urge	4.5
554 | 553	information	target	1.95
555 | 554	ink	liquid	3.64
556 | 555	installation	zoo	1.44
557 | 556	institution	prison	2.565217391
558 | 557	instruction	lesson	3.8
559 | 558	instruction	teaching	4.523809524
560 | 559	instrumentation	perfume	1.208333333
561 | 560	instrumentation	rod	2.863636364
562 | 561	intensity	quiet	2.181818182
563 | 562	interest	lien	2.541666667
564 | 563	intervention	treatment	2.363636364
565 | 564	inventory	listing	3.380952381
566 | 565	investment	tomato	1.155555556
567 | 566	jail	nick	1.782608696
568 | 567	jail	prison	4.739130435
569 | 568	jaw	lens	1.260869565
570 | 569	join	union	4
571 | 570	joke	message	2.636363636
572 | 571	journey	travel	4.8
573 | 572	judgment	sense	3.541666667
574 | 573	jumper	sweater	3.347826087
575 | 574	jury	school	1.476190476
576 | 575	justice	official	3.043478261
577 | 576	kiss	sweet	3
578 | 577	kitchen	toilet	2.545454545
579 | 578	knight	prince	3.375
580 | 579	knowledge	revolution	2.2
581 | 580	knowledge	taste	1.869565217
582 | 581	lake	stream	4.130434783
583 | 582	lamb	young	3.285714286
584 | 583	language	tongue	3.652173913
585 | 584	latex	rubber	3.913043478
586 | 585	law	personnel	1.772727273
587 | 586	layer	region	2.458333333
588 | 587	layer	snow	2.434782609
589 | 588	leader	politician	4.238095238
590 | 589	lesson	teaching	4.136363636
591 | 590	letter	text	3.84
592 | 591	level	stage	4
593 | 592	license	permission	4.19047619
594 | 593	license	security	3.130434783
595 | 594	lien	share	2.136363636
596 | 595	life	story	3
597 | 596	line	occupation	2.260869565
598 | 597	line	plane	2.833333333
599 | 598	line	queue	4.541666667
600 | 599	lineup	roll	2.958333333
601 | 600	link	union	3.583333333
602 | 601	lion	tiger	3.565217391
603 | 602	literature	poem	3.76
604 | 603	load	weight	4.043478261
605 | 604	location	property	3.347826087
606 | 605	loss	possession	2.954545455
607 | 606	low	shoulder	1.387755102
608 | 607	lyric	printer	1.244444444
609 | 608	magnolia	maple	2.68
610 | 609	male	man	4.619047619
611 | 610	man	soldier	3.875
612 | 611	manager	trainer	3.347826087
613 | 612	map	representation	3.434782609
614 | 613	map	sewing	1.347826087
615 | 614	map	video	1.695652174
616 | 615	maple	tree	3.833333333
617 | 616	mark	print	2.954545455
618 | 617	mark	slash	3.304347826
619 | 618	mask	roof	1.782608696
620 | 619	mate	relation	3.434782609
621 | 620	matter	text	3.269230769
622 | 621	matter	verse	1.913043478
623 | 622	mayor	water	1.113636364
624 | 623	meal	mixture	2.272727273
625 | 624	meal	rice	4
626 | 625	measure	money	2.565217391
627 | 626	measure	twist	1.590909091
628 | 627	meat	solid	2.5
629 | 628	melody	music	4.5
630 | 629	memory	operation	2.043478261
631 | 630	metal	zinc	3.956521739
632 | 631	meter	radar	2.954545455
633 | 632	microwave	radiation	3.454545455
634 | 633	middle	scene	1.4
635 | 634	minute	quantity	2.608695652
636 | 635	mode	scale	2.173913043
637 | 636	modification	surprise	1.714285714
638 | 637	moment	thief	1.242424242
639 | 638	mortal	mother	2.25
640 | 639	mortal	visitor	1.923076923
641 | 640	motion	snowboarding	2.363636364
642 | 641	motion	step	3.142857143
643 | 642	motive	reason	3.909090909
644 | 643	motorcycle	tank	2.043478261
645 | 644	mount	volcano	3.238095238
646 | 645	mouth	opening	3.304347826
647 | 646	mouth	trap	2.333333333
648 | 647	murphy	potato	1.173913043
649 | 648	museum	store	2.695652174
650 | 649	music	print	1.958333333
651 | 650	musician	performer	3.952380952
652 | 651	needle	sharp	3.954545455
653 | 652	noise	trouble	3.15
654 | 653	note	obligation	2.166666667
655 | 654	notebook	product	2.217391304
656 | 655	notebook	production	1.666666667
657 | 656	novel	story	4.363636364
658 | 657	oak	tree	4.576923077
659 | 658	objective	target	4.086956522
660 | 659	occasion	second	1.75
661 | 660	occupation	place	1.956521739
662 | 661	occurrence	tsunami	2.47826087
663 | 662	ocean	pond	3.545454545
664 | 663	office	outlet	2.142857143
665 | 664	office	situation	1.24
666 | 665	onion	topic	1.183673469
667 | 666	operation	processing	3.409090909
668 | 667	operative	spy	2.739130435
669 | 668	opinion	papers	1.72
670 | 669	opinion	sentiment	3.136363636
671 | 670	origin	root	4.333333333
672 | 671	outlet	shop	3.565217391
673 | 672	oxygen	substance	2.565217391
674 | 673	package	software	3.4
675 | 674	padding	tower	1.863636364
676 | 675	painting	picture	4.186046512
677 | 676	papers	security	2.4
678 | 677	papers	ticket	3.181818182
679 | 678	parcel	region	2
680 | 679	park	stadium	3.333333333
681 | 680	passage	quotation	3.666666667
682 | 681	patch	spot	3.181818182
683 | 682	payment	spending	3.708333333
684 | 683	permission	tolerance	2.04
685 | 684	person	technician	3.458333333
686 | 685	phantom	shadow	3.227272727
687 | 686	piazza	square	2.2
688 | 687	piece	sail	1.333333333
689 | 688	pinnacle	tower	3.083333333
690 | 689	place	position	4.230769231
691 | 690	plane	sheet	2.130434783
692 | 691	plane	tool	2.304347826
693 | 692	play	turn	2.681818182
694 | 693	plot	strategy	2.9
695 | 694	point	second	1.826086957
696 | 695	point	site	2.96
697 | 696	point	spot	4.2
698 | 697	polyester	textile	4.285714286
699 | 698	position	view	3
700 | 699	postage	rate	3.086956522
701 | 700	postage	signal	1.48
702 | 701	power	skill	3.16
703 | 702	prayer	request	3.36
704 | 703	problem	trouble	4.608695652
705 | 704	process	rule	2.5
706 | 705	process	tail	1.12
707 | 706	product	wear	2.041666667
708 | 707	property	texture	2
709 | 708	protection	roof	3.708333333
710 | 709	protocol	rule	4.090909091
711 | 710	publication	textbook	3.428571429
712 | 711	pumpkin	vine	2.409090909
713 | 712	pupil	student	4.523809524
714 | 713	pyramid	speculation	1.7
715 | 714	query	question	4.739130435
716 | 715	quiet	silence	4.909090909
717 | 716	racer	taxi	2.19047619
718 | 717	radio	receiver	3.807692308
719 | 718	rain	storm	3.958333333
720 | 719	ray	shark	2.545454545
721 | 720	recreation	skiing	3.090909091
722 | 721	red	wine	3.4
723 | 722	report	study	3.875
724 | 723	representative	voice	2.875
725 | 724	ring	water	1.333333333
726 | 725	rise	travel	1.608695652
727 | 726	rock	stone	4.476190476
728 | 727	roll	toast	2.826086957
729 | 728	root	stem	3.666666667
730 | 729	rub	wipe	4.227272727
731 | 730	rubber	stuff	2.1
732 | 731	rugby	soccer	3.260869565
733 | 732	sail	sheet	2.083333333
734 | 733	scandal	week	1.086956522
735 | 734	science	shelter	1.136363636
736 | 735	score	success	3.208333333
737 | 736	season	summer	4.045454545
738 | 737	season	winter	4.347826087
739 | 738	second	time	4.217391304
740 | 739	seminar	sweet	1.202898551
741 | 740	sex	stance	1.476190476
742 | 741	share	stake	3.208333333
743 | 742	shelter	tent	4.25
744 | 743	shelter	wind	2.641791045
745 | 744	shirt	tiger	1.038461538
746 | 745	side	slope	3.08
747 | 746	sight	vision	4.818181818
748 | 747	simulation	theory	2.227272727
749 | 748	skull	tooth	2.590909091
750 | 749	slash	stroke	3.25
751 | 750	smash	success	2.791666667
752 | 751	snap	touch	2.038461538
753 | 752	software	writing	2.347826087
754 | 753	song	vocal	3.857142857
755 | 754	soup	spaghetti	3
756 | 755	soup	sweet	2.454545455
757 | 756	speech	word	4.045454545
758 | 757	steel	weapon	3.380952381
759 | 758	step	travel	2.086956522
760 | 759	step	walk	4.173913043
761 | 760	storm	weather	4.083333333
762 | 761	straight	stretch	3.291666667
763 | 762	sun	toy	1.25
764 | 763	tank	tub	3.52173913
765 | 764	taxpayer	window	1.21875
766 | 765	throne	toilet	1.956521739
767 | 766	ticket	writing	2.375
768 | 767	victory	watch	1.553191489
769 | 768	washer	worker	2.909090909
770 | 769	wife	woman	3.884615385
771 | 770	workplace	workshop	4.04
772 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/data/rel353.txt:
--------------------------------------------------------------------------------
  1 | 0	computer	keyboard	7.62
  2 | 1	Jerusalem	Israel	8.46
  3 | 2	planet	galaxy	8.11
  4 | 3	canyon	landscape	7.53
  5 | 4	OPEC	country	5.63
  6 | 5	day	summer	3.94
  7 | 6	day	dawn	7.53
  8 | 7	country	citizen	7.31
  9 | 8	planet	people	5.75
 10 | 9	environment	ecology	8.81
 11 | 10	Maradona	football	8.62
 12 | 11	OPEC	oil	8.59
 13 | 12	money	bank	8.50
 14 | 13	computer	software	8.50
 15 | 14	law	lawyer	8.38
 16 | 15	weather	forecast	8.34
 17 | 16	network	hardware	8.31
 18 | 17	nature	environment	8.31
 19 | 18	FBI	investigation	8.31
 20 | 19	money	wealth	8.27
 21 | 20	psychology	Freud	8.21
 22 | 21	news	report	8.16
 23 | 22	war	troops	8.13
 24 | 23	physics	proton	8.12
 25 | 24	bank	money	8.12
 26 | 25	stock	market	8.08
 27 | 26	planet	constellation	8.06
 28 | 27	credit	card	8.06
 29 | 28	hotel	reservation	8.03
 30 | 29	closet	clothes	8.00
 31 | 30	soap	opera	7.94
 32 | 31	planet	astronomer	7.94
 33 | 32	planet	space	7.92
 34 | 33	movie	theater	7.92
 35 | 34	treatment	recovery	7.91
 36 | 35	baby	mother	7.85
 37 | 36	money	deposit	7.73
 38 | 37	television	film	7.72
 39 | 38	psychology	mind	7.69
 40 | 39	game	team	7.69
 41 | 40	admission	ticket	7.69
 42 | 41	Jerusalem	Palestinian	7.65
 43 | 42	Arafat	terror	7.65
 44 | 43	boxing	round	7.61
 45 | 44	computer	internet	7.58
 46 | 45	money	property	7.57
 47 | 46	tennis	racket	7.56
 48 | 47	telephone	communication	7.50
 49 | 48	currency	market	7.50
 50 | 49	psychology	cognition	7.48
 51 | 50	seafood	sea	7.47
 52 | 51	book	paper	7.46
 53 | 52	book	library	7.46
 54 | 53	psychology	depression	7.42
 55 | 54	fighting	defeating	7.41
 56 | 55	movie	star	7.38
 57 | 56	hundred	percent	7.38
 58 | 57	dollar	profit	7.38
 59 | 58	money	possession	7.29
 60 | 59	cup	drink	7.25
 61 | 60	psychology	health	7.23
 62 | 61	summer	drought	7.16
 63 | 62	investor	earning	7.13
 64 | 63	company	stock	7.08
 65 | 64	stroke	hospital	7.03
 66 | 65	liability	insurance	7.03
 67 | 66	game	victory	7.03
 68 | 67	psychology	anxiety	7.00
 69 | 68	game	defeat	6.97
 70 | 69	FBI	fingerprint	6.94
 71 | 70	money	withdrawal	6.88
 72 | 71	psychology	fear	6.85
 73 | 72	drug	abuse	6.85
 74 | 73	concert	virtuoso	6.81
 75 | 74	computer	laboratory	6.78
 76 | 75	love	sex	6.77
 77 | 76	problem	challenge	6.75
 78 | 77	movie	critic	6.73
 79 | 78	Arafat	peace	6.73
 80 | 79	bed	closet	6.72
 81 | 80	lawyer	evidence	6.69
 82 | 81	fertility	egg	6.69
 83 | 82	precedent	law	6.65
 84 | 83	minister	party	6.63
 85 | 84	psychology	clinic	6.58
 86 | 85	cup	coffee	6.58
 87 | 86	water	seepage	6.56
 88 | 87	government	crisis	6.56
 89 | 88	space	world	6.53
 90 | 89	dividend	calculation	6.48
 91 | 90	victim	emergency	6.47
 92 | 91	luxury	car	6.47
 93 | 92	tool	implement	6.46
 94 | 93	competition	price	6.44
 95 | 94	psychology	doctor	6.42
 96 | 95	gender	equality	6.41
 97 | 96	listing	category	6.38
 98 | 97	video	archive	6.34
 99 | 98	oil	stock	6.34
100 | 99	governor	office	6.34
101 | 100	discovery	space	6.34
102 | 101	record	number	6.31
103 | 102	brother	monk	6.27
104 | 103	production	crew	6.25
105 | 104	nature	man	6.25
106 | 105	family	planning	6.25
107 | 106	disaster	area	6.25
108 | 107	food	preparation	6.22
109 | 108	preservation	world	6.19
110 | 109	movie	popcorn	6.19
111 | 110	lover	quarrel	6.19
112 | 111	game	series	6.19
113 | 112	dollar	loss	6.09
114 | 113	weapon	secret	6.06
115 | 114	shower	flood	6.03
116 | 115	registration	arrangement	6.00
117 | 116	arrival	hotel	6.00
118 | 117	announcement	warning	6.00
119 | 118	game	round	5.97
120 | 119	baseball	season	5.97
121 | 120	drink	mouth	5.96
122 | 121	life	lesson	5.94
123 | 122	grocery	money	5.94
124 | 123	energy	crisis	5.94
125 | 124	reason	criterion	5.91
126 | 125	equipment	maker	5.91
127 | 126	cup	liquid	5.90
128 | 127	deployment	withdrawal	5.88
129 | 128	tiger	zoo	5.87
130 | 129	journey	car	5.85
131 | 130	money	laundering	5.65
132 | 131	summer	nature	5.63
133 | 132	decoration	valor	5.63
134 | 133	Mars	scientist	5.63
135 | 134	alcohol	chemistry	5.54
136 | 135	disability	death	5.47
137 | 136	change	attitude	5.44
138 | 137	arrangement	accommodation	5.41
139 | 138	territory	surface	5.34
140 | 139	size	prominence	5.31
141 | 140	exhibit	memorabilia	5.31
142 | 141	credit	information	5.31
143 | 142	territory	kilometer	5.28
144 | 143	death	row	5.25
145 | 144	doctor	liability	5.19
146 | 145	impartiality	interest	5.16
147 | 146	energy	laboratory	5.09
148 | 147	secretary	senate	5.06
149 | 148	death	inmate	5.03
150 | 149	monk	oracle	5.00
151 | 150	cup	food	5.00
152 | 151	journal	association	4.97
153 | 152	street	children	4.94
154 | 153	car	flight	4.94
155 | 154	space	chemistry	4.88
156 | 155	situation	conclusion	4.81
157 | 156	word	similarity	4.75
158 | 157	peace	plan	4.75
159 | 158	consumer	energy	4.75
160 | 159	ministry	culture	4.69
161 | 160	smart	student	4.62
162 | 161	investigation	effort	4.59
163 | 162	image	surface	4.56
164 | 163	life	term	4.50
165 | 164	start	match	4.47
166 | 165	computer	news	4.47
167 | 166	board	recommendation	4.47
168 | 167	lad	brother	4.46
169 | 168	observation	architecture	4.38
170 | 169	coast	hill	4.38
171 | 170	deployment	departure	4.25
172 | 171	benchmark	index	4.25
173 | 172	attempt	peace	4.25
174 | 173	consumer	confidence	4.13
175 | 174	start	year	4.06
176 | 175	focus	life	4.06
177 | 176	development	issue	3.97
178 | 177	theater	history	3.91
179 | 178	situation	isolation	3.88
180 | 179	profit	warning	3.88
181 | 180	media	trading	3.88
182 | 181	chance	credibility	3.88
183 | 182	precedent	information	3.85
184 | 183	architecture	century	3.78
185 | 184	population	development	3.75
186 | 185	stock	live	3.73
187 | 186	peace	atmosphere	3.69
188 | 187	morality	marriage	3.69
189 | 188	minority	peace	3.69
190 | 189	atmosphere	landscape	3.69
191 | 190	report	gain	3.63
192 | 191	music	project	3.63
193 | 192	seven	series	3.56
194 | 193	experience	music	3.47
195 | 194	school	center	3.44
196 | 195	five	month	3.38
197 | 196	announcement	production	3.38
198 | 197	morality	importance	3.31
199 | 198	money	operation	3.31
200 | 199	delay	news	3.31
201 | 200	governor	interview	3.25
202 | 201	practice	institution	3.19
203 | 202	century	nation	3.16
204 | 203	coast	forest	3.15
205 | 204	shore	woodland	3.08
206 | 205	drink	car	3.04
207 | 206	president	medal	3.00
208 | 207	prejudice	recognition	3.00
209 | 208	viewer	serial	2.97
210 | 209	peace	insurance	2.94
211 | 210	Mars	water	2.94
212 | 211	media	gain	2.88
213 | 212	precedent	cognition	2.81
214 | 213	announcement	effort	2.75
215 | 214	line	insurance	2.69
216 | 215	crane	implement	2.69
217 | 216	drink	mother	2.65
218 | 217	opera	industry	2.63
219 | 218	volunteer	motto	2.56
220 | 219	listing	proximity	2.56
221 | 220	precedent	collection	2.50
222 | 221	cup	article	2.40
223 | 222	sign	recess	2.38
224 | 223	problem	airport	2.38
225 | 224	reason	hypertension	2.31
226 | 225	direction	combination	2.25
227 | 226	Wednesday	news	2.22
228 | 227	glass	magician	2.08
229 | 228	cemetery	woodland	2.08
230 | 229	possibility	girl	1.94
231 | 230	cup	substance	1.92
232 | 231	forest	graveyard	1.85
233 | 232	stock	egg	1.81
234 | 233	month	hotel	1.81
235 | 234	energy	secretary	1.81
236 | 235	precedent	group	1.77
237 | 236	production	hike	1.75
238 | 237	stock	phone	1.62
239 | 238	holy	sex	1.62
240 | 239	stock	CD	1.31
241 | 240	drink	ear	1.31
242 | 241	delay	racism	1.19
243 | 242	stock	life	0.92
244 | 243	stock	jaguar	0.92
245 | 244	monk	slave	0.92
246 | 245	lad	wizard	0.92
247 | 246	sugar	approach	0.88
248 | 247	rooster	voyage	0.62
249 | 248	noon	string	0.54
250 | 249	chord	smile	0.54
251 | 250	professor	cucumber	0.31
252 | 251	king	cabbage	0.23
253 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/data/simLex.txt:
--------------------------------------------------------------------------------
   1 | 0	old	new	0.0
   2 | 1	smart	intelligent	9.76923076923077
   3 | 2	hard	difficult	9.692307692307692
   4 | 3	happy	cheerful	9.307692307692308
   5 | 4	hard	easy	0.0
   6 | 5	fast	rapid	9.846153846153847
   7 | 6	happy	glad	9.384615384615385
   8 | 7	short	long	0.0
   9 | 8	stupid	dumb	9.076923076923077
  10 | 9	weird	strange	9.692307692307692
  11 | 10	wide	narrow	0.0
  12 | 11	bad	awful	9.384615384615385
  13 | 12	easy	difficult	0.0
  14 | 13	bad	terrible	9.461538461538462
  15 | 14	hard	simple	0.0
  16 | 15	smart	dumb	0.0
  17 | 16	insane	crazy	9.615384615384615
  18 | 17	happy	mad	0.0
  19 | 18	large	huge	9.692307692307692
  20 | 19	hard	tough	9.153846153846153
  21 | 20	new	fresh	8.615384615384615
  22 | 21	sharp	dull	0.0
  23 | 22	quick	rapid	9.846153846153847
  24 | 23	dumb	foolish	8.923076923076923
  25 | 24	wonderful	terrific	9.615384615384615
  26 | 25	strange	odd	9.615384615384615
  27 | 26	happy	angry	0.0
  28 | 27	narrow	broad	0.0
  29 | 28	simple	easy	9.615384615384615
  30 | 29	old	fresh	0.0
  31 | 30	apparent	obvious	9.076923076923077
  32 | 31	inexpensive	cheap	9.692307692307692
  33 | 32	nice	generous	7.3076923076923075
  34 | 33	weird	normal	0.0
  35 | 34	weird	odd	9.76923076923077
  36 | 35	bad	immoral	7.769230769230769
  37 | 36	sad	funny	0.0
  38 | 37	wonderful	great	9.538461538461538
  39 | 38	guilty	ashamed	7.3076923076923075
  40 | 39	beautiful	wonderful	7.153846153846154
  41 | 40	confident	sure	9.384615384615385
  42 | 41	dumb	dense	7.0
  43 | 42	large	big	10.0
  44 | 43	nice	cruel	0.0
  45 | 44	impatient	anxious	8.23076923076923
  46 | 45	big	broad	8.384615384615385
  47 | 46	strong	proud	4.769230769230769
  48 | 47	unnecessary	necessary	0.0
  49 | 48	restless	young	0.38461538461538464
  50 | 49	dumb	intelligent	0.0
  51 | 50	bad	great	0.0
  52 | 51	difficult	simple	0.0
  53 | 52	necessary	important	9.076923076923077
  54 | 53	bad	terrific	0.0
  55 | 54	mad	glad	0.0
  56 | 55	honest	guilty	0.0
  57 | 56	easy	tough	0.0
  58 | 57	easy	flexible	3.6923076923076925
  59 | 58	certain	sure	9.692307692307692
  60 | 59	essential	necessary	9.846153846153847
  61 | 60	different	normal	0.0
  62 | 61	sly	clever	8.307692307692308
  63 | 62	crucial	important	9.615384615384615
  64 | 63	harsh	cruel	8.461538461538462
  65 | 64	childish	foolish	6.461538461538462
  66 | 65	scarce	rare	9.615384615384615
  67 | 66	friendly	generous	4.538461538461538
  68 | 67	fragile	frigid	0.6153846153846154
  69 | 68	long	narrow	2.1538461538461537
  70 | 69	big	heavy	4.615384615384615
  71 | 70	rough	frigid	0.07692307692307693
  72 | 71	bizarre	strange	9.692307692307692
  73 | 72	illegal	immoral	4.6923076923076925
  74 | 73	bad	guilty	4.230769230769231
  75 | 74	modern	ancient	0.0
  76 | 75	new	ancient	0.0
  77 | 76	dull	funny	0.0
  78 | 77	happy	young	0.07692307692307693
  79 | 78	easy	big	0.0
  80 | 79	great	awful	0.0
  81 | 80	tiny	huge	0.0
  82 | 81	polite	proper	7.923076923076923
  83 | 82	modest	ashamed	1.9230769230769231
  84 | 83	exotic	rare	8.307692307692308
  85 | 84	dumb	clever	0.0
  86 | 85	delightful	wonderful	9.23076923076923
  87 | 86	noticeable	obvious	9.153846153846153
  88 | 87	afraid	anxious	6.923076923076923
  89 | 88	formal	proper	8.076923076923077
  90 | 89	dreary	dull	6.923076923076923
  91 | 90	delightful	cheerful	8.461538461538462
  92 | 91	unhappy	mad	6.384615384615385
  93 | 92	sad	terrible	6.0
  94 | 93	sick	crazy	2.3846153846153846
  95 | 94	violent	angry	5.923076923076923
  96 | 95	laden	heavy	8.0
  97 | 96	dirty	cheap	0.7692307692307693
  98 | 97	elastic	flexible	7.923076923076923
  99 | 98	hard	dense	5.3076923076923075
 100 | 99	recent	new	8.461538461538462
 101 | 100	bold	proud	1.3076923076923077
 102 | 101	sly	strange	0.38461538461538464
 103 | 102	strange	sly	0.15384615384615385
 104 | 103	dumb	rare	0.0
 105 | 104	sly	tough	0.0
 106 | 105	terrific	mad	0.0
 107 | 106	modest	flexible	0.0
 108 | 107	fresh	wide	0.0
 109 | 108	huge	dumb	0.0
 110 | 109	large	flexible	0.0
 111 | 110	dirty	narrow	0.0
 112 | 111	wife	husband	0.8461538461538461
 113 | 112	book	text	2.5384615384615383
 114 | 113	groom	bride	0.8461538461538461
 115 | 114	night	day	0.0
 116 | 115	south	north	0.07692307692307693
 117 | 116	plane	airport	1.6153846153846154
 118 | 117	uncle	aunt	0.7692307692307693
 119 | 118	horse	mare	7.153846153846154
 120 | 119	bottom	top	0.07692307692307693
 121 | 120	friend	buddy	9.538461538461538
 122 | 121	student	pupil	9.615384615384615
 123 | 122	world	globe	6.3076923076923075
 124 | 123	leg	arm	0.15384615384615385
 125 | 124	plane	jet	6.076923076923077
 126 | 125	woman	man	0.0
 127 | 126	horse	colt	7.6923076923076925
 128 | 127	actress	actor	3.076923076923077
 129 | 128	teacher	instructor	9.538461538461538
 130 | 129	movie	film	9.23076923076923
 131 | 130	bird	hawk	6.769230769230769
 132 | 131	word	dictionary	2.0
 133 | 132	money	salary	4.615384615384615
 134 | 133	dog	cat	0.0
 135 | 134	area	region	9.0
 136 | 135	navy	army	1.6923076923076923
 137 | 136	book	literature	3.5384615384615383
 138 | 137	clothes	closet	1.1538461538461537
 139 | 138	sunset	sunrise	0.07692307692307693
 140 | 139	child	adult	0.0
 141 | 140	cow	cattle	6.769230769230769
 142 | 141	book	story	2.1538461538461537
 143 | 142	winter	summer	0.15384615384615385
 144 | 143	taxi	cab	9.615384615384615
 145 | 144	tree	maple	6.3076923076923075
 146 | 145	bed	bedroom	0.7692307692307693
 147 | 146	roof	ceiling	4.538461538461538
 148 | 147	disease	infection	7.076923076923077
 149 | 148	arm	shoulder	3.4615384615384617
 150 | 149	sheep	lamb	8.23076923076923
 151 | 150	lady	gentleman	1.6923076923076923
 152 | 151	boat	anchor	2.923076923076923
 153 | 152	priest	monk	5.3076923076923075
 154 | 153	toe	finger	3.3846153846153846
 155 | 154	river	stream	4.384615384615385
 156 | 155	anger	fury	7.923076923076923
 157 | 156	date	calendar	4.230769230769231
 158 | 157	sea	ocean	6.230769230769231
 159 | 158	second	minute	2.6923076923076925
 160 | 159	hand	thumb	3.3076923076923075
 161 | 160	wood	log	4.538461538461538
 162 | 161	mud	dirt	4.461538461538462
 163 | 162	hallway	corridor	8.538461538461538
 164 | 163	way	manner	6.230769230769231
 165 | 164	mouse	cat	1.7692307692307692
 166 | 165	cop	sheriff	7.461538461538462
 167 | 166	death	burial	4.846153846153846
 168 | 167	music	melody	6.461538461538462
 169 | 168	beer	alcohol	5.923076923076923
 170 | 169	mouth	lip	5.615384615384615
 171 | 170	storm	hurricane	6.846153846153846
 172 | 171	tax	income	2.769230769230769
 173 | 172	flower	violet	5.0
 174 | 173	paper	cardboard	3.5384615384615383
 175 | 174	floor	ceiling	1.6153846153846154
 176 | 175	beach	seashore	6.461538461538462
 177 | 176	rod	curtain	2.3076923076923075
 178 | 177	hound	fox	2.0
 179 | 178	street	alley	4.153846153846154
 180 | 179	boat	deck	2.230769230769231
 181 | 180	car	horn	2.3076923076923075
 182 | 181	friend	guest	3.8461538461538463
 183 | 182	employer	employee	3.4615384615384617
 184 | 183	hand	wrist	2.769230769230769
 185 | 184	ball	cannon	3.6153846153846154
 186 | 185	alcohol	brandy	5.153846153846154
 187 | 186	victory	triumph	7.615384615384615
 188 | 187	telephone	booth	1.9230769230769231
 189 | 188	door	doorway	5.538461538461538
 190 | 189	motel	inn	5.846153846153846
 191 | 190	clothes	cloth	3.6923076923076925
 192 | 191	steak	meat	6.846153846153846
 193 | 192	nail	thumb	2.6153846153846154
 194 | 193	band	orchestra	5.615384615384615
 195 | 194	book	bible	5.153846153846154
 196 | 195	business	industry	5.6923076923076925
 197 | 196	winter	season	4.0
 198 | 197	decade	century	2.6923076923076925
 199 | 198	alcohol	gin	5.538461538461538
 200 | 199	hat	coat	2.230769230769231
 201 | 200	window	door	1.5384615384615385
 202 | 201	arm	wrist	2.4615384615384617
 203 | 202	house	apartment	5.461538461538462
 204 | 203	glass	crystal	4.769230769230769
 205 | 204	wine	brandy	3.8461538461538463
 206 | 205	creator	maker	9.615384615384615
 207 | 206	dinner	breakfast	1.2307692307692308
 208 | 207	arm	muscle	2.076923076923077
 209 | 208	bubble	suds	5.3076923076923075
 210 | 209	bread	flour	1.6153846153846154
 211 | 210	death	tragedy	4.461538461538462
 212 | 211	absence	presence	0.07692307692307693
 213 | 212	gun	cannon	4.0
 214 | 213	grass	blade	1.6153846153846154
 215 | 214	ball	basket	1.4615384615384615
 216 | 215	hose	garden	0.7692307692307693
 217 | 216	boy	kid	5.0
 218 | 217	church	choir	1.4615384615384615
 219 | 218	clothes	drawer	1.0
 220 | 219	tower	bell	1.0
 221 | 220	father	parent	6.384615384615385
 222 | 221	school	grade	2.1538461538461537
 223 | 222	parent	adult	4.230769230769231
 224 | 223	bar	jail	1.0
 225 | 224	car	highway	1.0769230769230769
 226 | 225	dictionary	definition	2.6923076923076925
 227 | 226	door	cellar	0.6153846153846154
 228 | 227	army	legion	6.230769230769231
 229 | 228	metal	aluminum	4.923076923076923
 230 | 229	chair	bench	4.769230769230769
 231 | 230	cloud	fog	4.846153846153846
 232 | 231	boy	son	5.230769230769231
 233 | 232	water	ice	3.8461538461538463
 234 | 233	bed	blanket	2.0
 235 | 234	attorney	lawyer	9.461538461538462
 236 | 235	area	zone	9.153846153846153
 237 | 236	business	company	8.153846153846153
 238 | 237	clothes	fabric	3.5384615384615383
 239 | 238	sweater	jacket	4.230769230769231
 240 | 239	money	capital	5.615384615384615
 241 | 240	hand	foot	0.6923076923076923
 242 | 241	alcohol	cocktail	5.615384615384615
 243 | 242	yard	inch	1.6923076923076923
 244 | 243	molecule	atom	3.923076923076923
 245 | 244	lens	camera	2.769230769230769
 246 | 245	meal	dinner	6.6923076923076925
 247 | 246	eye	tear	1.3076923076923077
 248 | 247	god	devil	0.23076923076923078
 249 | 248	loop	belt	1.8461538461538463
 250 | 249	rat	mouse	4.6923076923076925
 251 | 250	motor	engine	8.538461538461538
 252 | 251	car	cab	3.8461538461538463
 253 | 252	cat	lion	3.1538461538461537
 254 | 253	size	magnitude	6.0
 255 | 254	reality	fantasy	0.23076923076923078
 256 | 255	door	gate	7.230769230769231
 257 | 256	cat	pet	2.6923076923076925
 258 | 257	tin	aluminum	1.7692307692307692
 259 | 258	bone	jaw	1.9230769230769231
 260 | 259	cereal	wheat	0.6923076923076923
 261 | 260	house	key	0.3076923076923077
 262 | 261	blood	flesh	0.8461538461538461
 263 | 262	door	corridor	0.23076923076923078
 264 | 263	god	spirit	2.769230769230769
 265 | 264	capability	competence	4.769230769230769
 266 | 265	abundance	plenty	7.538461538461538
 267 | 266	sofa	chair	2.769230769230769
 268 | 267	wall	brick	0.7692307692307693
 269 | 268	horn	drum	0.46153846153846156
 270 | 269	organ	liver	2.5384615384615383
 271 | 270	strength	might	3.6923076923076925
 272 | 271	phrase	word	0.6923076923076923
 273 | 272	band	parade	0.5384615384615384
 274 | 273	stomach	waist	1.3846153846153846
 275 | 274	cloud	storm	1.1538461538461537
 276 | 275	joy	pride	3.4615384615384617
 277 | 276	noise	rattle	2.769230769230769
 278 | 277	rain	mist	2.8461538461538463
 279 | 278	beer	beverage	4.384615384615385
 280 | 279	man	uncle	2.076923076923077
 281 | 280	apple	juice	0.38461538461538464
 282 | 281	intelligence	logic	3.6153846153846154
 283 | 282	communication	language	5.615384615384615
 284 | 283	mink	fur	0.46153846153846156
 285 | 284	mob	crowd	4.230769230769231
 286 | 285	shore	coast	6.384615384615385
 287 | 286	wire	cord	4.615384615384615
 288 | 287	bird	turkey	2.3076923076923075
 289 | 288	bed	crib	3.1538461538461537
 290 | 289	competence	ability	6.0
 291 | 290	cloud	haze	3.0
 292 | 291	supper	meal	3.923076923076923
 293 | 292	bar	cage	1.0
 294 | 293	water	salt	0.23076923076923078
 295 | 294	sense	intuition	6.3076923076923075
 296 | 295	situation	condition	5.538461538461538
 297 | 296	crime	theft	5.615384615384615
 298 | 297	style	fashion	5.538461538461538
 299 | 298	boundary	border	9.076923076923077
 300 | 299	arm	body	1.5384615384615385
 301 | 300	boat	car	1.3076923076923077
 302 | 301	sandwich	lunch	2.3846153846153846
 303 | 302	bride	princess	1.6923076923076923
 304 | 303	heroine	hero	6.846153846153846
 305 | 304	car	gauge	1.0769230769230769
 306 | 305	insect	bee	3.4615384615384617
 307 | 306	crib	cradle	6.769230769230769
 308 | 307	animal	person	0.9230769230769231
 309 | 308	marijuana	herb	3.076923076923077
 310 | 309	bed	hospital	1.8461538461538463
 311 | 310	cheek	tongue	1.6153846153846154
 312 | 311	disc	computer	1.6153846153846154
 313 | 312	curve	angle	2.3846153846153846
 314 | 313	grass	moss	2.5384615384615383
 315 | 314	school	law	1.6153846153846154
 316 | 315	foot	head	1.0769230769230769
 317 | 316	mother	guardian	4.076923076923077
 318 | 317	orthodontist	dentist	4.3076923076923075
 319 | 318	alcohol	whiskey	5.461538461538462
 320 | 319	mouth	tooth	2.1538461538461537
 321 | 320	breakfast	bacon	2.230769230769231
 322 | 321	bathroom	bedroom	1.4615384615384615
 323 | 322	plate	bowl	2.076923076923077
 324 | 323	meat	bacon	3.3846153846153846
 325 | 324	air	helium	1.7692307692307692
 326 | 325	worker	employer	1.0769230769230769
 327 | 326	body	chest	1.6923076923076923
 328 | 327	son	father	1.3076923076923077
 329 | 328	heart	surgery	1.1538461538461537
 330 | 329	woman	secretary	1.2307692307692308
 331 | 330	man	father	3.769230769230769
 332 | 331	beach	island	2.0
 333 | 332	story	topic	1.6153846153846154
 334 | 333	game	fun	2.230769230769231
 335 | 334	weekend	week	3.0
 336 | 335	couple	pair	6.846153846153846
 337 | 336	woman	wife	2.8461538461538463
 338 | 337	sheep	cattle	1.3846153846153846
 339 | 338	purse	bag	4.461538461538462
 340 | 339	ceiling	cathedral	1.6923076923076923
 341 | 340	bean	coffee	1.0769230769230769
 342 | 341	wood	paper	1.1538461538461537
 343 | 342	top	side	2.0
 344 | 343	crime	fraud	4.6923076923076925
 345 | 344	pain	harm	3.923076923076923
 346 | 345	lover	companion	3.6153846153846154
 347 | 346	evening	dusk	4.769230769230769
 348 | 347	father	daughter	1.1538461538461537
 349 | 348	wine	liquor	5.846153846153846
 350 | 349	cow	goat	1.2307692307692308
 351 | 350	belief	opinion	5.3076923076923075
 352 | 351	reality	illusion	1.0769230769230769
 353 | 352	pact	agreement	7.615384615384615
 354 | 353	wealth	poverty	1.0769230769230769
 355 | 354	accident	emergency	2.6153846153846154
 356 | 355	battle	conquest	2.923076923076923
 357 | 356	friend	teacher	1.6923076923076923
 358 | 357	illness	infection	4.461538461538462
 359 | 358	game	trick	1.6923076923076923
 360 | 359	brother	son	3.0
 361 | 360	aunt	nephew	1.5384615384615385
 362 | 361	worker	mechanic	2.8461538461538463
 363 | 362	doctor	orthodontist	3.076923076923077
 364 | 363	oak	maple	2.923076923076923
 365 | 364	bee	queen	1.9230769230769231
 366 | 365	car	bicycle	1.6923076923076923
 367 | 366	goal	quest	3.230769230769231
 368 | 367	august	month	3.769230769230769
 369 | 368	army	squad	4.0
 370 | 369	cloud	weather	3.1538461538461537
 371 | 370	physician	doctor	7.076923076923077
 372 | 371	canyon	valley	4.615384615384615
 373 | 372	river	valley	2.4615384615384617
 374 | 373	sun	sky	2.3076923076923075
 375 | 374	target	arrow	1.8461538461538463
 376 | 375	chocolate	pie	2.4615384615384617
 377 | 376	circumstance	situation	4.538461538461538
 378 | 377	opinion	choice	3.8461538461538463
 379 | 378	rhythm	melody	4.923076923076923
 380 | 379	gut	nerve	1.6923076923076923
 381 | 380	day	dawn	2.3076923076923075
 382 | 381	cattle	beef	3.3846153846153846
 383 | 382	doctor	professor	3.769230769230769
 384 | 383	arm	vein	2.3076923076923075
 385 | 384	room	bath	1.7692307692307692
 386 | 385	corporation	business	6.6923076923076925
 387 | 386	fun	football	2.5384615384615383
 388 | 387	hill	cliff	2.3076923076923075
 389 | 388	bone	ankle	2.3846153846153846
 390 | 389	apple	candy	0.9230769230769231
 391 | 390	helper	maid	2.923076923076923
 392 | 391	leader	manager	5.923076923076923
 393 | 392	lemon	tea	1.3846153846153846
 394 | 393	bee	ant	1.5384615384615385
 395 | 394	basketball	baseball	1.7692307692307692
 396 | 395	rice	bean	1.6923076923076923
 397 | 396	bed	furniture	2.3076923076923075
 398 | 397	emotion	passion	5.615384615384615
 399 | 398	anarchy	chaos	7.538461538461538
 400 | 399	crime	violation	5.076923076923077
 401 | 400	machine	engine	2.6153846153846154
 402 | 401	beach	sea	3.5384615384615383
 403 | 402	alley	bowl	0.6923076923076923
 404 | 403	jar	bottle	4.461538461538462
 405 | 404	strength	capability	3.769230769230769
 406 | 405	seed	mustard	0.7692307692307693
 407 | 406	guitar	drum	1.0
 408 | 407	opinion	idea	6.153846153846154
 409 | 408	north	west	0.3076923076923077
 410 | 409	diet	salad	0.9230769230769231
 411 | 410	mother	wife	2.230769230769231
 412 | 411	dad	mother	1.2307692307692308
 413 | 412	captain	sailor	4.6923076923076925
 414 | 413	meter	yard	3.4615384615384617
 415 | 414	beer	champagne	3.769230769230769
 416 | 415	motor	boat	0.7692307692307693
 417 | 416	card	bridge	0.38461538461538464
 418 | 417	science	psychology	3.5384615384615383
 419 | 418	sinner	saint	0.46153846153846156
 420 | 419	destruction	construction	0.3076923076923077
 421 | 420	crowd	bunch	5.230769230769231
 422 | 421	beach	reef	1.3846153846153846
 423 | 422	man	child	1.1538461538461537
 424 | 423	bread	cheese	0.6923076923076923
 425 | 424	champion	winner	7.153846153846154
 426 | 425	celebration	ceremony	6.461538461538462
 427 | 426	menu	order	4.076923076923077
 428 | 427	king	princess	1.4615384615384615
 429 | 428	wealth	prestige	3.4615384615384617
 430 | 429	endurance	strength	5.230769230769231
 431 | 430	danger	threat	7.153846153846154
 432 | 431	god	priest	0.6153846153846154
 433 | 432	men	fraternity	2.3846153846153846
 434 | 433	buddy	companion	6.461538461538462
 435 | 434	teacher	helper	4.0
 436 | 435	body	stomach	1.4615384615384615
 437 | 436	tongue	throat	1.3076923076923077
 438 | 437	house	carpet	0.5384615384615384
 439 | 438	intelligence	skill	3.923076923076923
 440 | 439	journey	conquest	2.076923076923077
 441 | 440	god	prey	0.6923076923076923
 442 | 441	brother	soul	0.38461538461538464
 443 | 442	adversary	opponent	8.76923076923077
 444 | 443	death	catastrophe	3.0
 445 | 444	monster	demon	5.615384615384615
 446 | 445	day	morning	2.1538461538461537
 447 | 446	man	victor	0.6923076923076923
 448 | 447	friend	guy	2.6153846153846154
 449 | 448	song	story	0.9230769230769231
 450 | 449	ray	sunshine	2.4615384615384617
 451 | 450	guy	stud	4.230769230769231
 452 | 451	chicken	rice	0.6153846153846154
 453 | 452	box	elevator	1.3846153846153846
 454 | 453	butter	potato	0.46153846153846156
 455 | 454	apartment	furniture	1.4615384615384615
 456 | 455	lake	swamp	4.3076923076923075
 457 | 456	salad	vinegar	1.2307692307692308
 458 | 457	flower	bulb	3.6153846153846154
 459 | 458	cloud	mist	5.538461538461538
 460 | 459	driver	pilot	6.461538461538462
 461 | 460	sugar	honey	4.923076923076923
 462 | 461	body	shoulder	2.3846153846153846
 463 | 462	idea	image	3.4615384615384617
 464 | 463	father	brother	1.3846153846153846
 465 | 464	moon	planet	3.0
 466 | 465	ball	costume	1.8461538461538463
 467 | 466	rail	fence	4.538461538461538
 468 | 467	room	bed	2.1538461538461537
 469 | 468	flower	bush	3.769230769230769
 470 | 469	bone	knee	3.1538461538461537
 471 | 470	arm	knee	1.1538461538461537
 472 | 471	bottom	side	0.6923076923076923
 473 | 472	vessel	vein	3.076923076923077
 474 | 473	cat	rabbit	0.46153846153846156
 475 | 474	meat	sandwich	2.5384615384615383
 476 | 475	belief	concept	5.3076923076923075
 477 | 476	intelligence	insight	5.923076923076923
 478 | 477	attention	interest	6.461538461538462
 479 | 478	attitude	confidence	4.3076923076923075
 480 | 479	right	justice	5.461538461538462
 481 | 480	argument	agreement	0.3076923076923077
 482 | 481	depth	magnitude	6.384615384615385
 483 | 482	medium	news	2.769230769230769
 484 | 483	winner	candidate	2.0
 485 | 484	birthday	date	3.076923076923077
 486 | 485	fee	payment	6.384615384615385
 487 | 486	bible	hymn	2.769230769230769
 488 | 487	exit	doorway	5.923076923076923
 489 | 488	man	sentry	2.4615384615384617
 490 | 489	aisle	hall	6.384615384615385
 491 | 490	whiskey	gin	4.615384615384615
 492 | 491	blood	marrow	1.2307692307692308
 493 | 492	oil	mink	1.0769230769230769
 494 | 493	floor	deck	6.461538461538462
 495 | 494	roof	floor	0.3076923076923077
 496 | 495	door	floor	0.3076923076923077
 497 | 496	shoulder	head	0.9230769230769231
 498 | 497	wagon	carriage	7.0
 499 | 498	car	carriage	5.0
 500 | 499	elbow	ankle	0.3076923076923077
 501 | 500	wealth	fame	3.769230769230769
 502 | 501	sorrow	shame	4.769230769230769
 503 | 502	administration	management	7.076923076923077
 504 | 503	communication	conversation	5.846153846153846
 505 | 504	pollution	atmosphere	2.5384615384615383
 506 | 505	anatomy	biology	4.461538461538462
 507 | 506	college	profession	2.4615384615384617
 508 | 507	book	topic	2.0
 509 | 508	formula	equation	6.923076923076923
 510 | 509	book	information	2.3076923076923075
 511 | 510	boy	partner	1.3076923076923077
 512 | 511	sky	universe	2.923076923076923
 513 | 512	population	people	6.3076923076923075
 514 | 513	college	class	4.769230769230769
 515 | 514	chief	mayor	4.846153846153846
 516 | 515	rabbi	minister	3.769230769230769
 517 | 516	meter	inch	2.6153846153846154
 518 | 517	polyester	cotton	1.9230769230769231
 519 | 518	lawyer	banker	1.0
 520 | 519	violin	instrument	5.384615384615385
 521 | 520	camp	cabin	2.8461538461538463
 522 | 521	pot	appliance	2.769230769230769
 523 | 522	linen	fabric	6.153846153846154
 524 | 523	whiskey	champagne	3.1538461538461537
 525 | 524	girl	child	4.769230769230769
 526 | 525	cottage	cabin	5.538461538461538
 527 | 526	bird	hen	5.461538461538462
 528 | 527	racket	noise	4.538461538461538
 529 | 528	sunset	evening	5.384615384615385
 530 | 529	drizzle	rain	9.153846153846153
 531 | 530	adult	baby	0.7692307692307693
 532 | 531	charcoal	coal	7.384615384615385
 533 | 532	body	spine	2.3846153846153846
 534 | 533	head	nail	0.9230769230769231
 535 | 534	log	timber	4.846153846153846
 536 | 535	spoon	cup	1.1538461538461537
 537 | 536	body	nerve	1.9230769230769231
 538 | 537	man	husband	3.5384615384615383
 539 | 538	bone	neck	2.1538461538461537
 540 | 539	frustration	anger	5.923076923076923
 541 | 540	river	sea	3.3076923076923075
 542 | 541	task	job	7.0
 543 | 542	club	society	5.769230769230769
 544 | 543	reflection	image	4.769230769230769
 545 | 544	prince	king	3.8461538461538463
 546 | 545	snow	weather	4.384615384615385
 547 | 546	people	party	2.3076923076923075
 548 | 547	boy	brother	3.6153846153846154
 549 | 548	root	grass	2.6923076923076925
 550 | 549	brow	eye	2.230769230769231
 551 | 550	money	pearl	1.6153846153846154
 552 | 551	money	diamond	2.3076923076923075
 553 | 552	vehicle	bus	5.461538461538462
 554 | 553	cab	bus	3.1538461538461537
 555 | 554	house	barn	3.3076923076923075
 556 | 555	finger	palm	1.4615384615384615
 557 | 556	car	bridge	1.3076923076923077
 558 | 557	effort	difficulty	3.5384615384615383
 559 | 558	fact	insight	3.3846153846153846
 560 | 559	job	management	2.8461538461538463
 561 | 560	cancer	sickness	5.0
 562 | 561	word	newspaper	3.230769230769231
 563 | 562	composer	writer	3.0
 564 | 563	actor	singer	2.769230769230769
 565 | 564	shelter	hut	4.846153846153846
 566 | 565	bathroom	kitchen	2.230769230769231
 567 | 566	cabin	hut	2.769230769230769
 568 | 567	door	kitchen	1.2307692307692308
 569 | 568	value	belief	4.0
 570 | 569	wisdom	intelligence	6.923076923076923
 571 | 570	ignorance	intelligence	1.0769230769230769
 572 | 571	happiness	luck	3.230769230769231
 573 | 572	idea	scheme	6.615384615384615
 574 | 573	mood	emotion	4.230769230769231
 575 | 574	happiness	peace	3.923076923076923
 576 | 575	despair	misery	5.3076923076923075
 577 | 576	logic	arithmetic	2.4615384615384617
 578 | 577	denial	confession	2.230769230769231
 579 | 578	argument	criticism	4.3076923076923075
 580 | 579	aggression	hostility	3.6923076923076925
 581 | 580	hysteria	confusion	2.923076923076923
 582 | 581	chemistry	theory	2.923076923076923
 583 | 582	trial	verdict	4.230769230769231
 584 | 583	comfort	safety	4.769230769230769
 585 | 584	confidence	self	3.769230769230769
 586 | 585	vision	perception	3.3846153846153846
 587 | 586	era	decade	4.076923076923077
 588 | 587	biography	fiction	2.5384615384615383
 589 | 588	discussion	argument	6.384615384615385
 590 | 589	code	symbol	3.4615384615384617
 591 | 590	danger	disease	3.076923076923077
 592 | 591	accident	catastrophe	6.3076923076923075
 593 | 592	journey	trip	6.153846153846154
 594 | 593	activity	movement	5.153846153846154
 595 | 594	gossip	news	3.0
 596 | 595	father	god	1.3076923076923077
 597 | 596	action	course	1.2307692307692308
 598 | 597	fever	illness	5.076923076923077
 599 | 598	aviation	flight	5.0
 600 | 599	game	action	2.769230769230769
 601 | 600	molecule	air	2.923076923076923
 602 | 601	home	state	2.1538461538461537
 603 | 602	word	literature	3.769230769230769
 604 | 603	adult	guardian	3.5384615384615383
 605 | 604	newspaper	information	3.076923076923077
 606 | 605	communication	television	2.769230769230769
 607 | 606	cousin	uncle	2.4615384615384617
 608 | 607	author	reader	1.6923076923076923
 609 | 608	guy	partner	2.5384615384615383
 610 | 609	area	corner	3.3846153846153846
 611 | 610	ballad	song	6.538461538461538
 612 | 611	wall	decoration	1.4615384615384615
 613 | 612	word	page	1.6153846153846154
 614 | 613	nurse	scientist	1.7692307692307692
 615 | 614	politician	president	5.923076923076923
 616 | 615	president	mayor	3.8461538461538463
 617 | 616	book	essay	2.6923076923076925
 618 | 617	man	warrior	3.1538461538461537
 619 | 618	article	journal	4.846153846153846
 620 | 619	breakfast	supper	3.230769230769231
 621 | 620	crowd	parade	2.8461538461538463
 622 | 621	aisle	hallway	5.923076923076923
 623 | 622	teacher	rabbi	3.3846153846153846
 624 | 623	hip	lip	2.076923076923077
 625 | 624	book	article	3.3076923076923075
 626 | 625	room	cell	3.1538461538461537
 627 | 626	box	booth	3.4615384615384617
 628 | 627	daughter	kid	5.0
 629 | 628	limb	leg	4.538461538461538
 630 | 629	liver	lung	2.3846153846153846
 631 | 630	classroom	hallway	2.3846153846153846
 632 | 631	mountain	ledge	2.0
 633 | 632	car	elevator	0.6153846153846154
 634 | 633	bed	couch	1.8461538461538463
 635 | 634	clothes	button	2.230769230769231
 636 | 635	clothes	coat	4.923076923076923
 637 | 636	kidney	organ	4.153846153846154
 638 | 637	apple	sauce	2.0
 639 | 638	chicken	steak	1.7692307692307692
 640 | 639	car	hose	0.6923076923076923
 641 | 640	tobacco	cigarette	7.230769230769231
 642 | 641	student	professor	2.0
 643 | 642	baby	daughter	3.923076923076923
 644 | 643	pipe	cigar	3.6153846153846154
 645 | 644	milk	juice	1.8461538461538463
 646 | 645	box	cigar	1.3846153846153846
 647 | 646	apartment	hotel	4.3076923076923075
 648 | 647	cup	cone	1.5384615384615385
 649 | 648	horse	ox	1.0769230769230769
 650 | 649	throat	nose	1.8461538461538463
 651 | 650	bone	teeth	3.3846153846153846
 652 | 651	bone	elbow	2.1538461538461537
 653 | 652	bacon	bean	1.0769230769230769
 654 | 653	cup	jar	3.230769230769231
 655 | 654	proof	fact	4.846153846153846
 656 | 655	appointment	engagement	5.076923076923077
 657 | 656	birthday	year	2.0
 658 | 657	word	clue	1.4615384615384615
 659 | 658	author	creator	6.461538461538462
 660 | 659	atom	carbon	2.6923076923076925
 661 | 660	archbishop	bishop	5.153846153846154
 662 | 661	letter	paragraph	2.076923076923077
 663 | 662	page	paragraph	1.8461538461538463
 664 | 663	steeple	chapel	4.769230769230769
 665 | 664	muscle	bone	2.1538461538461537
 666 | 665	muscle	tongue	2.5384615384615383
 667 | 666	boy	soldier	1.0
 668 | 667	belly	abdomen	7.230769230769231
 669 | 668	guy	girl	1.6923076923076923
 670 | 669	bed	chair	1.4615384615384615
 671 | 670	clothes	jacket	4.153846153846154
 672 | 671	gun	knife	1.8461538461538463
 673 | 672	tin	metal	3.6153846153846154
 674 | 673	bottle	container	3.1538461538461537
 675 | 674	hen	turkey	2.1538461538461537
 676 | 675	meat	bread	1.2307692307692308
 677 | 676	arm	bone	2.3846153846153846
 678 | 677	neck	spine	2.5384615384615383
 679 | 678	apple	lemon	1.3076923076923077
 680 | 679	agony	grief	4.923076923076923
 681 | 680	assignment	task	5.615384615384615
 682 | 681	night	dawn	1.0
 683 | 682	dinner	soup	1.6923076923076923
 684 | 683	calf	bull	4.0
 685 | 684	snow	storm	2.0
 686 | 685	nail	hand	2.923076923076923
 687 | 686	dog	horse	1.0
 688 | 687	arm	neck	1.2307692307692308
 689 | 688	ball	glove	1.7692307692307692
 690 | 689	flu	fever	4.076923076923077
 691 | 690	fee	salary	2.923076923076923
 692 | 691	nerve	brain	2.8461538461538463
 693 | 692	beast	animal	5.3076923076923075
 694 | 693	dinner	chicken	1.4615384615384615
 695 | 694	girl	maid	2.4615384615384617
 696 | 695	child	boy	5.0
 697 | 696	alcohol	wine	4.846153846153846
 698 | 697	nose	mouth	2.076923076923077
 699 | 698	street	car	1.6153846153846154
 700 | 699	bell	door	1.6153846153846154
 701 | 700	box	hat	1.0769230769230769
 702 | 701	belief	impression	3.3846153846153846
 703 | 702	bias	opinion	3.8461538461538463
 704 | 703	attention	awareness	5.923076923076923
 705 | 704	anger	mood	3.6923076923076925
 706 | 705	elegance	style	4.230769230769231
 707 | 706	beauty	age	2.5384615384615383
 708 | 707	book	theme	1.6153846153846154
 709 | 708	friend	mother	1.9230769230769231
 710 | 709	vitamin	iron	3.076923076923077
 711 | 710	car	factory	1.8461538461538463
 712 | 711	pact	condition	3.0
 713 | 712	chapter	choice	1.3846153846153846
 714 | 713	arithmetic	rhythm	1.6923076923076923
 715 | 714	winner	presence	1.9230769230769231
 716 | 715	belief	flower	1.1538461538461537
 717 | 716	winner	goal	3.5384615384615383
 718 | 717	trick	size	0.9230769230769231
 719 | 718	choice	vein	1.3846153846153846
 720 | 719	hymn	conquest	1.2307692307692308
 721 | 720	endurance	band	1.2307692307692308
 722 | 721	jail	choice	1.3076923076923077
 723 | 722	condition	boy	0.8461538461538461
 724 | 723	flower	endurance	1.0769230769230769
 725 | 724	hole	agreement	1.2307692307692308
 726 | 725	doctor	temper	1.3846153846153846
 727 | 726	fraternity	door	1.4615384615384615
 728 | 727	task	woman	1.0769230769230769
 729 | 728	fraternity	baseball	1.5384615384615385
 730 | 729	cent	size	2.230769230769231
 731 | 730	presence	door	1.6153846153846154
 732 | 731	mouse	management	0.8461538461538461
 733 | 732	task	highway	1.0769230769230769
 734 | 733	liquor	century	0.9230769230769231
 735 | 734	task	straw	0.8461538461538461
 736 | 735	island	task	0.8461538461538461
 737 | 736	night	chapter	1.2307692307692308
 738 | 737	pollution	president	1.0
 739 | 738	gun	trick	1.0
 740 | 739	bath	trick	1.1538461538461537
 741 | 740	diet	apple	1.1538461538461537
 742 | 741	cent	wife	0.6153846153846154
 743 | 742	chapter	tail	1.3846153846153846
 744 | 743	course	stomach	1.0
 745 | 744	hymn	straw	1.0769230769230769
 746 | 745	dentist	colonel	0.9230769230769231
 747 | 746	wife	straw	1.0769230769230769
 748 | 747	hole	wife	1.0769230769230769
 749 | 748	pupil	president	1.3076923076923077
 750 | 749	bath	wife	1.0769230769230769
 751 | 750	people	cent	0.0
 752 | 751	formula	log	0.8461538461538461
 753 | 752	woman	fur	0.07692307692307693
 754 | 753	apple	sunshine	0.0
 755 | 754	gun	dawn	0.0
 756 | 755	meal	waist	0.6153846153846154
 757 | 756	camera	president	0.23076923076923078
 758 | 757	liquor	band	0.0
 759 | 758	stomach	vein	0.7692307692307693
 760 | 759	gun	fur	0.15384615384615385
 761 | 760	couch	baseball	0.38461538461538464
 762 | 761	worker	camera	0.23076923076923078
 763 | 762	deck	mouse	0.07692307692307693
 764 | 763	rice	boy	0.0
 765 | 764	people	gun	0.07692307692307693
 766 | 765	cliff	tail	0.3076923076923077
 767 | 766	ankle	window	0.0
 768 | 767	princess	island	0.0
 769 | 768	container	mouse	0.0
 770 | 769	wagon	container	2.3846153846153846
 771 | 770	people	balloon	0.0
 772 | 771	dollar	people	0.0
 773 | 772	bath	balloon	0.0
 774 | 773	stomach	bedroom	0.0
 775 | 774	bicycle	bedroom	0.0
 776 | 775	log	bath	0.07692307692307693
 777 | 776	bowl	tail	0.07692307692307693
 778 | 777	go	come	1.1538461538461537
 779 | 778	take	steal	5.0
 780 | 779	listen	hear	8.307692307692308
 781 | 780	think	rationalize	6.230769230769231
 782 | 781	occur	happen	9.0
 783 | 782	vanish	disappear	9.76923076923077
 784 | 783	multiply	divide	1.8461538461538463
 785 | 784	plead	beg	7.6923076923076925
 786 | 785	begin	originate	8.307692307692308
 787 | 786	protect	defend	9.0
 788 | 787	kill	destroy	7.384615384615385
 789 | 788	create	make	8.384615384615385
 790 | 789	accept	reject	0.0
 791 | 790	ignore	avoid	7.923076923076923
 792 | 791	carry	bring	4.769230769230769
 793 | 792	leave	enter	0.6153846153846154
 794 | 793	choose	elect	8.153846153846153
 795 | 794	lose	fail	7.846153846153846
 796 | 795	encourage	discourage	0.0
 797 | 796	achieve	accomplish	8.384615384615385
 798 | 797	make	construct	8.0
 799 | 798	listen	obey	4.615384615384615
 800 | 799	inform	notify	9.153846153846153
 801 | 800	receive	give	1.5384615384615385
 802 | 801	borrow	beg	2.923076923076923
 803 | 802	take	obtain	4.0
 804 | 803	advise	recommend	5.153846153846154
 805 | 804	imitate	portray	3.3076923076923075
 806 | 805	win	succeed	6.769230769230769
 807 | 806	think	decide	2.769230769230769
 808 | 807	greet	meet	5.461538461538462
 809 | 808	agree	argue	0.9230769230769231
 810 | 809	enjoy	entertain	4.846153846153846
 811 | 810	destroy	make	0.9230769230769231
 812 | 811	save	protect	6.384615384615385
 813 | 812	give	lend	3.076923076923077
 814 | 813	understand	know	5.6923076923076925
 815 | 814	take	receive	3.5384615384615383
 816 | 815	accept	acknowledge	4.076923076923077
 817 | 816	decide	choose	4.0
 818 | 817	accept	believe	4.230769230769231
 819 | 818	keep	possess	4.076923076923077
 820 | 819	roam	wander	5.0
 821 | 820	succeed	fail	1.0
 822 | 821	spend	save	1.2307692307692308
 823 | 822	leave	go	4.076923076923077
 824 | 823	come	attend	4.538461538461538
 825 | 824	know	believe	4.384615384615385
 826 | 825	gather	meet	4.384615384615385
 827 | 826	make	earn	3.923076923076923
 828 | 827	forget	ignore	4.3076923076923075
 829 | 828	multiply	add	2.5384615384615383
 830 | 829	shrink	grow	0.8461538461538461
 831 | 830	arrive	leave	1.2307692307692308
 832 | 831	succeed	try	2.3846153846153846
 833 | 832	accept	deny	1.0769230769230769
 834 | 833	arrive	come	4.3076923076923075
 835 | 834	agree	differ	1.0769230769230769
 836 | 835	send	receive	0.8461538461538461
 837 | 836	win	dominate	5.3076923076923075
 838 | 837	add	divide	1.6923076923076923
 839 | 838	kill	choke	3.1538461538461537
 840 | 839	acquire	get	5.0
 841 | 840	participate	join	5.923076923076923
 842 | 841	leave	remain	1.7692307692307692
 843 | 842	go	enter	2.3076923076923075
 844 | 843	take	carry	2.923076923076923
 845 | 844	forget	learn	1.0
 846 | 845	appoint	elect	4.0
 847 | 846	engage	marry	4.384615384615385
 848 | 847	ask	pray	2.6923076923076925
 849 | 848	go	send	2.3076923076923075
 850 | 849	take	deliver	2.3846153846153846
 851 | 850	speak	hear	0.38461538461538464
 852 | 851	analyze	evaluate	7.923076923076923
 853 | 852	argue	rationalize	4.583333333333333
 854 | 853	lose	keep	0.23076923076923078
 855 | 854	compare	analyze	5.461538461538462
 856 | 855	disorganize	organize	0.07692307692307693
 857 | 856	go	allow	0.9230769230769231
 858 | 857	take	possess	4.846153846153846
 859 | 858	learn	listen	2.3076923076923075
 860 | 859	destroy	construct	0.0
 861 | 860	create	build	6.538461538461538
 862 | 861	steal	buy	0.0
 863 | 862	kill	hang	2.3846153846153846
 864 | 863	forget	know	0.0
 865 | 864	create	imagine	4.846153846153846
 866 | 865	do	happen	2.3076923076923075
 867 | 866	win	accomplish	6.230769230769231
 868 | 867	give	deny	0.07692307692307693
 869 | 868	deserve	earn	4.538461538461538
 870 | 869	get	put	0.5384615384615384
 871 | 870	locate	find	8.461538461538462
 872 | 871	appear	attend	3.4615384615384617
 873 | 872	know	comprehend	6.923076923076923
 874 | 873	pretend	imagine	6.6923076923076925
 875 | 874	satisfy	please	6.076923076923077
 876 | 875	cherish	keep	3.4615384615384617
 877 | 876	argue	differ	5.6923076923076925
 878 | 877	overcome	dominate	4.461538461538462
 879 | 878	behave	obey	7.0
 880 | 879	cooperate	participate	5.384615384615385
 881 | 880	achieve	try	1.8461538461538463
 882 | 881	fail	discourage	2.769230769230769
 883 | 882	begin	quit	0.0
 884 | 883	say	participate	1.6153846153846154
 885 | 884	come	bring	1.6153846153846154
 886 | 885	declare	announce	8.153846153846153
 887 | 886	read	comprehend	5.0
 888 | 887	take	leave	0.0
 889 | 888	proclaim	announce	8.076923076923077
 890 | 889	acquire	obtain	9.461538461538462
 891 | 890	conclude	decide	6.153846153846154
 892 | 891	please	plead	1.0769230769230769
 893 | 892	argue	prove	4.076923076923077
 894 | 893	ask	plead	6.076923076923077
 895 | 894	find	disappear	0.07692307692307693
 896 | 895	inspect	examine	9.153846153846153
 897 | 896	verify	justify	3.076923076923077
 898 | 897	assume	predict	3.5384615384615383
 899 | 898	learn	evaluate	3.3076923076923075
 900 | 899	argue	justify	4.230769230769231
 901 | 900	make	become	2.4615384615384617
 902 | 901	discover	originate	1.9230769230769231
 903 | 902	achieve	succeed	7.6923076923076925
 904 | 903	give	put	3.769230769230769
 905 | 904	understand	listen	4.384615384615385
 906 | 905	expand	grow	6.769230769230769
 907 | 906	borrow	sell	0.38461538461538464
 908 | 907	keep	protect	3.3076923076923075
 909 | 908	explain	prove	4.461538461538462
 910 | 909	assume	pretend	2.3846153846153846
 911 | 910	agree	please	1.1538461538461537
 912 | 911	forgive	forget	3.6923076923076925
 913 | 912	clarify	explain	7.6923076923076925
 914 | 913	understand	forgive	2.230769230769231
 915 | 914	remind	forget	0.6153846153846154
 916 | 915	get	remain	2.0
 917 | 916	realize	discover	3.1538461538461537
 918 | 917	require	inquire	1.6923076923076923
 919 | 918	ignore	ask	1.5384615384615385
 920 | 919	think	inquire	2.076923076923077
 921 | 920	reject	avoid	5.923076923076923
 922 | 921	argue	persuade	4.076923076923077
 923 | 922	pursue	persuade	2.3076923076923075
 924 | 923	accept	forgive	3.3076923076923075
 925 | 924	do	quit	0.6153846153846154
 926 | 925	investigate	examine	6.769230769230769
 927 | 926	discuss	explain	5.230769230769231
 928 | 927	owe	lend	1.0
 929 | 928	explore	discover	6.384615384615385
 930 | 929	complain	argue	4.153846153846154
 931 | 930	withdraw	reject	5.230769230769231
 932 | 931	keep	borrow	0.7692307692307693
 933 | 932	beg	ask	2.769230769230769
 934 | 933	arrange	organize	4.846153846153846
 935 | 934	reduce	shrink	5.153846153846154
 936 | 935	speak	acknowledge	2.6153846153846154
 937 | 936	give	borrow	1.6153846153846154
 938 | 937	kill	defend	0.6923076923076923
 939 | 938	disappear	shrink	3.3076923076923075
 940 | 939	deliver	carry	1.4615384615384615
 941 | 940	breathe	choke	0.7692307692307693
 942 | 941	acknowledge	notify	4.0
 943 | 942	become	seem	3.3076923076923075
 944 | 943	pretend	seem	3.769230769230769
 945 | 944	accomplish	become	4.769230769230769
 946 | 945	contemplate	think	4.846153846153846
 947 | 946	determine	predict	3.230769230769231
 948 | 947	please	entertain	1.9230769230769231
 949 | 948	remain	retain	4.769230769230769
 950 | 949	pretend	portray	2.1538461538461537
 951 | 950	forget	retain	1.0769230769230769
 952 | 951	want	choose	3.6153846153846154
 953 | 952	lose	get	0.9230769230769231
 954 | 953	try	think	2.230769230769231
 955 | 954	become	appear	3.4615384615384617
 956 | 955	leave	ignore	4.461538461538462
 957 | 956	accept	recommend	2.1538461538461537
 958 | 957	leave	wander	2.5384615384615383
 959 | 958	keep	give	1.3846153846153846
 960 | 959	give	allow	3.5384615384615383
 961 | 960	bring	send	1.0769230769230769
 962 | 961	absorb	learn	4.384615384615385
 963 | 962	acquire	find	5.076923076923077
 964 | 963	leave	appear	0.7692307692307693
 965 | 964	create	destroy	0.6153846153846154
 966 | 965	begin	go	5.461538461538462
 967 | 966	get	buy	4.846153846153846
 968 | 967	collect	save	4.384615384615385
 969 | 968	replace	restore	3.0
 970 | 969	join	add	6.384615384615385
 971 | 970	join	marry	5.6923076923076925
 972 | 971	accept	deliver	1.0
 973 | 972	attach	join	6.076923076923077
 974 | 973	put	hang	2.923076923076923
 975 | 974	go	sell	2.076923076923077
 976 | 975	communicate	pray	2.3846153846153846
 977 | 976	give	steal	1.3076923076923077
 978 | 977	add	build	2.8461538461538463
 979 | 978	bring	restore	2.769230769230769
 980 | 979	comprehend	satisfy	2.3076923076923075
 981 | 980	portray	decide	1.9230769230769231
 982 | 981	organize	become	1.8461538461538463
 983 | 982	give	know	0.7692307692307693
 984 | 983	say	verify	2.6923076923076925
 985 | 984	cooperate	join	4.230769230769231
 986 | 985	arrange	require	1.8461538461538463
 987 | 986	borrow	want	2.0
 988 | 987	investigate	pursue	5.3076923076923075
 989 | 988	ignore	explore	0.6923076923076923
 990 | 989	bring	complain	1.1538461538461537
 991 | 990	enter	owe	1.0
 992 | 991	portray	notify	1.6153846153846154
 993 | 992	remind	sell	0.6923076923076923
 994 | 993	absorb	possess	3.1538461538461537
 995 | 994	join	acquire	2.1538461538461537
 996 | 995	send	attend	2.076923076923077
 997 | 996	gather	attend	2.8461538461538463
 998 | 997	absorb	withdraw	1.0769230769230769
 999 | 998	attend	arrive	5.3076923076923075
1000 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/tasks.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | 
 4 | @dataclass
 5 | class WordSimTask:
 6 |     """
 7 |     A WordSim task.
 8 | 
 9 |     Attributes
10 |     ----------
11 |         task: The name of the task.
12 |         file: The file path to the dataset.
13 |         index1: The index of the first word in the dataset.
14 |         index2: The index of the second word in the dataset.
15 |         target: The index of the target value in the dataset
16 | 
17 |     """
18 | 
19 |     task: str
20 |     file: str
21 |     index1: int
22 |     index2: int
23 |     target: int
24 | 
25 | 
# Registry of the available word-similarity tasks. Every row below is
# (task name, data file, index of word 1, index of word 2, index of target);
# the order of the rows is the order of the resulting list.
wordsim_tasks: list[WordSimTask] = [
    WordSimTask(task=name, file=filename, index1=first, index2=second, target=score)
    for name, filename, first, second, score in (
        ("RareWord", "rw.txt", 0, 1, 2),
        ("MEN", "men.txt", 0, 1, 2),
        ("SimLex", "simLex.txt", 1, 2, 3),
        ("rel353", "rel353.txt", 1, 2, 3),
        # NOTE: this file keeps its target value before the two words.
        ("simverb", "simverb_3500.txt", 2, 3, 1),
        ("muturk", "mturk_771.txt", 1, 2, 3),
        ("Card660", "card_660.txt", 0, 1, 2),
    )
]
77 | 


--------------------------------------------------------------------------------
/evaluation/wordsim/wordsim.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from importlib import resources
  4 | from typing import Any
  5 | 
  6 | import datasets
  7 | from mteb import TaskMetadata
  8 | from mteb.abstasks import AbsTaskSTS
  9 | 
 10 | from evaluation.wordsim.tasks import wordsim_tasks
 11 | 
 12 | 
 13 | class WordSim(AbsTaskSTS):
 14 |     def __init__(self, dataset_name: str | None = None, hf_subsets: Any = None, **kwargs: Any) -> None:
 15 |         """
 16 |         Initialize a WordSim task with the given dataset name.
 17 | 
 18 |         :param dataset_name: The name of the dataset to use.
 19 |         :param hf_subsets: The Hugging Face dataset splits to use.
 20 |         :param **kwargs: Additional keyword arguments.
 21 |         """
 22 |         super().__init__(hf_subsets=hf_subsets, **kwargs)
 23 |         self.dataset_name = dataset_name
 24 |         self.metadata = TaskMetadata(
 25 |             name=dataset_name if dataset_name else "WordSim",
 26 |             description=f"Custom Word Similarity Task: {dataset_name}"
 27 |             if dataset_name
 28 |             else "Custom Word Similarity Task with Multiple Datasets.",
 29 |             reference=None,
 30 |             type="STS",
 31 |             category="s2s",
 32 |             modalities=["text"],
 33 |             eval_splits=["test"],
 34 |             eval_langs=["en"],
 35 |             main_score="spearman",
 36 |             dataset={
 37 |                 "path": "evaluation/wordsim/tasks.py",
 38 |                 "revision": "1.0.0",
 39 |             },
 40 |         )
 41 |         self.dataset_splits: dict[str, dict] = {}
 42 | 
 43 |     @property
 44 |     def min_score(self) -> int:
 45 |         """Minimum score for the similarity task."""
 46 |         return -1
 47 | 
 48 |     @property
 49 |     def max_score(self) -> int:
 50 |         """Maximum score for the similarity task."""
 51 |         return 1
 52 | 
 53 |     def load_data(self, eval_splits: Any = None) -> None:
 54 |         """Load the WordSim datasets."""
 55 |         # Load the data for each task
 56 |         for task in wordsim_tasks:
 57 |             sentence1 = []
 58 |             sentence2 = []
 59 |             scores = []
 60 | 
 61 |             index1 = task.index1
 62 |             index2 = task.index2
 63 |             target = task.target
 64 | 
 65 |             with resources.open_text("evaluation.wordsim.data", task.file) as f:
 66 |                 for line in f:
 67 |                     parts = line.strip().split("\t")
 68 |                     # Remove underscores from the words
 69 |                     parts = [part.replace("_", " ") for part in parts]
 70 |                     word1 = parts[index1]
 71 |                     word2 = parts[index2]
 72 | 
 73 |                     similarity = float(parts[target])
 74 | 
 75 |                     sentence1.append(word1)
 76 |                     sentence2.append(word2)
 77 |                     scores.append(similarity)
 78 | 
 79 |             dataset_name = task.task
 80 |             self.dataset_splits[dataset_name] = datasets.Dataset.from_dict(
 81 |                 {
 82 |                     "sentence1": sentence1,
 83 |                     "sentence2": sentence2,
 84 |                     "score": scores,
 85 |                 }
 86 |             )
 87 |         if self.dataset_name:
 88 |             self.dataset = datasets.DatasetDict(
 89 |                 {
 90 |                     "test": self.dataset_splits[self.dataset_name],
 91 |                 }
 92 |             )
 93 |         else:
 94 |             self.dataset = datasets.DatasetDict(self.dataset_splits)
 95 | 
 96 |     @classmethod
 97 |     def get_subtasks(cls) -> list[WordSim]:
 98 |         """Return a list of subtasks, one for each dataset."""
 99 |         instance = cls()
100 |         instance.load_data()
101 |         return [cls(dataset_name=name) for name in instance.dataset_splits.keys()]
102 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "evaluation"
 3 | description = "Evaluation of word embeddings"
 4 | readme = "README.md"
 5 | version = "0.1.0"
 6 | requires-python = ">=3.10"
 7 | 
 8 | dependencies = [
 9 | "autofj",
10 | "datasets",
11 | "huggingface-hub",
12 | "mteb==1.14.15",
13 | "numpy",
14 | "pandas",
15 | "pytorch_lightning",
16 | "reach",
17 | "scikit-learn",
18 | "scipy",
19 | "seaborn",
20 | "sentence_transformers",
21 | "torch",
22 | "plotnine",
23 | ]
24 | 
25 | [project.optional-dependencies]
26 | dev = [
27 |     "black",
28 |     "ipython",
29 |     "mypy",
30 |     "pre-commit",
31 |     "pytest",
32 |     "pytest-coverage",
33 |     "ruff",
34 | ]
35 | 
36 | [build-system]
37 | requires = ["setuptools>=64", "setuptools_scm>=8"]
38 | build-backend = "setuptools.build_meta"
39 | 
40 | [tool.setuptools]
41 | packages = ["evaluation", "evaluation.pearl", "evaluation.wordsim"]  # an explicit list does not pull in subpackages
42 | include-package-data = true
43 | 
44 | [tool.setuptools.package-data]
45 | "evaluation.wordsim" = ["data/*"]
46 | 
47 | [tool.ruff]
48 | exclude = [".venv/"]
49 | line-length = 120
50 | target-version = "py310"
51 | 
52 | [tool.ruff.lint]
53 | select = [
54 |     # Annotations: Enforce type annotations
55 |     "ANN",
56 |     # Complexity: Enforce a maximum cyclomatic complexity
57 |     "C90",
58 |     # Pydocstyle: Enforce docstrings
59 |     "D",
60 |     # Remove unused imports
61 |     "F401",
62 |     # Isort: Enforce import order
63 |     "I",
64 |     # Numpy: Enforce numpy style
65 |     "NPY",
66 |     # Print: Forbid print statements
67 |     "T20",
68 | ]
69 | 
70 | ignore = [
71 |     # Allow self and cls to be untyped, and allow Any type
72 |     "ANN101", "ANN102", "ANN401",
73 |     # Pydocstyle ignores
74 |     "D100", "D101", "D104", "D203", "D204", "D212", "D401",
75 |     # Allow use of f-strings in logging
76 |     "G004"
77 | ]
78 | 
79 | [tool.pydoclint]
80 | style = "sphinx"
81 | exclude = "test_"
82 | allow-init-docstring = true
83 | arg-type-hints-in-docstring = false
84 | check-return-types = false
85 | require-return-section-when-returning-nothing = false
86 | 
87 | [tool.mypy]
88 | python_version = "3.10"
89 | warn_unused_configs = true
90 | ignore_missing_imports = true
91 | 


--------------------------------------------------------------------------------
/requirements/requirements-linux.txt:
--------------------------------------------------------------------------------
  1 | # This file was autogenerated by uv via the following command:
  2 | #    uv pip compile pyproject.toml --python-platform linux --output-file=requirements/requirements-linux.txt --all-extras
  3 | aiohappyeyeballs==2.4.0
  4 |     # via aiohttp
  5 | aiohttp==3.10.5
  6 |     # via
  7 |     #   datasets
  8 |     #   fsspec
  9 | aiosignal==1.3.1
 10 |     # via aiohttp
 11 | annotated-types==0.7.0
 12 |     # via pydantic
 13 | asttokens==2.4.1
 14 |     # via stack-data
 15 | async-timeout==4.0.3
 16 |     # via aiohttp
 17 | attrs==24.2.0
 18 |     # via aiohttp
 19 | autofj==0.0.6
 20 |     # via evaluation (pyproject.toml)
 21 | black==24.8.0
 22 |     # via evaluation (pyproject.toml)
 23 | blis==0.7.11
 24 |     # via thinc
 25 | catalogue==2.0.10
 26 |     # via
 27 |     #   spacy
 28 |     #   srsly
 29 |     #   thinc
 30 | certifi==2024.8.30
 31 |     # via requests
 32 | cfgv==3.4.0
 33 |     # via pre-commit
 34 | charset-normalizer==3.3.2
 35 |     # via requests
 36 | click==8.1.7
 37 |     # via
 38 |     #   black
 39 |     #   nltk
 40 |     #   typer
 41 | cloudpathlib==0.19.0
 42 |     # via weasel
 43 | confection==0.1.5
 44 |     # via
 45 |     #   thinc
 46 |     #   weasel
 47 | coverage==7.6.1
 48 |     # via pytest-cov
 49 | cymem==2.0.8
 50 |     # via
 51 |     #   preshed
 52 |     #   spacy
 53 |     #   thinc
 54 | datasets==2.21.0
 55 |     # via
 56 |     #   evaluation (pyproject.toml)
 57 |     #   mteb
 58 | decorator==5.1.1
 59 |     # via ipython
 60 | dill==0.3.8
 61 |     # via
 62 |     #   datasets
 63 |     #   multiprocess
 64 | distlib==0.3.8
 65 |     # via virtualenv
 66 | editdistance==0.8.1
 67 |     # via autofj
 68 | eval-type-backport==0.2.0
 69 |     # via mteb
 70 | exceptiongroup==1.2.2
 71 |     # via
 72 |     #   ipython
 73 |     #   pytest
 74 | executing==2.1.0
 75 |     # via stack-data
 76 | filelock==3.15.4
 77 |     # via
 78 |     #   datasets
 79 |     #   huggingface-hub
 80 |     #   torch
 81 |     #   transformers
 82 |     #   triton
 83 |     #   virtualenv
 84 | frozenlist==1.4.1
 85 |     # via
 86 |     #   aiohttp
 87 |     #   aiosignal
 88 | fsspec==2024.6.1
 89 |     # via
 90 |     #   datasets
 91 |     #   huggingface-hub
 92 |     #   pytorch-lightning
 93 |     #   torch
 94 | huggingface-hub==0.24.6
 95 |     # via
 96 |     #   evaluation (pyproject.toml)
 97 |     #   datasets
 98 |     #   sentence-transformers
 99 |     #   tokenizers
100 |     #   transformers
101 | identify==2.6.0
102 |     # via pre-commit
103 | idna==3.8
104 |     # via
105 |     #   requests
106 |     #   yarl
107 | iniconfig==2.0.0
108 |     # via pytest
109 | ipython==8.27.0
110 |     # via evaluation (pyproject.toml)
111 | jedi==0.19.1
112 |     # via ipython
113 | jellyfish==1.1.0
114 |     # via autofj
115 | jinja2==3.1.4
116 |     # via
117 |     #   spacy
118 |     #   torch
119 | joblib==1.4.2
120 |     # via
121 |     #   nltk
122 |     #   scikit-learn
123 | langcodes==3.4.0
124 |     # via spacy
125 | language-data==1.2.0
126 |     # via langcodes
127 | lightning-utilities==0.11.6
128 |     # via
129 |     #   pytorch-lightning
130 |     #   torchmetrics
131 | marisa-trie==1.2.0
132 |     # via language-data
133 | markdown-it-py==3.0.0
134 |     # via rich
135 | markupsafe==2.1.5
136 |     # via jinja2
137 | matplotlib-inline==0.1.7
138 |     # via ipython
139 | mdurl==0.1.2
140 |     # via markdown-it-py
141 | mpmath==1.3.0
142 |     # via sympy
143 | mteb==1.14.15
144 |     # via evaluation (pyproject.toml)
145 | multidict==6.0.5
146 |     # via
147 |     #   aiohttp
148 |     #   yarl
149 | multiprocess==0.70.16
150 |     # via datasets
151 | murmurhash==1.0.10
152 |     # via
153 |     #   preshed
154 |     #   spacy
155 |     #   thinc
156 | mypy==1.11.2
157 |     # via evaluation (pyproject.toml)
158 | mypy-extensions==1.0.0
159 |     # via
160 |     #   black
161 |     #   mypy
162 | networkx==3.3
163 |     # via torch
164 | ngram==4.0.3
165 |     # via autofj
166 | nltk==3.9.1
167 |     # via autofj
168 | nodeenv==1.9.1
169 |     # via pre-commit
170 | numpy==1.26.4
171 |     # via
172 |     #   evaluation (pyproject.toml)
173 |     #   autofj
174 |     #   blis
175 |     #   datasets
176 |     #   mteb
177 |     #   pandas
178 |     #   pyarrow
179 |     #   reach
180 |     #   scikit-learn
181 |     #   scipy
182 |     #   sentence-transformers
183 |     #   spacy
184 |     #   thinc
185 |     #   torchmetrics
186 |     #   transformers
187 | nvidia-cublas-cu12==12.1.3.1
188 |     # via
189 |     #   nvidia-cudnn-cu12
190 |     #   nvidia-cusolver-cu12
191 |     #   torch
192 | nvidia-cuda-cupti-cu12==12.1.105
193 |     # via torch
194 | nvidia-cuda-nvrtc-cu12==12.1.105
195 |     # via torch
196 | nvidia-cuda-runtime-cu12==12.1.105
197 |     # via torch
198 | nvidia-cudnn-cu12==9.1.0.70
199 |     # via torch
200 | nvidia-cufft-cu12==11.0.2.54
201 |     # via torch
202 | nvidia-curand-cu12==10.3.2.106
203 |     # via torch
204 | nvidia-cusolver-cu12==11.4.5.107
205 |     # via torch
206 | nvidia-cusparse-cu12==12.1.0.106
207 |     # via
208 |     #   nvidia-cusolver-cu12
209 |     #   torch
210 | nvidia-nccl-cu12==2.20.5
211 |     # via torch
212 | nvidia-nvjitlink-cu12==12.6.68
213 |     # via
214 |     #   nvidia-cusolver-cu12
215 |     #   nvidia-cusparse-cu12
216 | nvidia-nvtx-cu12==12.1.105
217 |     # via torch
218 | packaging==24.1
219 |     # via
220 |     #   black
221 |     #   datasets
222 |     #   huggingface-hub
223 |     #   lightning-utilities
224 |     #   pytest
225 |     #   pytorch-lightning
226 |     #   spacy
227 |     #   thinc
228 |     #   torchmetrics
229 |     #   transformers
230 |     #   weasel
231 | pandas==2.2.2
232 |     # via
233 |     #   evaluation (pyproject.toml)
234 |     #   autofj
235 |     #   datasets
236 | parso==0.8.4
237 |     # via jedi
238 | pathspec==0.12.1
239 |     # via black
240 | pexpect==4.9.0
241 |     # via ipython
242 | pillow==10.4.0
243 |     # via sentence-transformers
244 | platformdirs==4.2.2
245 |     # via
246 |     #   black
247 |     #   virtualenv
248 | pluggy==1.5.0
249 |     # via pytest
250 | polars==1.6.0
251 |     # via mteb
252 | pre-commit==3.8.0
253 |     # via evaluation (pyproject.toml)
254 | preshed==3.0.9
255 |     # via
256 |     #   spacy
257 |     #   thinc
258 | prompt-toolkit==3.0.47
259 |     # via ipython
260 | ptyprocess==0.7.0
261 |     # via pexpect
262 | pure-eval==0.2.3
263 |     # via stack-data
264 | pyarrow==17.0.0
265 |     # via datasets
266 | pydantic==2.8.2
267 |     # via
268 |     #   confection
269 |     #   mteb
270 |     #   spacy
271 |     #   thinc
272 |     #   weasel
273 | pydantic-core==2.20.1
274 |     # via pydantic
275 | pygments==2.18.0
276 |     # via
277 |     #   ipython
278 |     #   rich
279 | pytest==8.3.2
280 |     # via
281 |     #   evaluation (pyproject.toml)
282 |     #   pytest-cov
283 | pytest-cov==5.0.0
284 |     # via pytest-cover
285 | pytest-cover==3.0.0
286 |     # via pytest-coverage
287 | pytest-coverage==0.0
288 |     # via evaluation (pyproject.toml)
289 | python-dateutil==2.9.0.post0
290 |     # via pandas
291 | pytorch-lightning==2.4.0
292 |     # via evaluation (pyproject.toml)
293 | pytrec-eval-terrier==0.5.6
294 |     # via mteb
295 | pytz==2024.1
296 |     # via pandas
297 | pyyaml==6.0.2
298 |     # via
299 |     #   datasets
300 |     #   huggingface-hub
301 |     #   pre-commit
302 |     #   pytorch-lightning
303 |     #   transformers
304 | reach==4.1.1
305 |     # via evaluation (pyproject.toml)
306 | regex==2024.7.24
307 |     # via
308 |     #   nltk
309 |     #   transformers
310 | requests==2.32.3
311 |     # via
312 |     #   datasets
313 |     #   huggingface-hub
314 |     #   mteb
315 |     #   spacy
316 |     #   transformers
317 |     #   weasel
318 | rich==13.8.0
319 |     # via
320 |     #   mteb
321 |     #   typer
322 | ruff==0.6.3
323 |     # via evaluation (pyproject.toml)
324 | safetensors==0.4.4
325 |     # via transformers
326 | scikit-learn==1.5.1
327 |     # via
328 |     #   evaluation (pyproject.toml)
329 |     #   mteb
330 |     #   sentence-transformers
331 | scipy==1.14.1
332 |     # via
333 |     #   evaluation (pyproject.toml)
334 |     #   mteb
335 |     #   scikit-learn
336 |     #   sentence-transformers
337 | sentence-transformers==3.0.1
338 |     # via
339 |     #   evaluation (pyproject.toml)
340 |     #   mteb
341 | setuptools==74.0.0
342 |     # via
343 |     #   lightning-utilities
344 |     #   marisa-trie
345 |     #   spacy
346 |     #   thinc
347 | shellingham==1.5.4
348 |     # via typer
349 | six==1.16.0
350 |     # via
351 |     #   asttokens
352 |     #   python-dateutil
353 | smart-open==7.0.4
354 |     # via weasel
355 | spacy==3.7.6
356 |     # via autofj
357 | spacy-legacy==3.0.12
358 |     # via spacy
359 | spacy-loggers==1.0.5
360 |     # via spacy
361 | srsly==2.4.8
362 |     # via
363 |     #   confection
364 |     #   spacy
365 |     #   thinc
366 |     #   weasel
367 | stack-data==0.6.3
368 |     # via ipython
369 | sympy==1.13.2
370 |     # via torch
371 | thinc==8.2.5
372 |     # via spacy
373 | threadpoolctl==3.5.0
374 |     # via scikit-learn
375 | tokenizers==0.19.1
376 |     # via transformers
377 | tomli==2.0.1
378 |     # via
379 |     #   black
380 |     #   coverage
381 |     #   mypy
382 |     #   pytest
383 | torch==2.4.0
384 |     # via
385 |     #   evaluation (pyproject.toml)
386 |     #   mteb
387 |     #   pytorch-lightning
388 |     #   sentence-transformers
389 |     #   torchmetrics
390 | torchmetrics==1.4.1
391 |     # via pytorch-lightning
392 | tqdm==4.66.5
393 |     # via
394 |     #   datasets
395 |     #   huggingface-hub
396 |     #   mteb
397 |     #   nltk
398 |     #   pytorch-lightning
399 |     #   reach
400 |     #   sentence-transformers
401 |     #   spacy
402 |     #   transformers
403 | traitlets==5.14.3
404 |     # via
405 |     #   ipython
406 |     #   matplotlib-inline
407 | transformers==4.44.2
408 |     # via sentence-transformers
409 | triton==3.0.0
410 |     # via torch
411 | typer==0.12.5
412 |     # via
413 |     #   spacy
414 |     #   weasel
415 | typing-extensions==4.12.2
416 |     # via
417 |     #   black
418 |     #   cloudpathlib
419 |     #   huggingface-hub
420 |     #   ipython
421 |     #   lightning-utilities
422 |     #   mteb
423 |     #   mypy
424 |     #   pydantic
425 |     #   pydantic-core
426 |     #   pytorch-lightning
427 |     #   torch
428 |     #   typer
429 | tzdata==2024.1
430 |     # via pandas
431 | urllib3==2.2.2
432 |     # via requests
433 | virtualenv==20.26.3
434 |     # via pre-commit
435 | wasabi==1.1.3
436 |     # via
437 |     #   spacy
438 |     #   thinc
439 |     #   weasel
440 | wcwidth==0.2.13
441 |     # via prompt-toolkit
442 | weasel==0.4.1
443 |     # via spacy
444 | wrapt==1.16.0
445 |     # via smart-open
446 | xxhash==3.5.0
447 |     # via datasets
448 | yarl==1.9.7
449 |     # via aiohttp
450 | 


--------------------------------------------------------------------------------
/requirements/requirements-macos.txt:
--------------------------------------------------------------------------------
  1 | # This file was autogenerated by uv via the following command:
  2 | #    uv pip compile pyproject.toml --python-platform macos --output-file=requirements/requirements-macos.txt --all-extras
  3 | aiohappyeyeballs==2.4.0
  4 |     # via aiohttp
  5 | aiohttp==3.10.5
  6 |     # via
  7 |     #   datasets
  8 |     #   fsspec
  9 | aiosignal==1.3.1
 10 |     # via aiohttp
 11 | annotated-types==0.7.0
 12 |     # via pydantic
 13 | asttokens==2.4.1
 14 |     # via stack-data
 15 | async-timeout==4.0.3
 16 |     # via aiohttp
 17 | attrs==24.2.0
 18 |     # via aiohttp
 19 | autofj==0.0.6
 20 |     # via evaluation (pyproject.toml)
 21 | black==24.8.0
 22 |     # via evaluation (pyproject.toml)
 23 | blis==0.7.11
 24 |     # via thinc
 25 | catalogue==2.0.10
 26 |     # via
 27 |     #   spacy
 28 |     #   srsly
 29 |     #   thinc
 30 | certifi==2024.8.30
 31 |     # via requests
 32 | cfgv==3.4.0
 33 |     # via pre-commit
 34 | charset-normalizer==3.3.2
 35 |     # via requests
 36 | click==8.1.7
 37 |     # via
 38 |     #   black
 39 |     #   nltk
 40 |     #   typer
 41 | cloudpathlib==0.19.0
 42 |     # via weasel
 43 | confection==0.1.5
 44 |     # via
 45 |     #   thinc
 46 |     #   weasel
 47 | coverage==7.6.1
 48 |     # via pytest-cov
 49 | cymem==2.0.8
 50 |     # via
 51 |     #   preshed
 52 |     #   spacy
 53 |     #   thinc
 54 | datasets==2.21.0
 55 |     # via
 56 |     #   evaluation (pyproject.toml)
 57 |     #   mteb
 58 | decorator==5.1.1
 59 |     # via ipython
 60 | dill==0.3.8
 61 |     # via
 62 |     #   datasets
 63 |     #   multiprocess
 64 | distlib==0.3.8
 65 |     # via virtualenv
 66 | editdistance==0.8.1
 67 |     # via autofj
 68 | eval-type-backport==0.2.0
 69 |     # via mteb
 70 | exceptiongroup==1.2.2
 71 |     # via
 72 |     #   ipython
 73 |     #   pytest
 74 | executing==2.1.0
 75 |     # via stack-data
 76 | filelock==3.15.4
 77 |     # via
 78 |     #   datasets
 79 |     #   huggingface-hub
 80 |     #   torch
 81 |     #   transformers
 82 |     #   virtualenv
 83 | frozenlist==1.4.1
 84 |     # via
 85 |     #   aiohttp
 86 |     #   aiosignal
 87 | fsspec==2024.6.1
 88 |     # via
 89 |     #   datasets
 90 |     #   huggingface-hub
 91 |     #   pytorch-lightning
 92 |     #   torch
 93 | huggingface-hub==0.24.6
 94 |     # via
 95 |     #   evaluation (pyproject.toml)
 96 |     #   datasets
 97 |     #   sentence-transformers
 98 |     #   tokenizers
 99 |     #   transformers
100 | identify==2.6.0
101 |     # via pre-commit
102 | idna==3.8
103 |     # via
104 |     #   requests
105 |     #   yarl
106 | iniconfig==2.0.0
107 |     # via pytest
108 | ipython==8.27.0
109 |     # via evaluation (pyproject.toml)
110 | jedi==0.19.1
111 |     # via ipython
112 | jellyfish==1.1.0
113 |     # via autofj
114 | jinja2==3.1.4
115 |     # via
116 |     #   spacy
117 |     #   torch
118 | joblib==1.4.2
119 |     # via
120 |     #   nltk
121 |     #   scikit-learn
122 | langcodes==3.4.0
123 |     # via spacy
124 | language-data==1.2.0
125 |     # via langcodes
126 | lightning-utilities==0.11.6
127 |     # via
128 |     #   pytorch-lightning
129 |     #   torchmetrics
130 | marisa-trie==1.2.0
131 |     # via language-data
132 | markdown-it-py==3.0.0
133 |     # via rich
134 | markupsafe==2.1.5
135 |     # via jinja2
136 | matplotlib-inline==0.1.7
137 |     # via ipython
138 | mdurl==0.1.2
139 |     # via markdown-it-py
140 | mpmath==1.3.0
141 |     # via sympy
142 | mteb==1.14.15
143 |     # via evaluation (pyproject.toml)
144 | multidict==6.0.5
145 |     # via
146 |     #   aiohttp
147 |     #   yarl
148 | multiprocess==0.70.16
149 |     # via datasets
150 | murmurhash==1.0.10
151 |     # via
152 |     #   preshed
153 |     #   spacy
154 |     #   thinc
155 | mypy==1.11.2
156 |     # via evaluation (pyproject.toml)
157 | mypy-extensions==1.0.0
158 |     # via
159 |     #   black
160 |     #   mypy
161 | networkx==3.3
162 |     # via torch
163 | ngram==4.0.3
164 |     # via autofj
165 | nltk==3.9.1
166 |     # via autofj
167 | nodeenv==1.9.1
168 |     # via pre-commit
169 | numpy==1.26.4
170 |     # via
171 |     #   evaluation (pyproject.toml)
172 |     #   autofj
173 |     #   blis
174 |     #   datasets
175 |     #   mteb
176 |     #   pandas
177 |     #   pyarrow
178 |     #   reach
179 |     #   scikit-learn
180 |     #   scipy
181 |     #   sentence-transformers
182 |     #   spacy
183 |     #   thinc
184 |     #   torchmetrics
185 |     #   transformers
186 | packaging==24.1
187 |     # via
188 |     #   black
189 |     #   datasets
190 |     #   huggingface-hub
191 |     #   lightning-utilities
192 |     #   pytest
193 |     #   pytorch-lightning
194 |     #   spacy
195 |     #   thinc
196 |     #   torchmetrics
197 |     #   transformers
198 |     #   weasel
199 | pandas==2.2.2
200 |     # via
201 |     #   evaluation (pyproject.toml)
202 |     #   autofj
203 |     #   datasets
204 | parso==0.8.4
205 |     # via jedi
206 | pathspec==0.12.1
207 |     # via black
208 | pexpect==4.9.0
209 |     # via ipython
210 | pillow==10.4.0
211 |     # via sentence-transformers
212 | platformdirs==4.2.2
213 |     # via
214 |     #   black
215 |     #   virtualenv
216 | pluggy==1.5.0
217 |     # via pytest
218 | polars==1.6.0
219 |     # via mteb
220 | pre-commit==3.8.0
221 |     # via evaluation (pyproject.toml)
222 | preshed==3.0.9
223 |     # via
224 |     #   spacy
225 |     #   thinc
226 | prompt-toolkit==3.0.47
227 |     # via ipython
228 | ptyprocess==0.7.0
229 |     # via pexpect
230 | pure-eval==0.2.3
231 |     # via stack-data
232 | pyarrow==17.0.0
233 |     # via datasets
234 | pydantic==2.8.2
235 |     # via
236 |     #   confection
237 |     #   mteb
238 |     #   spacy
239 |     #   thinc
240 |     #   weasel
241 | pydantic-core==2.20.1
242 |     # via pydantic
243 | pygments==2.18.0
244 |     # via
245 |     #   ipython
246 |     #   rich
247 | pytest==8.3.2
248 |     # via
249 |     #   evaluation (pyproject.toml)
250 |     #   pytest-cov
251 | pytest-cov==5.0.0
252 |     # via pytest-cover
253 | pytest-cover==3.0.0
254 |     # via pytest-coverage
255 | pytest-coverage==0.0
256 |     # via evaluation (pyproject.toml)
257 | python-dateutil==2.9.0.post0
258 |     # via pandas
259 | pytorch-lightning==2.4.0
260 |     # via evaluation (pyproject.toml)
261 | pytrec-eval-terrier==0.5.6
262 |     # via mteb
263 | pytz==2024.1
264 |     # via pandas
265 | pyyaml==6.0.2
266 |     # via
267 |     #   datasets
268 |     #   huggingface-hub
269 |     #   pre-commit
270 |     #   pytorch-lightning
271 |     #   transformers
272 | reach==4.1.1
273 |     # via evaluation (pyproject.toml)
274 | regex==2024.7.24
275 |     # via
276 |     #   nltk
277 |     #   transformers
278 | requests==2.32.3
279 |     # via
280 |     #   datasets
281 |     #   huggingface-hub
282 |     #   mteb
283 |     #   spacy
284 |     #   transformers
285 |     #   weasel
286 | rich==13.8.0
287 |     # via
288 |     #   mteb
289 |     #   typer
290 | ruff==0.6.3
291 |     # via evaluation (pyproject.toml)
292 | safetensors==0.4.4
293 |     # via transformers
294 | scikit-learn==1.5.1
295 |     # via
296 |     #   evaluation (pyproject.toml)
297 |     #   mteb
298 |     #   sentence-transformers
299 | scipy==1.14.1
300 |     # via
301 |     #   evaluation (pyproject.toml)
302 |     #   mteb
303 |     #   scikit-learn
304 |     #   sentence-transformers
305 | sentence-transformers==3.0.1
306 |     # via
307 |     #   evaluation (pyproject.toml)
308 |     #   mteb
309 | setuptools==74.0.0
310 |     # via
311 |     #   lightning-utilities
312 |     #   marisa-trie
313 |     #   spacy
314 |     #   thinc
315 | shellingham==1.5.4
316 |     # via typer
317 | six==1.16.0
318 |     # via
319 |     #   asttokens
320 |     #   python-dateutil
321 | smart-open==7.0.4
322 |     # via weasel
323 | spacy==3.7.6
324 |     # via autofj
325 | spacy-legacy==3.0.12
326 |     # via spacy
327 | spacy-loggers==1.0.5
328 |     # via spacy
329 | srsly==2.4.8
330 |     # via
331 |     #   confection
332 |     #   spacy
333 |     #   thinc
334 |     #   weasel
335 | stack-data==0.6.3
336 |     # via ipython
337 | sympy==1.13.2
338 |     # via torch
339 | thinc==8.2.5
340 |     # via spacy
341 | threadpoolctl==3.5.0
342 |     # via scikit-learn
343 | tokenizers==0.19.1
344 |     # via transformers
345 | tomli==2.0.1
346 |     # via
347 |     #   black
348 |     #   coverage
349 |     #   mypy
350 |     #   pytest
351 | torch==2.4.0
352 |     # via
353 |     #   evaluation (pyproject.toml)
354 |     #   mteb
355 |     #   pytorch-lightning
356 |     #   sentence-transformers
357 |     #   torchmetrics
358 | torchmetrics==1.4.1
359 |     # via pytorch-lightning
360 | tqdm==4.66.5
361 |     # via
362 |     #   datasets
363 |     #   huggingface-hub
364 |     #   mteb
365 |     #   nltk
366 |     #   pytorch-lightning
367 |     #   reach
368 |     #   sentence-transformers
369 |     #   spacy
370 |     #   transformers
371 | traitlets==5.14.3
372 |     # via
373 |     #   ipython
374 |     #   matplotlib-inline
375 | transformers==4.44.2
376 |     # via sentence-transformers
377 | typer==0.12.5
378 |     # via
379 |     #   spacy
380 |     #   weasel
381 | typing-extensions==4.12.2
382 |     # via
383 |     #   black
384 |     #   cloudpathlib
385 |     #   huggingface-hub
386 |     #   ipython
387 |     #   lightning-utilities
388 |     #   mteb
389 |     #   mypy
390 |     #   pydantic
391 |     #   pydantic-core
392 |     #   pytorch-lightning
393 |     #   torch
394 |     #   typer
395 | tzdata==2024.1
396 |     # via pandas
397 | urllib3==2.2.2
398 |     # via requests
399 | virtualenv==20.26.3
400 |     # via pre-commit
401 | wasabi==1.1.3
402 |     # via
403 |     #   spacy
404 |     #   thinc
405 |     #   weasel
406 | wcwidth==0.2.13
407 |     # via prompt-toolkit
408 | weasel==0.4.1
409 |     # via spacy
410 | wrapt==1.16.0
411 |     # via smart-open
412 | xxhash==3.5.0
413 |     # via datasets
414 | yarl==1.9.7
415 |     # via aiohttp
416 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Sequence
 2 | from unittest.mock import create_autospec
 3 | 
 4 | import pytest
 5 | import torch
 6 | from mteb.encoder_interface import Encoder
 7 | from mteb.model_meta import ModelMeta
 8 | 
 9 | 
10 | @pytest.fixture
11 | def mock_encoder() -> Encoder:
12 |     """Return a mock encoder that follows the Encoder protocol."""
13 |     mock_encoder = create_autospec(Encoder, instance=True)
14 | 
15 |     # Mock the encode method
16 |     def mock_encode(sentences: Sequence[str], prompt_name: str | None = None, **kwargs: Any) -> torch.Tensor:
17 |         """Return random embeddings for the sentence."""
18 |         return torch.rand(len(sentences), 300)
19 | 
20 |     # Set the side effect of the mock
21 |     mock_encoder.encode.side_effect = mock_encode
22 | 
23 |     # Set the model meta
24 |     mock_encoder.mteb_model_meta = ModelMeta(
25 |         name="mock_model_name", revision="mock_revision", release_date=None, languages=None
26 |     )
27 | 
28 |     return mock_encoder
29 | 


--------------------------------------------------------------------------------
/tests/test_evaluation.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | from mteb.encoder_interface import Encoder
 5 | 
 6 | from evaluation import CustomMTEB, TaskType, get_tasks
 7 | 
 8 | 
 9 | def test_evaluation(mock_encoder: Encoder, tmp_path: Path) -> None:
10 |     """Test the evaluation with the CustomMTEB class."""
11 |     tasks = get_tasks([TaskType.WORDSIM])
12 |     evaluation = CustomMTEB(tasks)
13 |     results = evaluation.run(mock_encoder, eval_splits=["test"], output_folder=tmp_path)
14 | 
15 |     # Assert that the number of tasks and results are the same and that the results folder exists
16 |     assert len(tasks) == len(results), "The number of tasks and results should be the same."
17 |     assert (tmp_path).exists(), "The results folder should exist."
18 | 
19 |     # Assert that the results folder contains the results for all tasks
20 |     task_names = [task.metadata.name for task in tasks]
21 |     result_folder = tmp_path / mock_encoder.mteb_model_meta.name / mock_encoder.mteb_model_meta.revision
22 | 
23 |     assert all(
24 |         (result_folder / f"{task_name}.json").exists() for task_name in task_names
25 |     ), "All result files for the specified tasks should exist."
26 | 
27 |     # Ensure that get_tasks without any arguments works
28 |     get_tasks()
29 | 
30 |     # Ensure that get_tasks with a string works
31 |     get_tasks(["WordSim"])
32 | 
33 |     # Ensure that get_tasks with a non-existent task name raises an error
34 |     with pytest.raises(ValueError):
35 |         get_tasks(["non_existent_task"])
36 | 


--------------------------------------------------------------------------------
/tests/test_summarize.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from mteb.encoder_interface import Encoder
 4 | 
 5 | from evaluation import (
 6 |     CustomMTEB,
 7 |     TaskType,
 8 |     get_tasks,
 9 |     load_results,
10 |     make_leaderboard,
11 |     parse_mteb_results,
12 |     summarize_results,
13 | )
14 | 
15 | 
16 | def test_summarize(mock_encoder: Encoder, tmp_path: Path) -> None:
17 |     """Test the summarization of the evaluation results."""
18 |     task_types = [task.value for task in TaskType]
19 | 
20 |     # Get the specified tasks and results
21 |     tasks = get_tasks([TaskType.WORDSIM])
22 |     evaluation = CustomMTEB(tasks)
23 |     results = evaluation.run(mock_encoder, eval_splits=["test"], output_folder=tmp_path)
24 | 
25 |     # Set the model name
26 |     model_name = f"{mock_encoder.mteb_model_meta.name}_{mock_encoder.mteb_model_meta.revision}"
27 | 
28 |     # Test option 1: Parse the results into a custom ResultSet format
29 |     parsed_results = parse_mteb_results(mteb_results=results, model_name=model_name)
30 |     model_scores = summarize_results(parsed_results)
31 |     # Assert that all the task_types exist as keys in the model_scores
32 |     assert all(task in model_scores[model_name]["task_means"].keys() for task in task_types)
33 |     # Assert that every task_type has the mock_encoder name as a key
34 |     assert model_name in model_scores
35 |     # Ensure that print_leaderboard works
36 |     make_leaderboard(model_scores)
37 | 
38 |     # Test option 2: Load all results from the output folder
39 |     results = load_results(tmp_path)
40 |     model_scores = summarize_results(results)
41 |     # Assert that all the task_types exist as keys in the model_scores
42 |     assert all(task in model_scores[model_name]["task_means"].keys() for task in task_types)
43 |     # Assert that every task_type has the mock_encoder name as a key
44 |     assert model_name in model_scores
45 |     # Ensure that print_leaderboard works
46 |     make_leaderboard(model_scores)
47 | 
48 |     # Test option 3: load a specific folder
49 |     result_folder = tmp_path / mock_encoder.mteb_model_meta.name / mock_encoder.mteb_model_meta.revision
50 |     results = load_results(result_folder)
51 |     model_scores = summarize_results(results)
52 |     # Assert that all the task_types exist as keys in the model_scores
53 |     assert all(task in model_scores[model_name]["task_means"].keys() for task in task_types)
54 |     # Assert that every task_type has the mock_encoder name as a key
55 |     assert model_name in model_scores
56 |     # Ensure that print_leaderboard works
57 |     make_leaderboard(model_scores)
58 | 


--------------------------------------------------------------------------------