├── tests ├── __init__.py ├── data │ └── LFW.npz ├── test_utils.py ├── test_metrics.py ├── test_experiment_real_data.py └── test_evalify.py ├── docs ├── api.md ├── authors.md ├── history.md ├── index.md ├── contributing.md ├── usage.md └── installation.md ├── codecov.yml ├── evalify ├── __init__.py ├── utils.py ├── metrics.py └── evalify.py ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ ├── release.yml │ ├── dev.yml │ └── codeql-analysis.yml ├── CITATION.cff ├── HISTORY.md ├── AUTHORS.md ├── tox.ini ├── examples └── LFW.py ├── LICENSE ├── mkdocs.yml ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for evalify.""" 2 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | ::: evalify.evalify 2 | handler: python 3 | -------------------------------------------------------------------------------- /docs/authors.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../AUTHORS.md" 3 | %} -------------------------------------------------------------------------------- /docs/history.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../HISTORY.md" 3 | %} -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../README.md" 3 | %} 4 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../CONTRIBUTING.md" 3 | %} -------------------------------------------------------------------------------- /tests/data/LFW.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma7555/evalify/HEAD/tests/data/LFW.npz -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 90% 6 | patch: 7 | default: 8 | target: 85% 9 | -------------------------------------------------------------------------------- /evalify/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for evalify.""" 2 | 3 | from evalify.evalify import Experiment as Experiment 4 | 5 | __author__ = """Mahmoud Bahaa""" 6 | __email__ = "evalify@ma7555.anonaddy.com" 7 | __version__ = "0.1.0" 8 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | # uncomment the following to omit files during running 3 | #omit = 4 | [report] 5 | exclude_lines = 6 | pragma: no cover 7 | def __repr__ 8 | if self.debug: 9 | if settings.DEBUG 10 | raise AssertionError 11 | raise NotImplementedError 12 | if 0: 13 | if __name__ == .__main__.: 14 | def main 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: 
-------------------------------------------------------------------------------- 1 | * evalify version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: evalify 3 | message: " If you use this software, please cite it using the metadata from this file." 4 | type: software 5 | authors: 6 | - given-names: Mahmoud 7 | family-names: Bahaa 8 | email: evalify@ma7555.anonaddy.com 9 | affiliation: Nile University 10 | orcid: "https://orcid.org/0000-0001-8688-6495" 11 | doi: 10.5281/zenodo.6181723 12 | date-released: 2022-02-20 13 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # History 2 | 3 | ## 0.1.0 (2022-02-20) 4 | 5 | * First release on PyPI. 6 | 7 | ## 0.1.1 (2022-02-22) 8 | 9 | * Run time enhancement. 10 | 11 | ## 0.1.2 (2022-02-23) 12 | 13 | * Various enhancements and refactoring. 14 | 15 | ## 0.1.3 (2022-02-24) 16 | 17 | * Add pearson similarity as a metric 18 | 19 | ## 0.1.4 (2022-02-24) 20 | 21 | * Add EER calculation function. 22 | * Drop support for python 3.7 23 | 24 | ## 1.0.0 (2024-11-08) 25 | 26 | * Bump dependencies. 27 | * Drop support for python 3.8 28 | * Add support for TAR @ FAR -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Credits 2 | 3 | ## Development Lead 4 | 5 | * Mahmoud Bahaa 6 | 7 | ## Contributors 8 | 9 | None yet. Why not be the first? 10 | 11 | ## Others 12 | * This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [zillionare/cookiecutter-pypackage](https://github.com/zillionare/cookiecutter-pypackage) project template. 
13 | 14 | * Logo was created using font [GlacialIndifference-Regular](https://hanken.co/product/hk-grotesk/) by [Hanken Design Co.](https://hanken.co/) 15 | * Logo icon designed by Mauro Lucchesi 16 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | isolated_build = true 3 | envlist = py39, py310, py311, py312, lint 4 | 5 | [gh-actions] 6 | python = 7 | 3.12: py312 8 | 3.11: py311 9 | 3.10: py310 10 | 3.9: py39 11 | 12 | [testenv:lint] 13 | allowlist_externals = 14 | python 15 | deps = 16 | .[test, doc, dev] 17 | commands = 18 | python -m ruff check evalify tests --fix 19 | python -m poetry build 20 | python -m mkdocs build 21 | python -m twine check dist/* 22 | 23 | [testenv] 24 | allowlist_externals = pytest 25 | setenv = 26 | PYTHONPATH = {toxinidir} 27 | PYTHONWARNINGS = ignore 28 | deps = 29 | .[test] 30 | commands = 31 | pytest -s --cov=evalify --cov-append --cov-report=xml --cov-report term-missing tests 32 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | To use evalify in a project: 4 | 5 | ```python 6 | import numpy as np 7 | from evalify import Experiment 8 | 9 | rng = np.random.default_rng() 10 | nphotos = 500 11 | emb_size = 32 12 | nclasses = 10 13 | X = rng.random((nphotos, emb_size)) 14 | y = rng.integers(nclasses, size=nphotos) 15 | 16 | experiment = Experiment() 17 | experiment.run(X, y) 18 | roc_auc = experiment.roc_auc() 19 | print(roc_auc) 20 | ``` 21 | 22 | For a working experiment using real face embeddings, please refer to `LFW.py` under `./examples`. 23 | 24 | ```bash 25 | python ./examples/LFW.py 26 | ``` 27 | ``` 28 | Total available embeddings 2921 resulted in 4264660 samples for the experiment. 29 | Metrics calculations executed in 24.05 seconds 30 | ROC AUC: 31 | OrderedDict([('euclidean_distance', 0.9991302819624498), ('cosine_distance', 0.9991302818953706), ('euclidean_distance_l2', 0.9991302818953706), ('manhattan_distance', 0.9991260462584446)]) 32 | ``` 33 | -------------------------------------------------------------------------------- /examples/LFW.py: -------------------------------------------------------------------------------- 1 | """File LFW.npz contains sample embeddings and targets from the LFW dataset.""" 2 | 3 | from pathlib import Path 4 | import time 5 | import numpy as np 6 | 7 | from evalify import Experiment 8 | 9 | lfw_npz = Path(__file__).parent.parent / Path("tests/data/LFW.npz") 10 | X_y_array = np.load(lfw_npz) 11 | X = X_y_array["X"][:1000] 12 | y = X_y_array["y"][:1000] 13 | 14 | experiment = Experiment( 15 | metrics=( 16 | "cosine_similarity", 17 | "pearson_similarity", 18 | "euclidean_distance_l2", 19 | ), 20 | same_class_samples="full", 21 | different_class_samples=("full", "full"), 22 | ) 23 | start_time = time.time() 24 | print("Starting Experiment") 25 | experiment.run(X, y) 26 | print( 27 | f"Total available embeddings {len(y)} resulted in {len(experiment.df)} " 28 | "samples for the experiment."
29 | ) 30 | print(f"Metrics calculations executed in {time.time()-start_time:.2f} seconds") 31 | print("ROC AUC:") 32 | print(experiment.roc_auc()) 33 | print("threshold @ FPR:") 34 | print(experiment.threshold_at_fpr(0.01)) 35 | print("EER:") 36 | print(experiment.eer()) 37 | print("TAR@FAR:") 38 | print(experiment.tar_at_far([0.01, 0.001])) 39 | -------------------------------------------------------------------------------- /evalify/utils.py: -------------------------------------------------------------------------------- 1 | """Evalify utils module contains various utilities serving other modules.""" 2 | 3 | import numpy as np 4 | import psutil 5 | 6 | GB_TO_BYTE = 1024**3 7 | 8 | 9 | def _validate_vectors(X, y): 10 | X = np.asarray(X, dtype=np.float32) 11 | y = np.asarray(y, dtype=np.int32).squeeze() 12 | if X.ndim != 2: 13 | msg = "Embeddings vector should be 2-D." 14 | raise ValueError(msg) 15 | if y.ndim != 1: 16 | msg = "Target vector should be 1-D." 17 | raise ValueError(msg) 18 | return X, y 19 | 20 | 21 | def _calc_available_memory(): 22 | """Calculate available memory in the system.""" 23 | mem = psutil.virtual_memory() 24 | return mem[1] 25 | 26 | 27 | def calculate_best_batch_size(X, available_mem=None): 28 | """Calculate the maximum rows to fetch per batch without going out of memory. 29 | 30 | We need 3 big arrays to be held in memory (A, B, A*B). 31 | """ 32 | available_mem = _calc_available_memory() if available_mem is None else available_mem 33 | if available_mem > 2 * GB_TO_BYTE: 34 | max_total_rows = np.floor(available_mem - GB_TO_BYTE / X[0].nbytes) 35 | return max_total_rows // 3 36 | max_total_rows = np.floor(available_mem / X[0].nbytes) 37 | return max_total_rows // 5 38 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Stable release 4 | 5 | To install evalify, run this command in your 6 | terminal: 7 | 8 | ```bash 9 | pip install evalify 10 | ``` 11 | 12 | This is the preferred method to install evalify, as it will always install the most recent stable release. 13 | 14 | If you don't have [pip][] installed, this [Python installation guide][] 15 | can guide you through the process. 16 | 17 | ## From source 18 | 19 | The source for evalify can be downloaded from 20 | the [Github repo][]. 21 | 22 | You can either clone the public repository: 23 | 24 | ```bash 25 | git clone https://github.com/ma7555/evalify.git 26 | ``` 27 | 28 | Or download the [tarball][]: 29 | 30 | ```bash 31 | curl -OJL https://github.com/ma7555/evalify/tarball/master 32 | ``` 33 | 34 | Once you have a copy of the source, you can install it with: 35 | 36 | ```bash 37 | pip install . 38 | ``` 39 | 40 | [pip]: https://pip.pypa.io 41 | [Python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/ 42 | [Github repo]: https://github.com/ma7555/evalify 43 | [tarball]: https://github.com/ma7555/evalify/tarball/master 44 | 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | 4 | Copyright (c) 2022, Mahmoud Bahaa 5 | All rights reserved.
6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this 14 | list of conditions and the following disclaimer in the documentation and/or 15 | other materials provided with the distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 29 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 30 | OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from evalify import utils 9 | 10 | 11 | class TestUtils(unittest.TestCase): 12 | """Tests for `evalify` package.""" 13 | 14 | def setUp(self): 15 | """Set up test fixtures, if any.""" 16 | self.rng = np.random.default_rng(555) 17 | self.nphotos = 100 18 | self.emb_size = 8 19 | self.nclasses = 10 20 | self.embs = self.rng.random((self.nphotos, self.emb_size), dtype=np.float32) 21 | self.targets = self.rng.integers(self.nclasses, size=self.nphotos) 22 | 23 | def tearDown(self): 24 | """Tear down test fixtures, if any.""" 25 | 26 | def test_validate_vectors(self): 27 | """Test _validate_vectors""" 28 | embs = self.embs.tolist() 29 | targets = self.targets.tolist() 30 | X, y = utils._validate_vectors(embs, targets) 31 | self.assertEqual(X.shape, (self.nphotos, self.emb_size)) 32 | self.assertEqual(y.shape, (self.nphotos,)) 33 | 34 | def test_calculate_best_batch_size(self): 35 | """Test calculate_best_batch_size""" 36 | batch_size = utils.calculate_best_batch_size(self.embs, 4 * utils.GB_TO_BYTE) 37 | self.assertEqual(batch_size, 1420470954) 38 | 39 | def test_run_errors(self): 40 | """Test run errors""" 41 | with self.assertRaisesRegex(ValueError, "Embeddings vector should be 2-D."): 42 | _ = utils._validate_vectors( 43 | X=self.rng.random(5), y=self.rng.integers(10, size=5), 44 | ) 45 | with self.assertRaisesRegex(ValueError, "Target vector should be 1-D."): 46 | _ = utils._validate_vectors( 47 | X=self.rng.random((5, 5)), y=self.rng.integers(10, size=(5, 2)), 48 | ) 49 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: evalify 2 | 
repo_url: https://github.com/ma7555/evalify 3 | repo_name: evalify 4 | nav: 5 | - home: index.md 6 | - installation: installation.md 7 | - usage: usage.md 8 | - modules: api.md 9 | - contributing: contributing.md 10 | - authors: authors.md 11 | - history: history.md 12 | theme: 13 | name: material 14 | language: en 15 | logo: https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg 16 | palette: 17 | - media: "(prefers-color-scheme: light)" 18 | scheme: default 19 | toggle: 20 | icon: material/weather-night 21 | name: Switch to dark mode 22 | - media: "(prefers-color-scheme: dark)" 23 | scheme: slate 24 | toggle: 25 | icon: material/weather-sunny 26 | name: Switch to light mode 27 | features: 28 | - navigation.indexes 29 | - navigation.tabs 30 | - navigation.instant 31 | - navigation.tabs.sticky 32 | markdown_extensions: 33 | - pymdownx.emoji: 34 | emoji_index: !!python/name:material.extensions.emoji.twemoji 35 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 36 | - pymdownx.critic 37 | - pymdownx.caret 38 | - pymdownx.mark 39 | - pymdownx.tilde 40 | - pymdownx.tabbed 41 | - attr_list 42 | - pymdownx.arithmatex: 43 | generic: true 44 | - pymdownx.highlight: 45 | linenums: true 46 | - pymdownx.superfences 47 | - pymdownx.details 48 | - admonition 49 | - toc: 50 | baselevel: 2 51 | permalink: true 52 | - meta 53 | plugins: 54 | - include-markdown 55 | - search: 56 | lang: en 57 | - mkdocstrings 58 | extra: 59 | social: 60 | - icon: fontawesome/brands/github 61 | link: https://github.com/ma7555/evalify 62 | name: Github 63 | - icon: material/email 64 | link: "mailto:evalify@ma7555.anonaddy.com" 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # mkdocs build dir 108 | site/ 109 | 110 | # logo 111 | logo/ 112 | poetry.lock 113 | .ruff_cache/ 114 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release & publish workflow 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v1.*.*" 7 | 8 | workflow_dispatch: 9 | 10 | jobs: 11 | release: 12 | name: Create Release 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | python-versions: [3.12] 18 | 19 | steps: 20 | - name: Checks-out 21 | uses: actions/checkout@v4 22 | - name: "Build Changelog" 23 | id: build_changelog 24 | uses: mikepenz/release-changelog-builder-action@v5.0.0 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | - uses: actions/setup-python@v5 28 | with: 29 | python-version: ${{ matrix.python-versions }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install tox-gh-actions poetry 34 | 35 | - name: pre-publish documentation 36 | run: | 37 | poetry install -E doc 38 | poetry run mkdocs build 39 | 40 | - name: publish documentation 41 | uses: peaceiris/actions-gh-pages@v4 42 | with: 43 | github_token: ${{ secrets.GITHUB_TOKEN }} 44 | publish_dir: ./site 45 | 46 | - name: Build wheels and source tarball 47 | run: >- 48 | poetry build 49 | 50 | - name: show temporary files 51 | run: >- 52 | ls -l 53 | 54 | - name: create github release 55 | id: create_release 56 | uses: softprops/action-gh-release@v2.0.9 57 | env: 58 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 59 | with: 60 | body: ${{steps.build_changelog.outputs.changelog}} 61 | # body_path: ./CHANGELOG.md 62 | files: dist/*.whl 63 | draft: false 64 | prerelease: false 65 | 66 | - name: create pypi release 67 | uses: pypa/gh-action-pypi-publish@v1.12.2 68 | with: 69 | user: __token__ 70 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | workflow_dispatch: 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | python-versions: ["3.9", "3.10", "3.11", "3.12"] 16 | os: [ubuntu-latest, macos-latest, 
windows-latest] 17 | runs-on: ${{ matrix.os }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-versions }} 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install poetry tox tox-gh-actions 29 | 30 | - name: test with tox 31 | run: tox 32 | 33 | - name: list files 34 | run: ls -l . 35 | 36 | publish_dev_build: 37 | needs: test 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: actions/setup-python@v5 42 | with: 43 | python-version: 3.12 44 | 45 | - name: Install dependencies 46 | run: | 47 | python -m pip install --upgrade pip 48 | pip install poetry tox tox-gh-actions 49 | 50 | - name: test with tox 51 | run: tox 52 | 53 | - name: list files 54 | run: ls -l . 55 | 56 | - uses: codecov/codecov-action@v4 57 | with: 58 | fail_ci_if_error: false 59 | files: coverage.xml 60 | token: ${{ secrets.CODECOV_TOKEN }} 61 | - name: Build wheels and source tarball 62 | run: | 63 | poetry version $(poetry version --short)-dev.$GITHUB_RUN_NUMBER 64 | poetry version --short 65 | poetry build 66 | 67 | - name: publish to Test PyPI 68 | uses: pypa/gh-action-pypi-publish@v1.12.2 69 | with: 70 | user: __token__ 71 | password: ${{ secrets.TEST_PYPI_API_TOKEN}} 72 | repository-url: https://test.pypi.org/legacy/ 73 | skip-existing: true 74 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '41 19 * * 2' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v1 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v1 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v1 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "evalify" 3 | version = "1.0.0" 4 | homepage = "https://github.com/ma7555/evalify" 5 | description = "Evaluate your face or voice verification models literally in seconds." 6 | authors = ["Mahmoud Bahaa "] 7 | keywords = ["biometric verification", "biometric authentication", "evaluation"] 8 | readme = "README.md" 9 | license = "BSD-3-Clause" 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: BSD License", 14 | "Natural Language :: English", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | ] 21 | 22 | packages = [ 23 | { include = "evalify" }, 24 | ] 25 | 26 | [tool.poetry.dependencies] 27 | python = ">=3.9,<4.0" 28 | pandas = "^2.0.0" 29 | numpy = "^2.0.0" 30 | psutil = "^5.9.0" 31 | scikit-learn = "^1.2.0" 32 | 33 | # Optional Dependencies 34 | ruff = { version = ">=0.7.2", optional = true } 35 | pytest = { version = "^7.2.0", optional = true } 36 | pytest-cov = { version = "^4.0.0", optional = true } 37 | scipy = { version = ">=1.10.0", optional = true } 38 | tox = { version = "^4.7.0", optional = true } 39 | virtualenv = { version = ">=20.24.0", optional = true } 40 | pip = { version = ">=23.2.0", optional = true } 41 | mkdocs = { version = ">=1.4.0", optional = true } 42 | mkdocs-material = { version = "^9.2.0", optional = true } 43 | mkdocstrings = { version = ">=0.26.0", optional = true } 44 | mkdocstrings-python = { version = ">=1.12.2", optional = true } 45 | mkdocs-include-markdown-plugin = { version = ">=6.0.0", optional = true } 46 | twine = { version = "^5.0.0", optional = true } 47 | toml = { version = ">0.8.0", optional = true } 48 | pyreadline3 = { version = "^3.4.1", optional = true } 49 | poetry = { version = "^1.8.0", optional = true } 50 | 51 | [tool.poetry.extras] 52 | test = [ 53 | "pytest", 54 | "ruff", 55 | "pytest-cov", 56 | "pyreadline3", 57 | "scipy", 58 | ] 59 | 60 | dev = [ 61 | "tox", 62 | "virtualenv", 63 | "pip", 64 | "twine", 65 | "toml", 66 | "poetry", 67 | ] 68 | 69 | doc = [ 70 | "mkdocs", 71 | "mkdocs-material", 72 | "mkdocstrings", 73 | "mkdocstrings-python", 74 | "mkdocs-include-markdown-plugin", 75 | ] 76 | 77 | [build-system] 78 | requires = ["poetry-core>=1.8.0"] 79 | build-backend = "poetry.core.masonry.api" 80 | 81 | [tool.ruff] 82 | line-length = 88 83 | indent-width = 4 84 | 85 | [tool.ruff.lint] 86 | select = [ 87 | "E", # pycodestyle error 88 | "F", # Pyflakes 89 | "I", # isort 90 | ] 91 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 92 | 93 | [tool.ruff.format] 94 | 
quote-style = "double" 95 | 96 | [tool.ruff.lint.isort] 97 | known-first-party = ["evalify"] 98 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcomed, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at https://github.com/ma7555/evalify/issues. 13 | 14 | If you are reporting a bug, please include: 15 | 16 | * Your operating system name and version. 17 | * Any details about your local setup that might be helpful in troubleshooting. 18 | * Detailed steps to reproduce the bug. 19 | 20 | ### Fix Bugs 21 | 22 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 23 | wanted" is open to whoever wants to implement it. 24 | 25 | ### Implement Features 26 | 27 | Look through the GitHub issues for features. Anything tagged with "enhancement" 28 | and "help wanted" is open to whoever wants to implement it. 29 | 30 | ### Write Documentation 31 | 32 | evalify could always use more documentation, whether as part of the 33 | official evalify docs, in docstrings, or even on the web in blog posts, 34 | articles, and such. 35 | 36 | ### Submit Feedback 37 | 38 | The best way to send feedback is to file an issue at https://github.com/ma7555/evalify/issues. 39 | 40 | If you are proposing a feature: 41 | 42 | * Explain in detail how it would work. 43 | * Keep the scope as narrow as possible, to make it easier to implement. 44 | * Remember that this is a volunteer-driven project, and that contributions 45 | are welcome :) 46 | 47 | ## Get Started! 48 | 49 | Ready to contribute? Here's how to set up `evalify` for local development. 50 | 51 | 1. Fork the `evalify` repo on GitHub. 52 | 2. Clone your fork locally 53 | 54 | ```bash 55 | git clone git@github.com:your_name_here/evalify.git 56 | ``` 57 | 58 | 3. Ensure [poetry](https://python-poetry.org/docs/) is installed. 59 | 4. Install dependencies and start your virtualenv: 60 | 61 | ```bash 62 | poetry install -E test -E doc -E dev 63 | ``` 64 | 65 | 5. Create a branch for local development: 66 | 67 | ```bash 68 | git checkout -b name-of-your-bugfix-or-feature 69 | ``` 70 | 71 | Now you can make your changes locally. 72 | 73 | 6. When you're done making changes, check that your changes pass the 74 | tests, including testing other Python versions, with tox: 75 | 76 | ```bash 77 | tox 78 | ``` 79 | 80 | 7. Commit your changes and push your branch to GitHub: 81 | 82 | ```bash 83 | git add . 84 | git commit -m "Your detailed description of your changes." 85 | git push origin name-of-your-bugfix-or-feature 86 | ``` 87 | 88 | 8. Submit a pull request through the GitHub website. 89 | 90 | ## Pull Request Guidelines 91 | 92 | Before you submit a pull request, check that it meets these guidelines: 93 | 94 | 1. The pull request should include tests. 95 | 2. If the pull request adds functionality, the docs should be updated. Put 96 | your new functionality into a function with a docstring, and add the 97 | feature to the list in README.md. 98 | 3. The pull request should work for Python 3.9, 3.10, 3.11, 3.12 and for PyPy. Check 99 | https://github.com/ma7555/evalify/actions 100 | and make sure that the tests pass for all supported Python versions. 
101 | 102 | ## Running Tests 103 | ```bash 104 | python -m unittest 105 | ``` 106 | or 107 | ```bash 108 | pytest 109 | ``` 110 | To run a subset of tests, point `pytest` or `unittest` at a specific test module, e.g. `pytest tests/test_metrics.py`. 111 | 112 | 113 | ## Deploying 114 | 115 | A reminder for the maintainers on how to deploy. 116 | Make sure all your changes are committed (including an entry in HISTORY.md). 117 | Then run: 118 | 119 | ```bash 120 | git push 121 | git push --tags 122 | ``` 123 | 124 | GitHub Actions will then deploy to PyPI if tests pass. 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # evalify 2 | 3 |
4 | Logo 5 | Badges: License, DOI, Python versions, Release Status, CI Status, Documentation Status, Code style: Ruff, PyPI Downloads/Month
41 | 42 | **Evaluate Biometric Authentication Models Literally in Seconds.** 43 | 44 | ## Installation 45 | #### Stable release: 46 | ```bash 47 | pip install evalify 48 | ``` 49 | #### Bleeding edge: 50 | ```bash 51 | pip install git+https://github.com/ma7555/evalify.git 52 | ``` 53 | ## Used for 54 | Evaluating any biometric authentication model whose output is a high-level embedding, known as a feature vector for visual or behavioural biometrics or a d-vector for auditory biometrics. 55 | 56 | ## Usage 57 | 58 | ```python 59 | import numpy as np 60 | from evalify import Experiment 61 | 62 | rng = np.random.default_rng() 63 | nphotos = 500 64 | emb_size = 32 65 | nclasses = 10 66 | X = rng.random((nphotos, emb_size)) 67 | y = rng.integers(nclasses, size=nphotos) 68 | 69 | experiment = Experiment() 70 | experiment.run(X, y) 71 | roc_auc = experiment.roc_auc() 72 | print(roc_auc) 73 | print(experiment.threshold_at_fpr(0.01)) 74 | ``` 75 | ## How it works 76 | * When you run an experiment, evalify tries all the possible combinations between individuals for authentication based on the `X` and `y` parameters and returns the results, including FPR, TPR, FNR, TNR and ROC AUC. `X` is an array of embeddings and `y` is an array of the corresponding targets. 77 | * Evalify can find the optimal threshold based on your target FPR and the desired similarity or distance metric. 78 | 79 | ## Documentation 80 | * Documentation is published from this repository via GitHub Pages (see `.github/workflows/release.yml`). 81 | 82 | 83 | ## Features 84 | 85 | * Blazing fast implementation for metrics calculation through an optimized einstein sum and vectorized calculations. 86 | * Many operations are dispatched to canonical BLAS, cuBLAS, or other specialized routines. 87 | * Smart sampling options using direct indexing from pre-calculated arrays with total control over sampling strategy and sampling numbers (see the example below). 88 | * Supports most evaluation metrics: 89 | - `cosine_similarity` 90 | - `pearson_similarity` 91 | - `cosine_distance` 92 | - `euclidean_distance` 93 | - `euclidean_distance_l2` 94 | - `minkowski_distance` 95 | - `manhattan_distance` 96 | - `chebyshev_distance` 97 | * Computation time for a 4-metric, 4.2-million-sample experiment is **24 seconds vs 51 minutes** if looping using the `scipy.spatial.distance` implementations. 98 | 99 | ## TODO 100 | * Safer memory allocation. I did not have issues, but if you run out of memory please set the `batch_size` argument manually (see the example below). 101 | 102 | ## Contribution 103 | * Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. 104 | * Please check [CONTRIBUTING.md](https://github.com/ma7555/evalify/blob/main/CONTRIBUTING.md) for guidelines.
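## Example: sampling and batch size control

Below is a minimal sketch of the sampling and batching controls mentioned in the Features and TODO sections, based on the `Experiment` arguments documented in `evalify/evalify.py`. The sample counts and the `batch_size` value are illustrative only.

```python
import numpy as np
from evalify import Experiment

rng = np.random.default_rng(42)
X = rng.random((500, 32))       # 500 embeddings with 32 dimensions
y = rng.integers(10, size=500)  # 10 identities

experiment = Experiment(
    metrics=("cosine_similarity", "euclidean_distance_l2"),
    same_class_samples=5,             # up to 5 images per class -> 5C2 positive pairs
    different_class_samples=(2, 10),  # (N, M): 2 images per class vs 10 images of every other class
    seed=42,
)
# batch_size caps how many pairs are processed per metrics batch; pass an
# integer if the default memory-based sizing runs out of memory.
experiment.run(X, y, batch_size=1_000_000)

print(experiment.roc_auc())
print(experiment.eer())
print(experiment.tar_at_far([0.01, 0.001]))
```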
105 | 106 | ## Citation 107 | * If you use this software, please cite it using the metadata from [CITATION.cff](https://github.com/ma7555/evalify/blob/main/CITATION.cff) 108 | 109 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.spatial import distance 8 | from scipy.stats import pearsonr 9 | 10 | from evalify import metrics 11 | 12 | 13 | class TestMetrics(unittest.TestCase): 14 | """Tests for `evalify` package.""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures, if any.""" 18 | rng = np.random.default_rng(555) 19 | self.nphotos = 500 20 | self.emb_size = 8 21 | self.slice_size = 100 22 | self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32) 23 | self.norms = np.linalg.norm(self.embs, axis=1) 24 | self.ix = rng.integers(self.nphotos, size=self.slice_size) 25 | self.iy = rng.integers(self.nphotos, size=self.slice_size) 26 | 27 | def test_cosine_similarity(self): 28 | """Test cosine_similarity""" 29 | result = metrics.cosine_similarity(self.embs, self.ix, self.iy, self.norms) 30 | result_2 = 1 - np.array( 31 | [ 32 | distance.cosine(self.embs[ix], self.embs[iy]) 33 | for (ix, iy) in zip(self.ix, self.iy) 34 | ], 35 | ) 36 | self.assertEqual(result.shape, (self.slice_size,)) 37 | self.assertTrue(np.allclose(result, result_2)) 38 | 39 | def test_pearson_similarity(self): 40 | """Test pearson_similarity""" 41 | result = metrics.pearson_similarity(self.embs, self.ix, self.iy) 42 | result_2 = np.array( 43 | [ 44 | pearsonr(self.embs[ix], self.embs[iy])[0] 45 | for (ix, iy) in zip(self.ix, self.iy) 46 | ], 47 | ) 48 | self.assertEqual(result.shape, (self.slice_size,)) 49 | self.assertTrue(np.allclose(result, result_2)) 50 | 51 | def test_euclidean_distance(self): 52 | """Test euclidean_distance""" 53 | result = metrics.metrics_caller.get("euclidean_distance")( 54 | self.embs, 55 | self.ix, 56 | self.iy, 57 | ) 58 | result_2 = np.array( 59 | [ 60 | distance.euclidean(self.embs[ix], self.embs[iy]) 61 | for (ix, iy) in zip(self.ix, self.iy) 62 | ], 63 | ) 64 | self.assertEqual(result.shape, (self.slice_size,)) 65 | self.assertTrue(np.allclose(result, result_2)) 66 | 67 | def test_euclidean_distance_l2(self): 68 | """Test euclidean_distance""" 69 | result = metrics.metrics_caller.get("euclidean_distance_l2")( 70 | self.embs, 71 | self.ix, 72 | self.iy, 73 | self.norms, 74 | ) 75 | result_2 = np.array( 76 | [ 77 | distance.euclidean( 78 | self.embs[ix] / np.sqrt(np.sum(self.embs[ix] ** 2)), 79 | self.embs[iy] / np.sqrt(np.sum(self.embs[iy] ** 2)), 80 | ) 81 | for (ix, iy) in zip(self.ix, self.iy) 82 | ], 83 | ) 84 | 85 | self.assertEqual(result.shape, (len(self.ix),)) 86 | self.assertTrue(np.allclose(result, result_2)) 87 | 88 | def test_minkowski_distance_distance(self): 89 | """Test euclidean_distance""" 90 | result = metrics.metrics_caller.get("minkowski_distance")( 91 | self.embs, 92 | self.ix, 93 | self.iy, 94 | p=3, 95 | ) 96 | result_2 = np.array( 97 | [ 98 | distance.minkowski(self.embs[ix], self.embs[iy], p=3) 99 | for (ix, iy) in zip(self.ix, self.iy) 100 | ], 101 | ) 102 | self.assertEqual(result.shape, (self.slice_size,)) 103 | self.assertTrue(np.allclose(result, result_2)) 104 | 105 | def test_manhattan_distance_distance(self): 106 | """Test euclidean_distance""" 107 | result = 
metrics.metrics_caller.get("manhattan_distance")( 108 | self.embs, 109 | self.ix, 110 | self.iy, 111 | ) 112 | result_2 = np.array( 113 | [ 114 | distance.cityblock(self.embs[ix], self.embs[iy]) 115 | for (ix, iy) in zip(self.ix, self.iy) 116 | ], 117 | ) 118 | self.assertEqual(result.shape, (self.slice_size,)) 119 | self.assertTrue(np.allclose(result, result_2)) 120 | 121 | def test_chebyshev_distance_distance(self): 122 | """Test euclidean_distance""" 123 | result = metrics.metrics_caller.get("chebyshev_distance")( 124 | self.embs, 125 | self.ix, 126 | self.iy, 127 | ) 128 | result_2 = np.array( 129 | [ 130 | distance.chebyshev(self.embs[ix], self.embs[iy]) 131 | for (ix, iy) in zip(self.ix, self.iy) 132 | ], 133 | ) 134 | self.assertEqual(result.shape, (self.slice_size,)) 135 | self.assertTrue(np.allclose(result, result_2)) 136 | -------------------------------------------------------------------------------- /evalify/metrics.py: -------------------------------------------------------------------------------- 1 | """Evalify metrics module used for calculating the evaluation metrics. 2 | 3 | Optimized calculations using einstein sum. Embeddings array and norm arrays are indexed 4 | with every 5 | split and calculations happens over large data chunks very quickly. 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def _inner1d(A, B): 12 | """Calculate the inner product between two arrays of vectors. 13 | 14 | Args: 15 | A (numpy.ndarray): 2D array of shape (n_samples, n_features) 16 | B (numpy.ndarray): 2D array of shape (n_samples, n_features) 17 | 18 | Returns: 19 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the inner 20 | product of the corresponding rows in A and B 21 | 22 | """ 23 | return np.einsum("ij,ij->i", A, B, optimize="optimal") 24 | 25 | 26 | def cosine_similarity(embs, ix, iy, norms, return_distance=False, **kwargs): 27 | """Calculate the cosine similarity between two arrays of vectors. 28 | 29 | Args: 30 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 31 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 32 | the first array 33 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 34 | the second array 35 | norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm 36 | of each row in X 37 | return_distance (bool): Whether to return the cosine distance instead of the 38 | cosine similarity. Defaults to False. 39 | 40 | Returns: 41 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the cosine 42 | similarity (or cosine distance) of the corresponding rows in X. 43 | 44 | """ 45 | similarity = _inner1d(embs[ix], embs[iy]) / (norms[ix] * norms[iy]) 46 | return 1 - similarity if return_distance else similarity 47 | 48 | 49 | def euclidean_distance_l2(embs, ix, iy, norms, **kwargs): 50 | """Calculate the L2-normalized Euclidean distance between two arrays of vectors. 51 | 52 | Args: 53 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features). 54 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 55 | the first array. 56 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 57 | the second array. 58 | norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm 59 | of each row in embs. 60 | 61 | Returns: 62 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the 63 | L2-normalized Euclidean distance of the corresponding rows in embs. 
64 | 65 | """ 66 | X = embs[ix] / norms[ix].reshape(-1, 1) - embs[iy] / norms[iy].reshape(-1, 1) 67 | return np.linalg.norm(X, axis=1) 68 | 69 | 70 | def minkowski_distance(embs, ix, iy, p, **kwargs): 71 | """Calculate the element-wise Minkowski or Manhattan or Chebyshev distance. 72 | 73 | Args: 74 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 75 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 76 | the first array 77 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 78 | the second array 79 | p (int): The order of the norm of the difference. 80 | 81 | Returns: 82 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the 83 | Minkowski distance of the corresponding rows in embs. 84 | 85 | """ 86 | return np.linalg.norm(embs[ix] - embs[iy], ord=p, axis=1) 87 | 88 | 89 | def pearson_similarity(embs, ix, iy, **kwargs): 90 | """Calculate the Pearson correlation coefficient between two arrays of vectors. 91 | 92 | Args: 93 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 94 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 95 | the first array 96 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 97 | the second array 98 | 99 | Returns: 100 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the Pearson 101 | correlation coefficient 102 | of the corresponding rows in embs. 103 | 104 | """ 105 | A = embs[ix] 106 | B = embs[iy] 107 | A_mA = A - np.expand_dims(A.mean(axis=1), -1) 108 | B_mB = B - np.expand_dims(B.mean(axis=1), -1) 109 | ssA = np.expand_dims((A_mA**2).sum(axis=1), -1) 110 | ssB = np.expand_dims((B_mB**2).sum(axis=1), -1) 111 | return _inner1d(A_mA, B_mB) / np.sqrt(_inner1d(ssA, ssB)) 112 | 113 | 114 | metrics_caller = { 115 | "cosine_similarity": cosine_similarity, 116 | "pearson_similarity": pearson_similarity, 117 | "cosine_distance": lambda embs, ix, iy, norms, **kwargs: cosine_similarity( 118 | embs, 119 | ix, 120 | iy, 121 | norms, 122 | return_distance=True, 123 | ), 124 | "euclidean_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 125 | embs, 126 | ix, 127 | iy, 128 | p=2, 129 | ), 130 | "euclidean_distance_l2": euclidean_distance_l2, 131 | "minkowski_distance": minkowski_distance, 132 | "manhattan_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 133 | embs, 134 | ix, 135 | iy, 136 | p=1, 137 | ), 138 | "chebyshev_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 139 | embs, 140 | ix, 141 | iy, 142 | p=np.inf, 143 | ), 144 | } 145 | 146 | METRICS_NEED_NORM = ["cosine_similarity", "cosine_distance", "euclidean_distance_l2"] 147 | METRICS_NEED_ORDER = ["minkowski_distance"] 148 | DISTANCE_TO_SIMILARITY = { 149 | "cosine_distance": lambda x: 1 - x, 150 | "euclidean_distance": lambda x: 1 / (1 + x), 151 | "euclidean_distance_l2": lambda x: 1 - x, 152 | "minkowski_distance": lambda x: 1 / (1 + x), 153 | "manhattan_distance": lambda x: 1 / (1 + x), 154 | "chebyshev_distance": lambda x: 1 / (1 + x), 155 | } 156 | 157 | REVERSE_DISTANCE_TO_SIMILARITY = { 158 | "cosine_distance": lambda x: 1 - x, 159 | "euclidean_distance": lambda x: (1 / x) - 1, 160 | "euclidean_distance_l2": lambda x: 1 - x, 161 | "minkowski_distance": lambda x: (1 / x) - 1, 162 | "manhattan_distance": lambda x: (1 / x) - 1, 163 | "chebyshev_distance": lambda x: (1 / x) - 1, 164 | } 165 | -------------------------------------------------------------------------------- 
/tests/test_experiment_real_data.py: -------------------------------------------------------------------------------- 1 | # tests/test_experiment_real_data_small.py 2 | 3 | import os 4 | import pathlib 5 | import unittest 6 | from collections import OrderedDict 7 | 8 | import numpy as np 9 | 10 | from evalify import Experiment 11 | 12 | 13 | class TestExperimentRealDataSmall(unittest.TestCase): 14 | """Tests for Experiment class using a subset of the LFW dataset""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures.""" 18 | # Path to LFW.npz, assuming it's in the tests/data/ directory 19 | self.lfw_npz = os.path.join(pathlib.Path(__file__).parent, "data", "LFW.npz") 20 | if not os.path.exists(self.lfw_npz): 21 | self.fail(f"LFW.npz not found at {self.lfw_npz}") 22 | 23 | X_y_array = np.load(self.lfw_npz) 24 | self.X = X_y_array["X"][:1000] 25 | self.y = X_y_array["y"][:1000] 26 | 27 | self.metrics = [ 28 | "cosine_similarity", 29 | "pearson_similarity", 30 | "euclidean_distance_l2", 31 | ] 32 | 33 | self.experiment = Experiment( 34 | metrics=self.metrics, 35 | same_class_samples="full", 36 | different_class_samples=("full", "full"), 37 | seed=555, # To ensure reproducibility 38 | ) 39 | 40 | # Run the experiment once during setup to reuse the results in multiple tests 41 | self.df = self.experiment.run(self.X, self.y) 42 | 43 | def test_number_of_samples(self): 44 | """Test that the number of generated samples matches the expected count.""" 45 | expected_num_samples = 499500 46 | actual_num_samples = len(self.df) 47 | self.assertEqual( 48 | actual_num_samples, 49 | expected_num_samples, 50 | f"Expected {expected_num_samples} samples, got {actual_num_samples}.", 51 | ) 52 | 53 | def test_roc_auc(self): 54 | """Test that ROC AUC values match the expected results.""" 55 | expected_roc_auc = OrderedDict( 56 | { 57 | "euclidean_distance_l2": 0.9998640116393942, 58 | "cosine_similarity": 0.9998640114481793, 59 | "pearson_similarity": 0.999858162377461, 60 | } 61 | ) 62 | 63 | actual_roc_auc = self.experiment.roc_auc() 64 | 65 | self.assertEqual( 66 | len(actual_roc_auc), 67 | len(self.metrics), 68 | f"Expected ROC AUC for {len(self.metrics)} metrics, got " 69 | f"{len(actual_roc_auc)}.", 70 | ) 71 | 72 | for metric in self.metrics: 73 | self.assertIn( 74 | metric, actual_roc_auc, f"ROC AUC for metric '{metric}' not found." 
75 | ) 76 | self.assertAlmostEqual( 77 | actual_roc_auc[metric], 78 | expected_roc_auc[metric], 79 | places=6, 80 | msg=f"ROC AUC for metric '{metric}' does not match.", 81 | ) 82 | 83 | def test_threshold_at_fpr(self): 84 | """Test that thresholds at a specified FPR match expected values.""" 85 | far = 0.01 86 | expected_threshold_at_fpr = { 87 | "cosine_similarity": { 88 | "FPR": 0.010001841326240518, 89 | "TPR": 0.9973539973539973, 90 | "threshold": 0.37717896699905396, 91 | }, 92 | "pearson_similarity": { 93 | "FPR": 0.010001841326240518, 94 | "TPR": 0.9973539973539973, 95 | "threshold": 0.37802454829216003, 96 | }, 97 | "euclidean_distance_l2": { 98 | "FPR": 0.010001841326240518, 99 | "TPR": 0.9973539973539973, 100 | "threshold": 1.1160835027694702, 101 | }, 102 | } 103 | 104 | actual_threshold_at_fpr = self.experiment.threshold_at_fpr(far) 105 | 106 | self.assertEqual( 107 | len(actual_threshold_at_fpr), 108 | len(self.metrics), 109 | f"Expected Threshold @ FPR for {len(self.metrics)} metrics, got " 110 | f"{len(actual_threshold_at_fpr)}.", 111 | ) 112 | 113 | for metric in self.metrics: 114 | self.assertIn( 115 | metric, 116 | actual_threshold_at_fpr, 117 | f"Threshold @ FPR for metric '{metric}' not found.", 118 | ) 119 | expected = expected_threshold_at_fpr[metric] 120 | actual = actual_threshold_at_fpr[metric] 121 | 122 | self.assertAlmostEqual( 123 | actual["FPR"], 124 | expected["FPR"], 125 | places=6, 126 | msg=f"FPR for metric '{metric}' does not match.", 127 | ) 128 | self.assertAlmostEqual( 129 | actual["TPR"], 130 | expected["TPR"], 131 | places=6, 132 | msg=f"TPR for metric '{metric}' does not match.", 133 | ) 134 | self.assertAlmostEqual( 135 | actual["threshold"], 136 | expected["threshold"], 137 | places=6, 138 | msg=f"Threshold for metric '{metric}' at FAR={far} does not match.", 139 | ) 140 | 141 | def test_eer(self): 142 | """Test that EER values and thresholds match the expected results.""" 143 | expected_eer = OrderedDict( 144 | { 145 | "cosine_similarity": { 146 | "EER": 0.004724863226023654, 147 | "threshold": 0.4244731664657593, 148 | }, 149 | "euclidean_distance_l2": { 150 | "EER": 0.004724863226023654, 151 | "threshold": 1.0728718042373657, 152 | }, 153 | "pearson_similarity": { 154 | "EER": 0.004914464785693375, 155 | "threshold": 0.4228288531303406, 156 | }, 157 | } 158 | ) 159 | 160 | actual_eer = self.experiment.eer() 161 | 162 | self.assertEqual( 163 | len(actual_eer), 164 | len(self.metrics), 165 | f"Expected EER for {len(self.metrics)} metrics, got {len(actual_eer)}.", 166 | ) 167 | 168 | for metric in self.metrics: 169 | self.assertIn(metric, actual_eer, f"EER for metric '{metric}' not found.") 170 | expected = expected_eer[metric] 171 | actual = actual_eer[metric] 172 | 173 | self.assertAlmostEqual( 174 | actual["EER"], 175 | expected["EER"], 176 | places=6, 177 | msg=f"EER for metric '{metric}' does not match.", 178 | ) 179 | self.assertAlmostEqual( 180 | actual["threshold"], 181 | expected["threshold"], 182 | places=6, 183 | msg=f"Threshold for EER of metric '{metric}' does not match.", 184 | ) 185 | 186 | def test_tar_at_far(self): 187 | """Test the tar_at_far method with specific FAR values.""" 188 | # Define FAR values to test 189 | far_values = [0.01, 0.001] 190 | 191 | # Define expected TAR values based on the recent experiment 192 | expected_tar_at_far = OrderedDict( 193 | { 194 | "cosine_similarity": { 195 | 0.01: 0.9973539973539973, 196 | 0.001: 0.9795879795879796, 197 | }, 198 | "pearson_similarity": { 199 | 0.01: 0.9973539973539973, 200 | 
0.001: 0.9793989793989794, 201 | }, 202 | "euclidean_distance_l2": { 203 | 0.01: 0.9973539973539973, 204 | 0.001: 0.9795879795879796, 205 | }, 206 | } 207 | ) 208 | 209 | # Call tar_at_far with the FAR values 210 | actual_tar_at_far = self.experiment.tar_at_far(far_values) 211 | 212 | # Assert the returned TAR@FAR matches expected values 213 | self.assertEqual( 214 | len(actual_tar_at_far), 215 | len(self.metrics), 216 | f"Expected TAR@FAR for {len(self.metrics)} metrics, got " 217 | f"{len(actual_tar_at_far)}.", 218 | ) 219 | 220 | for metric in self.metrics: 221 | self.assertIn( 222 | metric, actual_tar_at_far, f"TAR@FAR for metric '{metric}' not found." 223 | ) 224 | 225 | for far in far_values: 226 | self.assertIn( 227 | far, 228 | actual_tar_at_far[metric], 229 | f"TAR@FAR for metric '{metric}' at FAR={far} not found.", 230 | ) 231 | 232 | expected_tar = expected_tar_at_far[metric][far] 233 | actual_tar = actual_tar_at_far[metric][far] 234 | 235 | self.assertAlmostEqual( 236 | actual_tar, 237 | expected_tar, 238 | places=6, 239 | msg=f"TAR@FAR for metric '{metric}' at FAR={far} does not match.", 240 | ) 241 | 242 | 243 | # if __name__ == '__main__': 244 | # unittest.main() 245 | -------------------------------------------------------------------------------- /tests/test_evalify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.special import comb 8 | 9 | from evalify import Experiment 10 | from evalify.metrics import metrics_caller 11 | 12 | 13 | class TestEvalify(unittest.TestCase): 14 | """Tests for `evalify` package.""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures, if any.""" 18 | rng = np.random.default_rng(555) 19 | self.nphotos = 500 20 | self.emb_size = 8 21 | self.nclasses = 10 22 | self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32) 23 | self.targets = rng.integers(self.nclasses, size=self.nphotos) 24 | 25 | def test_run_euclidean_distance(self): 26 | """Test run with euclidean_distance""" 27 | experiment = Experiment(metrics="euclidean_distance") 28 | df = experiment.run(self.embs, self.targets) 29 | experiment = Experiment(metrics="euclidean_distance_l2") 30 | df_l2 = experiment.run(self.embs, self.targets) 31 | self.assertGreater(df.euclidean_distance.max(), 0) 32 | self.assertGreater(df_l2.euclidean_distance_l2.max(), 0) 33 | 34 | def test_run_cosine_similarity(self): 35 | """Test run with cosine_similarity""" 36 | experiment = Experiment(metrics="cosine_similarity") 37 | df = experiment.run(self.embs, self.targets) 38 | self.assertLessEqual(df.cosine_similarity.max(), 1) 39 | 40 | def test_run_all_metrics_separated(self): 41 | for metric in metrics_caller.keys(): 42 | experiment = Experiment(metrics=metric) 43 | df = experiment.run(self.embs, self.targets) 44 | self.assertTrue(metric in df.columns) 45 | 46 | def test_run_all_metrics_combined(self): 47 | metrics = set(metrics_caller.keys()) 48 | experiment = Experiment(metrics=metrics) 49 | df = experiment.run(self.embs, self.targets) 50 | self.assertTrue(metrics.issubset(df.columns)) 51 | 52 | def test_run_full_class_samples(self): 53 | """Test run with return_embeddings""" 54 | experiment = Experiment( 55 | same_class_samples="full", 56 | different_class_samples=("full", "full"), 57 | ) 58 | df = experiment.run( 59 | self.embs, 60 | self.targets, 61 | ) 62 | self.assertEqual(len(df), comb(self.nphotos, 2)) 63 | 64 | def 
test_run_custom_class_samples(self): 65 | """Test run with custom same_class_samples and different_class_samples""" 66 | N, M = (2, 5) 67 | experiment = Experiment(same_class_samples=2, different_class_samples=(N, M)) 68 | same_class_samples = 3 69 | df = experiment.run( 70 | self.embs, 71 | self.targets, 72 | ) 73 | 74 | self.assertLessEqual( 75 | len(df), 76 | (comb(same_class_samples, 2) * self.nclasses) 77 | + (self.nclasses * (self.nclasses - 1)) * M * N, 78 | ) 79 | 80 | def test_run_shuffle(self): 81 | """Test run with shuffle""" 82 | experiment = Experiment(seed=555) 83 | df1 = experiment.run(self.embs, self.targets, shuffle=True) 84 | df2 = experiment.run(self.embs, self.targets, shuffle=True) 85 | self.assertEqual(len(df1), len(df2)) 86 | self.assertEqual(sum(df1.index), sum(df2.index)) 87 | self.assertTrue(all(ix in df2.index for ix in df1.index)) 88 | 89 | def test_run_no_batch_size(self): 90 | """Test run with no batch_size""" 91 | experiment = Experiment( 92 | same_class_samples=2, 93 | different_class_samples=(1, 1), 94 | seed=555, 95 | ) 96 | experiment.run(self.embs, self.targets, batch_size=None) 97 | self.assertTrue(experiment.check_experiment_run()) 98 | 99 | def test_run_return_embeddings(self): 100 | """Test run with return_embeddings""" 101 | experiment = Experiment() 102 | df = experiment.run(self.embs, self.targets, return_embeddings=True) 103 | self.assertLessEqual(len(df.at[0, "emb_a"]), self.emb_size) 104 | 105 | def test_run_evaluate_at_threshold(self): 106 | """Test run with evaluate_at_threshold""" 107 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 108 | experiment = Experiment(metrics=metrics) 109 | experiment.run( 110 | self.embs, 111 | self.targets, 112 | ) 113 | evaluations = experiment.evaluate_at_threshold(0.5, "cosine_similarity") 114 | # self.assertEqual(len(evaluations), len(metrics)) 115 | self.assertEqual(len(evaluations), 9) 116 | 117 | def test_run_find_optimal_cutoff(self): 118 | """Test run with find_optimal_cutoff""" 119 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 120 | experiment = Experiment(metrics=metrics) 121 | experiment.run( 122 | self.embs, 123 | self.targets, 124 | ) 125 | evaluations = experiment.find_optimal_cutoff() 126 | self.assertEqual(len(evaluations), len(metrics)) 127 | self.assertTrue(all(evaluation in metrics for evaluation in evaluations)) 128 | 129 | def test_run_get_roc_auc(self): 130 | """Test run with get_roc_auc""" 131 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 132 | experiment = Experiment(metrics=metrics) 133 | experiment.run( 134 | self.embs, 135 | self.targets, 136 | ) 137 | roc_auc = experiment.roc_auc() 138 | # self.assertEqual(len(evaluations), len(metrics)) 139 | self.assertEqual(len(roc_auc), len(metrics)) 140 | self.assertTrue(all(auc in metrics for auc in roc_auc)) 141 | 142 | def test_run_predicted_as_similarity(self): 143 | """Test run with predicted_as_similarity""" 144 | experiment = Experiment(metrics=["cosine_similarity", "cosine_distance"]) 145 | experiment.run( 146 | self.embs, 147 | self.targets, 148 | ) 149 | result = experiment.predicted_as_similarity("cosine_similarity") 150 | result_2 = experiment.predicted_as_similarity("cosine_distance") 151 | self.assertTrue(np.allclose(result, result_2)) 152 | 153 | def test_run_find_threshold_at_fpr(self): 154 | """Test run with find_threshold_at_fpr""" 155 | metric = "cosine_similarity" 156 | experiment = Experiment( 157 | metrics=metric, 158 | different_class_samples=("full", "full"), 159 | ) 160 | 
experiment.run( 161 | self.embs, 162 | self.targets, 163 | ) 164 | fpr_d01 = experiment.threshold_at_fpr(0.1) 165 | fpr_d1 = experiment.threshold_at_fpr(1) 166 | fpr_d0 = experiment.threshold_at_fpr(0) 167 | self.assertEqual(len(fpr_d01[metric]), 3) 168 | self.assertAlmostEqual(fpr_d01[metric]["threshold"], 0.8939142, 3) 169 | self.assertAlmostEqual(fpr_d0[metric]["threshold"], 0.9953355, 3) 170 | self.assertAlmostEqual(fpr_d1[metric]["threshold"], 0.2060538, 3) 171 | 172 | def test_run_calculate_eer(self): 173 | """Test run with calculate_eer""" 174 | metric = "cosine_similarity" 175 | experiment = Experiment( 176 | metrics=metric, 177 | different_class_samples=("full", "full"), 178 | ) 179 | experiment.run( 180 | self.embs, 181 | self.targets, 182 | ) 183 | eer = experiment.eer() 184 | self.assertTrue("EER" in eer[metric]) 185 | 186 | def test__call__(self): 187 | """Test run with __call__""" 188 | experiment = Experiment(seed=555) 189 | result = experiment.run(self.embs, self.targets) 190 | result_2 = experiment(self.embs, self.targets) 191 | self.assertTrue(np.array_equal(result.to_numpy(), result_2.to_numpy())) 192 | 193 | def test_run_errors(self): 194 | """Test run errors""" 195 | with self.assertRaisesRegex( 196 | ValueError, 197 | "`same_class_samples` argument must be one of 'full' or an integer ", 198 | ): 199 | experiment = Experiment(same_class_samples=54.4) 200 | experiment.run(self.embs, self.targets) 201 | 202 | with self.assertRaisesRegex( 203 | ValueError, 204 | "`different_class_samples` argument must be one of 'full', 'minimal'", 205 | ): 206 | experiment = Experiment(different_class_samples="all") 207 | experiment.run(self.embs, self.targets) 208 | 209 | with self.assertRaisesRegex( 210 | ValueError, 211 | "When passing `different_class_samples` as a tuple or list. ", 212 | ): 213 | experiment = Experiment(different_class_samples=(1, 2, 3)) 214 | experiment.run( 215 | self.embs, 216 | self.targets, 217 | ) 218 | 219 | with self.assertRaisesRegex( 220 | ValueError, 221 | '`batch_size` argument must be either "best" or of type integer', 222 | ): 223 | experiment = Experiment() 224 | experiment.run(self.embs, self.targets, batch_size="all") 225 | 226 | with self.assertRaisesRegex(ValueError, "`metric` argument must be one of "): 227 | experiment = Experiment(metrics="dot_prod") 228 | experiment.run(self.embs, self.targets) 229 | 230 | with self.assertRaisesRegex( 231 | ValueError, 232 | "`p` must be an int and at least 1. Received: p=", 233 | ): 234 | experiment = Experiment() 235 | experiment.run(self.embs, self.targets, p=0.1) 236 | 237 | with self.assertRaisesRegex( 238 | NotImplementedError, 239 | "`evaluate_at_threshold` function can only be run after running " 240 | "`run_experiment`.", 241 | ): 242 | experiment = Experiment() 243 | experiment.evaluate_at_threshold(0.5, "euclidean_distance") 244 | 245 | with self.assertRaisesRegex( 246 | ValueError, 247 | "`evaluate_at_threshold` function can only be called with `metric` from ", 248 | ): 249 | experiment = Experiment(metrics="euclidean_distance") 250 | experiment.run(self.embs, self.targets) 251 | experiment.evaluate_at_threshold(0.5, "cosine_similarity") 252 | 253 | with self.assertRaisesRegex( 254 | ValueError, 255 | "`fpr` must be between 0 and 1. 
Received wanted_fpr=", 256 | ): 257 | experiment = Experiment(metrics="euclidean_distance") 258 | experiment.run(self.embs, self.targets) 259 | experiment.threshold_at_fpr(-1.1) 260 | -------------------------------------------------------------------------------- /evalify/evalify.py: -------------------------------------------------------------------------------- 1 | """Evalify main module used for creating the verification experiments. 2 | 3 | Creates experiments with embedding pairs to compare for face verification tasks 4 | including positive pairs, negative pairs and metrics calculations using a very 5 | optimized einstein sum. Many operations are dispatched to canonical BLAS, cuBLAS, 6 | or other specialized routines. Extremely large arrays are split into smaller batches, 7 | every batch would consume the roughly the maximum available memory. 8 | 9 | Typical usage example: 10 | 11 | ``` 12 | experiment = Experiment() 13 | experiment.run(X, y) 14 | ``` 15 | """ 16 | 17 | import itertools 18 | import sys 19 | from collections import OrderedDict 20 | from typing import Any, List, Optional, Sequence, Tuple, Union 21 | 22 | import numpy as np 23 | import pandas as pd 24 | from sklearn.metrics import auc, confusion_matrix, roc_curve 25 | 26 | from evalify.metrics import ( 27 | DISTANCE_TO_SIMILARITY, 28 | METRICS_NEED_NORM, 29 | METRICS_NEED_ORDER, 30 | REVERSE_DISTANCE_TO_SIMILARITY, 31 | metrics_caller, 32 | ) 33 | from evalify.utils import _validate_vectors, calculate_best_batch_size 34 | 35 | StrOrInt = Union[str, int] 36 | StrIntSequence = Union[str, int, Sequence[Union[str, int]]] 37 | 38 | 39 | class Experiment: 40 | """Defines an experiment for evalifying. 41 | 42 | Args: 43 | metrics: The list of metrics to use. Can be one or more of the following: 44 | `cosine_similarity`, `pearson_similarity`, `cosine_distance`, 45 | `euclidean_distance`, `euclidean_distance_l2`, `minkowski_distance`, 46 | `manhattan_distance` and `chebyshev_distance` 47 | same_class_samples: 48 | - 'full': Samples all possible images within each class to create all 49 | all possible positive pairs. 50 | - int: Samples specific number of images for every class to create 51 | nC2 pairs where n is passed integer. 52 | different_class_samples: 53 | - 'full': Samples one image from every class with all possible pairs 54 | of different classes. This can grow exponentially as the number 55 | of images increase. (N, M) = (1, "full") 56 | - 'minimal': Samples one image from every class with one image of 57 | all other classes. (N, M) = (1, 1). (Default) 58 | - int: Samples one image from every class with provided number of 59 | images of every other class. 60 | - tuple or list: (N, M) Samples N images from every class with M images of 61 | every other class. 62 | seed: Optional random seed for reproducibility. 63 | 64 | 65 | Notes: 66 | - `same_class_samples`: 67 | If the provided number is greater than the achievable for the class, 68 | the maximum possible combinations are used. 69 | - `different_class_samples`: 70 | If the provided number is greater than the achievable for the class, 71 | the maximum possible combinations are used. (N, M) can also be 72 | ('full', 'full') but this will calculate all possible combinations 73 | between all posibile negative samples. If the dataset is not small 74 | this will probably result in an extremely large array!. 
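    Example:
        A minimal construction sketch; the argument values below are arbitrary
        and chosen only for illustration:

        ```
        experiment = Experiment(
            metrics=["cosine_similarity", "euclidean_distance_l2"],
            same_class_samples="full",
            different_class_samples=(2, 5),
            seed=42,
        )
        ```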
75 | 76 | """ 77 | 78 | def __init__( 79 | self, 80 | metrics: Union[str, Sequence[str]] = "cosine_similarity", 81 | same_class_samples: StrOrInt = "full", 82 | different_class_samples: StrIntSequence = "minimal", 83 | seed: Optional[int] = None, 84 | ) -> None: 85 | self.experiment_success = False 86 | self.cached_predicted_as_similarity = {} 87 | self.metrics = (metrics,) if isinstance(metrics, str) else metrics 88 | self.same_class_samples = same_class_samples 89 | self.different_class_samples = different_class_samples 90 | self.seed = seed 91 | 92 | def __call__(self, *args: Any, **kwds: Any) -> Any: 93 | return self.run(*args, **kwds) 94 | 95 | @staticmethod 96 | def _validate_args( 97 | metrics: Sequence[str], 98 | same_class_samples: StrOrInt, 99 | different_class_samples: StrIntSequence, 100 | batch_size: Optional[StrOrInt], 101 | p, 102 | ) -> None: 103 | """Validates passed arguments to Experiment.run() method.""" 104 | if same_class_samples != "full" and not isinstance(same_class_samples, int): 105 | msg = ( 106 | "`same_class_samples` argument must be one of 'full' or an integer " 107 | f"Received: same_class_samples={same_class_samples}" 108 | ) 109 | raise ValueError( 110 | msg, 111 | ) 112 | 113 | if different_class_samples not in ("full", "minimal"): 114 | if not isinstance(different_class_samples, (int, list, tuple)): 115 | msg = ( 116 | "`different_class_samples` argument must be one of 'full', " 117 | "'minimal', an integer, a list or tuple of integers or keyword " 118 | "'full'." 119 | f"Received: different_class_samples={different_class_samples}." 120 | ) 121 | raise ValueError( 122 | msg, 123 | ) 124 | if isinstance(different_class_samples, (list, tuple)) and ( 125 | not ( 126 | all( 127 | isinstance(i, int) or i == "full" 128 | for i in different_class_samples 129 | ) 130 | ) 131 | or (len(different_class_samples)) != 2 132 | ): 133 | msg = ( 134 | "When passing `different_class_samples` as a tuple or list, " 135 | "elements must be exactly two of integer type or keyword 'full' " 136 | "(N, M). " 137 | f"Received: different_class_samples={different_class_samples}." 138 | ) 139 | raise ValueError( 140 | msg, 141 | ) 142 | 143 | if ( 144 | batch_size != "best" 145 | and not isinstance(batch_size, int) 146 | and batch_size is not None 147 | ): 148 | msg = ( 149 | '`batch_size` argument must be either "best" or of type integer ' 150 | f"Received: batch_size={batch_size} with type {type(batch_size)}." 151 | ) 152 | raise ValueError( 153 | msg, 154 | ) 155 | 156 | if any(metric not in metrics_caller for metric in metrics): 157 | msg = ( 158 | f"`metric` argument must be one of {tuple(metrics_caller.keys())} " 159 | f"Received: metric={metrics}" 160 | ) 161 | raise ValueError( 162 | msg, 163 | ) 164 | 165 | if p < 1: 166 | msg = f"`p` must be an int and at least 1. 
Received: p={p}" 167 | raise ValueError(msg) 168 | 169 | def _get_pairs( 170 | self, 171 | y, 172 | same_class_samples, 173 | different_class_samples, 174 | target, 175 | ) -> List[Tuple]: 176 | """Generates experiment pairs.""" 177 | same_ixs_full = np.argwhere(y == target).ravel() 178 | if isinstance(same_class_samples, int): 179 | same_class_samples = min(len(same_ixs_full), same_class_samples) 180 | same_ixs = self.rng.choice(same_ixs_full, same_class_samples) 181 | elif same_class_samples == "full": 182 | same_ixs = same_ixs_full 183 | same_pairs = itertools.combinations(same_ixs, 2) 184 | same_pairs = [(a, b, target, target, 1) for a, b in same_pairs] 185 | 186 | different_ixs = np.argwhere(y != target).ravel() 187 | diff_df = pd.DataFrame( 188 | data={"sample_idx": different_ixs, "target": y[different_ixs]}, 189 | ) 190 | 191 | diff_df = diff_df.sample(frac=1, random_state=self.seed) 192 | if different_class_samples in ["full", "minimal"] or isinstance( 193 | different_class_samples, 194 | int, 195 | ): 196 | N = 1 197 | if different_class_samples == "minimal": 198 | diff_df = diff_df.drop_duplicates(subset=["target"]) 199 | else: 200 | N, M = different_class_samples 201 | N = len(same_ixs_full) if N == "full" else min(N, len(same_ixs_full)) 202 | if M != "full": 203 | diff_df = ( 204 | diff_df.groupby("target") 205 | .apply(lambda x: x[:M], include_groups=False) 206 | .droplevel(0) 207 | ) 208 | 209 | different_ixs = diff_df.sample_idx.to_numpy() 210 | 211 | different_pairs = itertools.product( 212 | self.rng.choice(same_ixs_full, N, replace=False), 213 | different_ixs, 214 | ) 215 | different_pairs = [(a, b, target, y[b], 0) for a, b in different_pairs if a < b] 216 | 217 | return same_pairs + different_pairs 218 | 219 | def run( 220 | self, 221 | X: np.ndarray, 222 | y: np.ndarray, 223 | batch_size: Optional[StrOrInt] = "best", 224 | shuffle: bool = False, 225 | return_embeddings: bool = False, 226 | p: int = 3, 227 | ) -> pd.DataFrame: 228 | """Runs an experiment for face verification 229 | Args: 230 | X: Embeddings array 231 | y: Targets for X as integers 232 | batch_size: 233 | - 'best': Let the program decide based on available memory such that 234 | every batch will fit into the available memory. (Default) 235 | - int: Manually decide the batch_size. 236 | - None: No batching. All experiment and intermediate results must fit 237 | entirely into memory or a MemoryError will be raised. 238 | shuffle: Shuffle the returned experiment dataframe. Default: False. 239 | return_embeddings: Whether to return the embeddings instead of indexes. 240 | Default: False 241 | p: 242 | The order of the norm of the difference. Should be `p >= 1`, Only valid 243 | with minkowski_distance as a metric. Default = 3. 244 | 245 | Returns: 246 | pandas.DataFrame: A DataFrame representing the experiment results. 247 | 248 | Raises: 249 | ValueError: An error occurred with the provided arguments. 
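        Example:
            A minimal sketch with synthetic embeddings; the array shapes, class
            count and seed are arbitrary illustration values:

            ```
            import numpy as np
            from evalify import Experiment

            rng = np.random.default_rng(555)
            X = rng.random((500, 8), dtype=np.float32)  # 500 embeddings of size 8
            y = rng.integers(10, size=500)              # integer labels for 10 classes
            experiment = Experiment(metrics="cosine_similarity", seed=555)
            df = experiment.run(X, y, batch_size="best")
            # df columns: emb_a, emb_b, target_a, target_b, target, cosine_similarity
            ```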
250 | 251 | """ 252 | self._validate_args( 253 | self.metrics, 254 | self.same_class_samples, 255 | self.different_class_samples, 256 | batch_size, 257 | p, 258 | ) 259 | X, y = _validate_vectors(X, y) 260 | all_targets = np.unique(y) 261 | all_pairs = [] 262 | metric_fns = list(map(metrics_caller.get, self.metrics)) 263 | self.rng = np.random.default_rng(self.seed) 264 | for target in all_targets: 265 | all_pairs += self._get_pairs( 266 | y, 267 | self.same_class_samples, 268 | self.different_class_samples, 269 | target, 270 | ) 271 | 272 | self.df = pd.DataFrame( 273 | data=all_pairs, 274 | columns=["emb_a", "emb_b", "target_a", "target_b", "target"], 275 | ) 276 | experiment_size = len(self.df) 277 | if shuffle: 278 | self.df = self.df.sample(frac=1, random_state=self.seed) 279 | if batch_size == "best": 280 | batch_size = calculate_best_batch_size(X) 281 | elif batch_size is None: 282 | batch_size = experiment_size 283 | kwargs = {} 284 | if any(metric in METRICS_NEED_NORM for metric in self.metrics): 285 | kwargs["norms"] = np.linalg.norm(X, axis=1) 286 | if any(metric in METRICS_NEED_ORDER for metric in self.metrics): 287 | kwargs["p"] = p 288 | 289 | emb_a = self.df.emb_a.to_numpy() 290 | emb_b = self.df.emb_b.to_numpy() 291 | 292 | emb_a_s = np.array_split(emb_a, np.ceil(experiment_size / batch_size)) 293 | emb_b_s = np.array_split(emb_b, np.ceil(experiment_size / batch_size)) 294 | 295 | for metric, metric_fn in zip(self.metrics, metric_fns): 296 | self.df[metric] = np.hstack( 297 | [metric_fn(X, i, j, **kwargs) for i, j in zip(emb_a_s, emb_b_s)], 298 | ) 299 | if return_embeddings: 300 | self.df["emb_a"] = X[emb_a].tolist() 301 | self.df["emb_b"] = X[emb_b].tolist() 302 | 303 | self.experiment_success = True 304 | return self.df 305 | 306 | def find_optimal_cutoff(self) -> dict: 307 | """Finds the optimal cutoff threshold for each metric based on the ROC curve. 308 | 309 | This function calculates the optimal threshold for each metric by finding the 310 | point on the Receiver Operating Characteristic (ROC) curve where the difference 311 | between the True Positive Rate (TPR) and the False Positive Rate (FPR) is 312 | minimized. 313 | 314 | Returns: 315 | dict: A dictionary with metrics as keys and their corresponding optimal 316 | threshold as values. 317 | """ 318 | 319 | self.check_experiment_run() 320 | self.optimal_cutoff = {} 321 | for metric in self.metrics: 322 | fpr, tpr, threshold = roc_curve(self.df["target"], self.df[metric]) 323 | i = np.arange(len(tpr)) 324 | roc = pd.DataFrame( 325 | { 326 | "tf": pd.Series(tpr - (1 - fpr), index=i), 327 | "threshold": pd.Series(threshold, index=i), 328 | }, 329 | ) 330 | roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]] 331 | self.optimal_cutoff[metric] = roc_t["threshold"].item() 332 | return self.optimal_cutoff 333 | 334 | def threshold_at_fpr(self, fpr: float) -> dict: 335 | """Find the threshold at a specified False Positive Rate (FPR) for each metric. 336 | 337 | The function calculates the threshold at the specified FPR for each metric 338 | by using the Receiver Operating Characteristic (ROC) curve. If the desired 339 | FPR is 0 or 1, or no exact match is found, the closest thresholds are used. 340 | 341 | Args: 342 | fpr (float): Desired False Positive Rate. Must be between 0 and 1. 343 | 344 | Returns: 345 | dict: A dictionary where keys are the metrics and values are dictionaries 346 | containing FPR, TPR, and threshold at the specified FPR. 
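                For example, a single-metric result might look like the
                following (numbers are illustrative only):
                `{"cosine_similarity": {"FPR": 0.1, "TPR": 0.95, "threshold": 0.89}}`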
347 | 348 | Raises: 349 | ValueError: If the provided `fpr` is not between 0 and 1. 350 | """ 351 | 352 | self.check_experiment_run() 353 | if not 0 <= fpr <= 1: 354 | msg = "`fpr` must be between 0 and 1. " f"Received wanted_fpr={fpr}" 355 | raise ValueError( 356 | msg, 357 | ) 358 | threshold_at_fpr = {} 359 | for metric in self.metrics: 360 | predicted = self.predicted_as_similarity(metric) 361 | FPR, TPR, thresholds = roc_curve( 362 | self.df["target"], 363 | predicted, 364 | drop_intermediate=False, 365 | ) 366 | df_fpr_tpr = pd.DataFrame({"FPR": FPR, "TPR": TPR, "threshold": thresholds}) 367 | ix_left = np.searchsorted(df_fpr_tpr["FPR"], fpr, side="left") 368 | ix_right = np.searchsorted(df_fpr_tpr["FPR"], fpr, side="right") 369 | 370 | if fpr == 0: 371 | best = df_fpr_tpr.iloc[ix_right] 372 | elif fpr == 1 or ix_left == ix_right: 373 | best = df_fpr_tpr.iloc[ix_left] 374 | else: 375 | best = ( 376 | df_fpr_tpr.iloc[ix_left] 377 | if abs(df_fpr_tpr.iloc[ix_left].FPR - fpr) 378 | < abs(df_fpr_tpr.iloc[ix_right].FPR - fpr) 379 | else df_fpr_tpr.iloc[ix_right] 380 | ) 381 | best = best.to_dict() 382 | if metric in REVERSE_DISTANCE_TO_SIMILARITY: 383 | best["threshold"] = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)( 384 | best["threshold"], 385 | ) 386 | threshold_at_fpr[metric] = best 387 | return threshold_at_fpr 388 | 389 | def get_binary_prediction(self, metric: str, threshold: float) -> pd.Series: 390 | """Binary classification prediction based on the given metric and threshold. 391 | 392 | Args: 393 | metric: Metric name for the desired prediction. 394 | threshold: Cut off threshold. 395 | 396 | Returns: 397 | pd.Series: Binary predictions. 398 | 399 | """ 400 | return ( 401 | self.df[metric].apply(lambda x: 1 if x < threshold else 0) 402 | if metric in DISTANCE_TO_SIMILARITY 403 | else self.df[metric].apply(lambda x: 1 if x > threshold else 0) 404 | ) 405 | 406 | def evaluate_at_threshold(self, threshold: float, metric: str) -> dict: 407 | """Evaluate performance at specific threshold 408 | Args: 409 | threshold: Cut-off threshold. 410 | metric: Metric to use. 411 | 412 | Returns: 413 | dict: A dict ontaining all evaluation metrics. 414 | 415 | """ 416 | self.metrics_evaluation = {} 417 | self.check_experiment_run(metric) 418 | for metric in self.metrics: 419 | predicted = self.get_binary_prediction(metric, threshold) 420 | cm = confusion_matrix(self.df["target"], predicted) 421 | tn, fp, fn, tp = cm.ravel() 422 | TPR = tp / (tp + fn) # recall / true positive rate 423 | TNR = tn / (tn + fp) # true negative rate 424 | PPV = tp / (tp + fp) # precision / positive predicted value 425 | NPV = tn / (tn + fn) # negative predictive value 426 | FPR = fp / (fp + tn) # false positive rate 427 | FNR = 1 - TPR # false negative rate 428 | FDR = 1 - PPV # false discovery rate 429 | FOR = 1 - NPV # false omission rate 430 | F1 = 2 * (PPV * TPR) / (PPV + TPR) 431 | 432 | evaluation = { 433 | "TPR": TPR, 434 | "TNR": TNR, 435 | "PPV": PPV, 436 | "NPV": NPV, 437 | "FPR": FPR, 438 | "FNR": FNR, 439 | "FDR": FDR, 440 | "FOR": FOR, 441 | "F1": F1, 442 | } 443 | 444 | return evaluation 445 | 446 | def check_experiment_run(self, metric: Optional[str] = None) -> bool: 447 | caller = sys._getframe().f_back.f_code.co_name 448 | if not self.experiment_success: 449 | msg = ( 450 | f"`{caller}` function can only be run after running " 451 | "`run_experiment`." 
452 | ) 453 | raise NotImplementedError( 454 | msg, 455 | ) 456 | if metric is not None and metric not in self.metrics: 457 | msg = ( 458 | f"`{caller}` function can only be called with `metric` from " 459 | f"{self.metrics} which were used while running the experiment" 460 | ) 461 | raise ValueError( 462 | msg, 463 | ) 464 | return True 465 | 466 | def roc_auc(self) -> OrderedDict: 467 | """Find ROC AUC for all the metrics used. 468 | 469 | Returns: 470 | OrderedDict: An OrderedDict with AUC for all metrics. 471 | 472 | """ 473 | self.check_experiment_run() 474 | self.roc_auc = {} 475 | for metric in self.metrics: 476 | predicted = self.predicted_as_similarity(metric) 477 | fpr, tpr, thresholds = roc_curve( 478 | self.df["target"], 479 | predicted, 480 | drop_intermediate=False, 481 | ) 482 | self.roc_auc[metric] = auc(fpr, tpr).item() 483 | self.roc_auc = OrderedDict( 484 | sorted(self.roc_auc.items(), key=lambda x: x[1], reverse=True), 485 | ) 486 | return self.roc_auc 487 | 488 | def predicted_as_similarity(self, metric: str) -> pd.Series: 489 | """Convert distance metrics to a similarity measure. 490 | 491 | Args: 492 | metric: distance metric to convert to similarity. If a similarity metric is 493 | passed, It gets returned unchanged. 494 | 495 | Returns: 496 | pd.Series: Converted distance to similarity. 497 | 498 | """ 499 | predicted = self.df[metric] 500 | if metric in DISTANCE_TO_SIMILARITY: 501 | predicted = ( 502 | self.cached_predicted_as_similarity[metric] 503 | if metric in self.cached_predicted_as_similarity 504 | else DISTANCE_TO_SIMILARITY.get(metric)(predicted) 505 | ) 506 | self.cached_predicted_as_similarity[metric] = predicted 507 | return predicted 508 | 509 | def eer(self) -> OrderedDict: 510 | """Calculates the Equal Error Rate (EER) for each metric. 511 | 512 | Returns: 513 | OrderedDict: A dictionary containing the EER value and threshold for each 514 | metric. 515 | The metrics are sorted in ascending order based on the EER values. 516 | Example: {'metric1': {'EER': 0.123, 'threshold': 0.456}, 517 | ...} 518 | 519 | """ 520 | self.check_experiment_run() 521 | self.eer = {} 522 | for metric in self.metrics: 523 | predicted = self.predicted_as_similarity(metric) 524 | actual = self.df["target"] 525 | 526 | fpr, tpr, thresholds = roc_curve( 527 | actual, 528 | predicted, 529 | pos_label=1, 530 | drop_intermediate=False, 531 | ) 532 | fnr = 1 - tpr 533 | eer_threshold = thresholds[np.nanargmin(np.absolute(fnr - fpr))].item() 534 | eer_1 = fpr[np.nanargmin(np.absolute(fnr - fpr))].item() 535 | eer_2 = fnr[np.nanargmin(np.absolute(fnr - fpr))].item() 536 | if metric in REVERSE_DISTANCE_TO_SIMILARITY: 537 | eer_threshold = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)( 538 | eer_threshold, 539 | ) 540 | 541 | self.eer[metric] = {"EER": (eer_1 + eer_2) / 2, "threshold": eer_threshold} 542 | self.eer = OrderedDict( 543 | sorted(self.eer.items(), key=lambda x: x[1]["EER"], reverse=False), 544 | ) 545 | 546 | return self.eer 547 | 548 | def tar_at_far(self, far_values: List[float]) -> OrderedDict: 549 | """Calculates TAR at specified FAR values for each metric. 550 | 551 | Args: 552 | far_values (List[float]): A list of False Accept Rates (FAR) to get TAR 553 | values for. 554 | 555 | Returns: 556 | OrderedDict: A dictionary with keys as metrics and values as dictionaries 557 | of FAR:TAR pairs. 558 | 559 | Raises: 560 | ValueError: If any FAR in far_values is not between 0 and 1. 
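        Example:
            A minimal sketch, assuming an experiment has already been run; the
            TAR numbers in the comment are illustrative only:

            ```
            tar = experiment.tar_at_far([0.01, 0.001])
            # {"cosine_similarity": {0.01: 0.997, 0.001: 0.979}, ...}
            ```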
561 | """ 562 | if isinstance(far_values, (float, int)): 563 | far_values = [float(far_values)] 564 | 565 | if not all(0 <= far <= 1 for far in far_values): 566 | raise ValueError("All FAR values must be between 0 and 1.") 567 | 568 | self.check_experiment_run() 569 | tar_at_far_results = {} 570 | 571 | for metric in self.metrics: 572 | predicted = self.predicted_as_similarity(metric) 573 | fpr, tpr, _ = roc_curve(self.df["target"], predicted, pos_label=1) 574 | 575 | tar_values = {} 576 | for far in far_values: 577 | idx = np.searchsorted(fpr, far, side="right") - 1 578 | idx = max(0, min(idx, len(fpr) - 1)) # Ensure idx is within bounds 579 | tar_values[far] = tpr[idx].item() 580 | 581 | tar_at_far_results[metric] = tar_values 582 | 583 | self.tar_at_far_results = OrderedDict( 584 | sorted(tar_at_far_results.items(), key=lambda x: list(x[1].keys())[0]) 585 | ) 586 | 587 | return self.tar_at_far_results 588 | --------------------------------------------------------------------------------
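Putting the pieces together, the following is a short end-to-end sketch of the workflow implemented in `evalify/evalify.py` and exercised by the tests above. It uses synthetic embeddings; the sizes, seed, metrics, and FAR values are arbitrary illustration choices rather than recommendations.

```
import numpy as np

from evalify import Experiment

# Synthetic embeddings: 500 vectors of size 8 spread over 10 classes.
rng = np.random.default_rng(555)
X = rng.random((500, 8), dtype=np.float32)
y = rng.integers(10, size=500)

experiment = Experiment(
    metrics=["cosine_similarity", "euclidean_distance_l2"],
    different_class_samples="minimal",
    seed=555,
)
df = experiment.run(X, y)  # pair DataFrame with one column per metric

cutoffs = experiment.find_optimal_cutoff()   # {metric: optimal threshold}
report = experiment.evaluate_at_threshold(
    cutoffs["cosine_similarity"], "cosine_similarity"
)                                            # TPR, TNR, PPV, NPV, FPR, FNR, FDR, FOR, F1
aucs = experiment.roc_auc()                  # OrderedDict sorted by AUC, descending
eers = experiment.eer()                      # {metric: {"EER": ..., "threshold": ...}}
tars = experiment.tar_at_far([0.01, 0.001])  # {metric: {FAR: TAR}}
```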