├── tests ├── __init__.py ├── data │ └── LFW.npz ├── test_utils.py ├── test_metrics.py ├── test_experiment_real_data.py └── test_evalify.py ├── docs ├── api.md ├── authors.md ├── history.md ├── index.md ├── contributing.md ├── usage.md └── installation.md ├── codecov.yml ├── evalify ├── __init__.py ├── utils.py ├── metrics.py └── evalify.py ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ ├── release.yml │ ├── dev.yml │ └── codeql-analysis.yml ├── CITATION.cff ├── HISTORY.md ├── AUTHORS.md ├── tox.ini ├── examples └── LFW.py ├── LICENSE ├── mkdocs.yml ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for evalify.""" 2 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | ::: evalify.evalify 2 | handler: python 3 | -------------------------------------------------------------------------------- /docs/authors.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../AUTHORS.md" 3 | %} -------------------------------------------------------------------------------- /docs/history.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../HISTORY.md" 3 | %} -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../README.md" 3 | %} 4 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | {% 2 | include-markdown "../CONTRIBUTING.md" 3 | %} -------------------------------------------------------------------------------- /tests/data/LFW.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma7555/evalify/HEAD/tests/data/LFW.npz -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 90% 6 | patch: 7 | default: 8 | target: 85% 9 | -------------------------------------------------------------------------------- /evalify/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for evalify.""" 2 | 3 | from evalify.evalify import Experiment as Experiment 4 | 5 | __author__ = """Mahmoud Bahaa""" 6 | __email__ = "evalify@ma7555.anonaddy.com" 7 | __version__ = "0.1.0" 8 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | # uncomment the following to omit files during running 3 | #omit = 4 | [report] 5 | exclude_lines = 6 | pragma: no cover 7 | def __repr__ 8 | if self.debug: 9 | if settings.DEBUG 10 | raise AssertionError 11 | raise NotImplementedError 12 | if 0: 13 | if __name__ == .__main__.: 14 | def main 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: 
-------------------------------------------------------------------------------- 1 | * evalify version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: evalify 3 | message: " If you use this software, please cite it using the metadata from this file." 4 | type: software 5 | authors: 6 | - given-names: Mahmoud 7 | family-names: Bahaa 8 | email: evalify@ma7555.anonaddy.com 9 | affiliation: Nile University 10 | orcid: "https://orcid.org/0000-0001-8688-6495" 11 | doi: 10.5281/zenodo.6181723 12 | date-released: 2022-02-20 13 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # History 2 | 3 | ## 0.1.0 (2022-02-20) 4 | 5 | * First release on PyPI. 6 | 7 | ## 0.1.1 (2022-02-22) 8 | 9 | * Run time enhancement. 10 | 11 | ## 0.1.2 (2022-02-23) 12 | 13 | * Various enhancements and refactoring. 14 | 15 | ## 0.1.3 (2022-02-24) 16 | 17 | * Add pearson similarity as a metric 18 | 19 | ## 0.1.4 (2022-02-24) 20 | 21 | * Add EER calculation function. 22 | * Drop support for python 3.7 23 | 24 | ## 1.0.0 (2024-11-08) 25 | 26 | * Bump dependencies. 27 | * Drop support for python 3.8 28 | * Add support for TAR @ FAR -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Credits 2 | 3 | ## Development Lead 4 | 5 | * Mahmoud Bahaa 6 | 7 | ## Contributors 8 | 9 | None yet. Why not be the first? 10 | 11 | ## Others 12 | * This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [zillionare/cookiecutter-pypackage](https://github.com/zillionare/cookiecutter-pypackage) project template. 
13 | 14 | * Logo was created using font [GlacialIndifference-Regular](https://hanken.co/product/hk-grotesk/) by [Hanken Design Co.](https://hanken.co/) 15 | * Logo icon designed by Mauro Lucchesi 16 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | isolated_build = true 3 | envlist = py39, py310, py311, py312, lint 4 | 5 | [gh-actions] 6 | python = 7 | 3.12: py312 8 | 3.11: py311 9 | 3.10: py310 10 | 3.9: py39 11 | 12 | [testenv:lint] 13 | allowlist_externals = 14 | python 15 | deps = 16 | .[test, doc, dev] 17 | commands = 18 | python -m ruff check evalify tests --fix 19 | python -m poetry build 20 | python -m mkdocs build 21 | python -m twine check dist/* 22 | 23 | [testenv] 24 | allowlist_externals = pytest 25 | setenv = 26 | PYTHONPATH = {toxinidir} 27 | PYTHONWARNINGS = ignore 28 | deps = 29 | .[test] 30 | commands = 31 | pytest -s --cov=evalify --cov-append --cov-report=xml --cov-report term-missing tests 32 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | To use evalify in a project: 4 | 5 | ```python 6 | import numpy as np 7 | from evalify import Experiment 8 | 9 | rng = np.random.default_rng() 10 | nphotos = 500 11 | emb_size = 32 12 | nclasses = 10 13 | X = rng.random((nphotos, emb_size)) 14 | y = rng.integers(nclasses, size=nphotos) 15 | 16 | experiment = Experiment() 17 | experiment.run(X, y) 18 | roc_auc = experiment.roc_auc() 19 | print(roc_auc) 20 | ``` 21 | 22 | For a working experiment using real face embeddings, please refer to `LFW.py` under `./examples`. 23 | 24 | ```bash 25 | python ./examples/LFW.py 26 | ``` 27 | ``` 28 | Total available embeddings 2921 resulted in 4264660 samples for the experiment. 29 | Metrics calculations executed in 24.05 seconds 30 | ROC AUC: 31 | OrderedDict([('euclidean_distance', 0.9991302819624498), ('cosine_distance', 0.9991302818953706), ('euclidean_distance_l2', 0.9991302818953706), ('manhattan_distance', 0.9991260462584446)]) 32 | ``` 33 | -------------------------------------------------------------------------------- /examples/LFW.py: -------------------------------------------------------------------------------- 1 | """File LFW.npz contains sample embeddings and targets from the LFW dataset.""" 2 | 3 | from pathlib import Path 4 | import time 5 | import numpy as np 6 | 7 | from evalify import Experiment 8 | 9 | lfw_npz = Path(__file__).parent.parent / Path("tests/data/LFW.npz") 10 | X_y_array = np.load(lfw_npz) 11 | X = X_y_array["X"][:1000] 12 | y = X_y_array["y"][:1000] 13 | 14 | experiment = Experiment( 15 | metrics=( 16 | "cosine_similarity", 17 | "pearson_similarity", 18 | "euclidean_distance_l2", 19 | ), 20 | same_class_samples="full", 21 | different_class_samples=("full", "full"), 22 | ) 23 | start_time = time.time() 24 | print("Starting Experiment") 25 | experiment.run(X, y) 26 | print( 27 | f"Total available embeddings {len(y)} resulted in {len(experiment.df)} " 28 | "samples for the experiment."
29 | ) 30 | print(f"Metrics calculations executed in {time.time()-start_time:.2f} seconds") 31 | print("ROC AUC:") 32 | print(experiment.roc_auc()) 33 | print("threshold @ FPR:") 34 | print(experiment.threshold_at_fpr(0.01)) 35 | print("EER:") 36 | print(experiment.eer()) 37 | print("TAR@FAR:") 38 | print(experiment.tar_at_far([0.01, 0.001])) 39 | -------------------------------------------------------------------------------- /evalify/utils.py: -------------------------------------------------------------------------------- 1 | """Evalify utils module contains various utilities serving other modules.""" 2 | 3 | import numpy as np 4 | import psutil 5 | 6 | GB_TO_BYTE = 1024**3 7 | 8 | 9 | def _validate_vectors(X, y): 10 | X = np.asarray(X, dtype=np.float32) 11 | y = np.asarray(y, dtype=np.int32).squeeze() 12 | if X.ndim != 2: 13 | msg = "Embeddings vector should be 2-D." 14 | raise ValueError(msg) 15 | if y.ndim != 1: 16 | msg = "Target vector should be 1-D." 17 | raise ValueError(msg) 18 | return X, y 19 | 20 | 21 | def _calc_available_memory(): 22 | """Calculate available memory in the system.""" 23 | mem = psutil.virtual_memory() 24 | return mem[1] 25 | 26 | 27 | def calculate_best_batch_size(X, available_mem=None): 28 | """Calculate the maximum rows to fetch per batch without going out of memory. 29 | 30 | We need 3 big arrays to be held in memory (A, B, A*B). 31 | """ 32 | available_mem = _calc_available_memory() if available_mem is None else available_mem 33 | if available_mem > 2 * GB_TO_BYTE: 34 | max_total_rows = np.floor(available_mem - GB_TO_BYTE / X[0].nbytes) 35 | return max_total_rows // 3 36 | max_total_rows = np.floor(available_mem / X[0].nbytes) 37 | return max_total_rows // 5 38 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Stable release 4 | 5 | To install evalify, run this command in your 6 | terminal: 7 | 8 | ```bash 9 | pip install evalify 10 | ``` 11 | 12 | This is the preferred method to install evalify, as it will always install the most recent stable release. 13 | 14 | If you don't have [pip][] installed, this [Python installation guide][] 15 | can guide you through the process. 16 | 17 | ## From source 18 | 19 | The source for evalify can be downloaded from 20 | the [Github repo][]. 21 | 22 | You can either clone the public repository: 23 | 24 | ```bash 25 | git clone https://github.com/ma7555/evalify.git 26 | ``` 27 | 28 | Or download the [tarball][]: 29 | 30 | ```bash 31 | curl -OJL https://github.com/ma7555/evalify/tarball/master 32 | ``` 33 | 34 | Once you have a copy of the source, you can install it with: 35 | 36 | ```bash 37 | pip install . 38 | ``` 39 | 40 | [pip]: https://pip.pypa.io 41 | [Python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/ 42 | [Github repo]: https://github.com/ma7555/evalify 43 | [tarball]: https://github.com/ma7555/evalify/tarball/master 44 | 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | 4 | Copyright (c) 2022, Mahmoud Bahaa 5 | All rights reserved.
6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this 14 | list of conditions and the following disclaimer in the documentation and/or 15 | other materials provided with the distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 29 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 30 | OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from evalify import utils 9 | 10 | 11 | class TestUtils(unittest.TestCase): 12 | """Tests for `evalify` package.""" 13 | 14 | def setUp(self): 15 | """Set up test fixtures, if any.""" 16 | self.rng = np.random.default_rng(555) 17 | self.nphotos = 100 18 | self.emb_size = 8 19 | self.nclasses = 10 20 | self.embs = self.rng.random((self.nphotos, self.emb_size), dtype=np.float32) 21 | self.targets = self.rng.integers(self.nclasses, size=self.nphotos) 22 | 23 | def tearDown(self): 24 | """Tear down test fixtures, if any.""" 25 | 26 | def test_validate_vectors(self): 27 | """Test _validate_vectors""" 28 | embs = self.embs.tolist() 29 | targets = self.targets.tolist() 30 | X, y = utils._validate_vectors(embs, targets) 31 | self.assertEqual(X.shape, (self.nphotos, self.emb_size)) 32 | self.assertEqual(y.shape, (self.nphotos,)) 33 | 34 | def test_calculate_best_batch_size(self): 35 | """Test calculate_best_batch_size""" 36 | batch_size = utils.calculate_best_batch_size(self.embs, 4 * utils.GB_TO_BYTE) 37 | self.assertEqual(batch_size, 1420470954) 38 | 39 | def test_run_errors(self): 40 | """Test run errors""" 41 | with self.assertRaisesRegex(ValueError, "Embeddings vector should be 2-D."): 42 | _ = utils._validate_vectors( 43 | X=self.rng.random(5), y=self.rng.integers(10, size=5), 44 | ) 45 | with self.assertRaisesRegex(ValueError, "Target vector should be 1-D."): 46 | _ = utils._validate_vectors( 47 | X=self.rng.random((5, 5)), y=self.rng.integers(10, size=(5, 2)), 48 | ) 49 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: evalify 2 | 
repo_url: https://github.com/ma7555/evalify 3 | repo_name: evalify 4 | nav: 5 | - home: index.md 6 | - installation: installation.md 7 | - usage: usage.md 8 | - modules: api.md 9 | - contributing: contributing.md 10 | - authors: authors.md 11 | - history: history.md 12 | theme: 13 | name: material 14 | language: en 15 | logo: https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg 16 | palette: 17 | - media: "(prefers-color-scheme: light)" 18 | scheme: default 19 | toggle: 20 | icon: material/weather-night 21 | name: Switch to dark mode 22 | - media: "(prefers-color-scheme: dark)" 23 | scheme: slate 24 | toggle: 25 | icon: material/weather-sunny 26 | name: Switch to light mode 27 | features: 28 | - navigation.indexes 29 | - navigation.tabs 30 | - navigation.instant 31 | - navigation.tabs.sticky 32 | markdown_extensions: 33 | - pymdownx.emoji: 34 | emoji_index: !!python/name:material.extensions.emoji.twemoji 35 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 36 | - pymdownx.critic 37 | - pymdownx.caret 38 | - pymdownx.mark 39 | - pymdownx.tilde 40 | - pymdownx.tabbed 41 | - attr_list 42 | - pymdownx.arithmatex: 43 | generic: true 44 | - pymdownx.highlight: 45 | linenums: true 46 | - pymdownx.superfences 47 | - pymdownx.details 48 | - admonition 49 | - toc: 50 | baselevel: 2 51 | permalink: true 52 | - meta 53 | plugins: 54 | - include-markdown 55 | - search: 56 | lang: en 57 | - mkdocstrings 58 | extra: 59 | social: 60 | - icon: fontawesome/brands/github 61 | link: https://github.com/ma7555/evalify 62 | name: Github 63 | - icon: material/email 64 | link: "mailto:evalify@ma7555.anonaddy.com" 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # mkdocs build dir 108 | site/ 109 | 110 | # logo 111 | logo/ 112 | poetry.lock 113 | .ruff_cache/ 114 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release & publish workflow 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v1.*.*" 7 | 8 | workflow_dispatch: 9 | 10 | jobs: 11 | release: 12 | name: Create Release 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | python-versions: [3.12] 18 | 19 | steps: 20 | - name: Checks-out 21 | uses: actions/checkout@v4 22 | - name: "Build Changelog" 23 | id: build_changelog 24 | uses: mikepenz/release-changelog-builder-action@v5.0.0 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | - uses: actions/setup-python@v5 28 | with: 29 | python-version: ${{ matrix.python-versions }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install tox-gh-actions poetry 34 | 35 | - name: pre-publish documentation 36 | run: | 37 | poetry install -E doc 38 | poetry run mkdocs build 39 | 40 | - name: publish documentation 41 | uses: peaceiris/actions-gh-pages@v4 42 | with: 43 | github_token: ${{ secrets.GITHUB_TOKEN }} 44 | publish_dir: ./site 45 | 46 | - name: Build wheels and source tarball 47 | run: >- 48 | poetry build 49 | 50 | - name: show temporary files 51 | run: >- 52 | ls -l 53 | 54 | - name: create github release 55 | id: create_release 56 | uses: softprops/action-gh-release@v2.0.9 57 | env: 58 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 59 | with: 60 | body: ${{steps.build_changelog.outputs.changelog}} 61 | # body_path: ./CHANGELOG.md 62 | files: dist/*.whl 63 | draft: false 64 | prerelease: false 65 | 66 | - name: create pypi release 67 | uses: pypa/gh-action-pypi-publish@v1.12.2 68 | with: 69 | user: __token__ 70 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | workflow_dispatch: 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | python-versions: ["3.9", "3.10", "3.11", "3.12"] 16 | os: [ubuntu-latest, macos-latest, 
windows-latest] 17 | runs-on: ${{ matrix.os }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-versions }} 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install poetry tox tox-gh-actions 29 | 30 | - name: test with tox 31 | run: tox 32 | 33 | - name: list files 34 | run: ls -l . 35 | 36 | publish_dev_build: 37 | needs: test 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: actions/setup-python@v5 42 | with: 43 | python-version: 3.12 44 | 45 | - name: Install dependencies 46 | run: | 47 | python -m pip install --upgrade pip 48 | pip install poetry tox tox-gh-actions 49 | 50 | - name: test with tox 51 | run: tox 52 | 53 | - name: list files 54 | run: ls -l . 55 | 56 | - uses: codecov/codecov-action@v4 57 | with: 58 | fail_ci_if_error: false 59 | files: coverage.xml 60 | token: ${{ secrets.CODECOV_TOKEN }} 61 | - name: Build wheels and source tarball 62 | run: | 63 | poetry version $(poetry version --short)-dev.$GITHUB_RUN_NUMBER 64 | poetry version --short 65 | poetry build 66 | 67 | - name: publish to Test PyPI 68 | uses: pypa/gh-action-pypi-publish@v1.12.2 69 | with: 70 | user: __token__ 71 | password: ${{ secrets.TEST_PYPI_API_TOKEN}} 72 | repository-url: https://test.pypi.org/legacy/ 73 | skip-existing: true 74 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '41 19 * * 2' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v1 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v1 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v1 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "evalify" 3 | version = "1.0.0" 4 | homepage = "https://github.com/ma7555/evalify" 5 | description = "Evaluate your face or voice verification models literally in seconds." 6 | authors = ["Mahmoud Bahaa "] 7 | keywords = ["biometric verification", "biometric authentication", "evaluation"] 8 | readme = "README.md" 9 | license = "BSD-3-Clause" 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: BSD License", 14 | "Natural Language :: English", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | ] 21 | 22 | packages = [ 23 | { include = "evalify" }, 24 | ] 25 | 26 | [tool.poetry.dependencies] 27 | python = ">=3.9,<4.0" 28 | pandas = "^2.0.0" 29 | numpy = "^2.0.0" 30 | psutil = "^5.9.0" 31 | scikit-learn = "^1.2.0" 32 | 33 | # Optional Dependencies 34 | ruff = { version = ">=0.7.2", optional = true } 35 | pytest = { version = "^7.2.0", optional = true } 36 | pytest-cov = { version = "^4.0.0", optional = true } 37 | scipy = { version = ">=1.10.0", optional = true } 38 | tox = { version = "^4.7.0", optional = true } 39 | virtualenv = { version = ">=20.24.0", optional = true } 40 | pip = { version = ">=23.2.0", optional = true } 41 | mkdocs = { version = ">=1.4.0", optional = true } 42 | mkdocs-material = { version = "^9.2.0", optional = true } 43 | mkdocstrings = { version = ">=0.26.0", optional = true } 44 | mkdocstrings-python = { version = ">=1.12.2", optional = true } 45 | mkdocs-include-markdown-plugin = { version = ">=6.0.0", optional = true } 46 | twine = { version = "^5.0.0", optional = true } 47 | toml = { version = ">0.8.0", optional = true } 48 | pyreadline3 = { version = "^3.4.1", optional = true } 49 | poetry = { version = "^1.8.0", optional = true } 50 | 51 | [tool.poetry.extras] 52 | test = [ 53 | "pytest", 54 | "ruff", 55 | "pytest-cov", 56 | "pyreadline3", 57 | "scipy", 58 | ] 59 | 60 | dev = [ 61 | "tox", 62 | "virtualenv", 63 | "pip", 64 | "twine", 65 | "toml", 66 | "poetry", 67 | ] 68 | 69 | doc = [ 70 | "mkdocs", 71 | "mkdocs-material", 72 | "mkdocstrings", 73 | "mkdocstrings-python", 74 | "mkdocs-include-markdown-plugin", 75 | ] 76 | 77 | [build-system] 78 | requires = ["poetry-core>=1.8.0"] 79 | build-backend = "poetry.core.masonry.api" 80 | 81 | [tool.ruff] 82 | line-length = 88 83 | indent-width = 4 84 | 85 | [tool.ruff.lint] 86 | select = [ 87 | "E", # pycodestyle error 88 | "F", # Pyflakes 89 | "I", # isort 90 | ] 91 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 92 | 93 | [tool.ruff.format] 94 | 
quote-style = "double" 95 | 96 | [tool.ruff.lint.isort] 97 | known-first-party = ["evalify"] 98 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcomed, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at https://github.com/ma7555/evalify/issues. 13 | 14 | If you are reporting a bug, please include: 15 | 16 | * Your operating system name and version. 17 | * Any details about your local setup that might be helpful in troubleshooting. 18 | * Detailed steps to reproduce the bug. 19 | 20 | ### Fix Bugs 21 | 22 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 23 | wanted" is open to whoever wants to implement it. 24 | 25 | ### Implement Features 26 | 27 | Look through the GitHub issues for features. Anything tagged with "enhancement" 28 | and "help wanted" is open to whoever wants to implement it. 29 | 30 | ### Write Documentation 31 | 32 | evalify could always use more documentation, whether as part of the 33 | official evalify docs, in docstrings, or even on the web in blog posts, 34 | articles, and such. 35 | 36 | ### Submit Feedback 37 | 38 | The best way to send feedback is to file an issue at https://github.com/ma7555/evalify/issues. 39 | 40 | If you are proposing a feature: 41 | 42 | * Explain in detail how it would work. 43 | * Keep the scope as narrow as possible, to make it easier to implement. 44 | * Remember that this is a volunteer-driven project, and that contributions 45 | are welcome :) 46 | 47 | ## Get Started! 48 | 49 | Ready to contribute? Here's how to set up `evalify` for local development. 50 | 51 | 1. Fork the `evalify` repo on GitHub. 52 | 2. Clone your fork locally 53 | 54 | ```bash 55 | git clone git@github.com:your_name_here/evalify.git 56 | ``` 57 | 58 | 3. Ensure [poetry](https://python-poetry.org/docs/) is installed. 59 | 4. Install dependencies and start your virtualenv: 60 | 61 | ```bash 62 | poetry install -E test -E doc -E dev 63 | ``` 64 | 65 | 5. Create a branch for local development: 66 | 67 | ```bash 68 | git checkout -b name-of-your-bugfix-or-feature 69 | ``` 70 | 71 | Now you can make your changes locally. 72 | 73 | 6. When you're done making changes, check that your changes pass the 74 | tests, including testing other Python versions, with tox: 75 | 76 | ```bash 77 | tox 78 | ``` 79 | 80 | 7. Commit your changes and push your branch to GitHub: 81 | 82 | ```bash 83 | git add . 84 | git commit -m "Your detailed description of your changes." 85 | git push origin name-of-your-bugfix-or-feature 86 | ``` 87 | 88 | 8. Submit a pull request through the GitHub website. 89 | 90 | ## Pull Request Guidelines 91 | 92 | Before you submit a pull request, check that it meets these guidelines: 93 | 94 | 1. The pull request should include tests. 95 | 2. If the pull request adds functionality, the docs should be updated. Put 96 | your new functionality into a function with a docstring, and add the 97 | feature to the list in README.md. 98 | 3. The pull request should work for Python 3.9, 3.10, 3.11, 3.12 and for PyPy. Check 99 | https://github.com/ma7555/evalify/actions 100 | and make sure that the tests pass for all supported Python versions. 
101 | 102 | ## Running Tests 103 | ```bash 104 | python -m unittest 105 | ``` 106 | or 107 | ```bash 108 | pytest 109 | ``` 110 | To run a subset of tests, point `pytest` or `unittest` at a specific test module, e.g. `pytest tests/test_metrics.py`. 111 | 112 | 113 | ## Deploying 114 | 115 | A reminder for the maintainers on how to deploy. 116 | Make sure all your changes are committed (including an entry in HISTORY.md). 117 | Then run: 118 | 119 | ```bash 120 | git push 121 | git push --tags 122 | ``` 123 | 124 | GitHub Actions will then deploy to PyPI if tests pass. 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # evalify 2 | 3 |
4 | Logo 5 | Badges: License, DOI, Python versions, Release Status, CI Status, Documentation Status, Code style: Ruff, PyPI Downloads/Month
41 | 42 | **Evaluate Biometric Authentication Models Literally in Seconds.** 43 | 44 | ## Installation 45 | #### Stable release: 46 | ```bash 47 | pip install evalify 48 | ``` 49 | #### Bleeding edge: 50 | ```bash 51 | pip install git+https://github.com/ma7555/evalify.git 52 | ``` 53 | ## Used for 54 | Evaluating any biometric authentication model whose output is a high-level embedding, known as a feature vector for visual or behavioural biometrics or a d-vector for auditory biometrics. 55 | 56 | ## Usage 57 | 58 | ```python 59 | import numpy as np 60 | from evalify import Experiment 61 | 62 | rng = np.random.default_rng() 63 | nphotos = 500 64 | emb_size = 32 65 | nclasses = 10 66 | X = rng.random((nphotos, emb_size)) 67 | y = rng.integers(nclasses, size=nphotos) 68 | 69 | experiment = Experiment() 70 | experiment.run(X, y) 71 | roc_auc = experiment.roc_auc() 72 | print(roc_auc) 73 | print(experiment.threshold_at_fpr(0.01)) 74 | ``` 75 | ## How it works 76 | * When you run an experiment, evalify tries all the possible combinations between individuals for authentication based on the `X` and `y` parameters and returns the results, including FPR, TPR, FNR, TNR and ROC AUC. `X` is an array of embeddings and `y` is an array of the corresponding targets. 77 | * Evalify can find the optimal threshold based on your target FPR and the desired similarity or distance metric. 78 | 79 | ## Documentation 80 | * Documentation is published from this repository via GitHub Pages (see `.github/workflows/release.yml`). 81 | 82 | 83 | ## Features 84 | 85 | * Blazing fast implementation for metrics calculation through an optimized einstein sum and vectorized calculations. 86 | * Many operations are dispatched to canonical BLAS, cuBLAS, or other specialized routines. 87 | * Smart sampling options using direct indexing from pre-calculated arrays with total control over sampling strategy and sampling numbers (see the example below). 88 | * Supports most evaluation metrics: 89 | - `cosine_similarity` 90 | - `pearson_similarity` 91 | - `cosine_distance` 92 | - `euclidean_distance` 93 | - `euclidean_distance_l2` 94 | - `minkowski_distance` 95 | - `manhattan_distance` 96 | - `chebyshev_distance` 97 | * Computation time for a 4-metric, 4.2-million-sample experiment is **24 seconds vs 51 minutes** if looping using the `scipy.spatial.distance` implementations. 98 | 99 | ## TODO 100 | * Safer memory allocation. I did not have issues, but if you run out of memory please set the `batch_size` argument manually (see the example below). 101 | 102 | ## Contribution 103 | * Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. 104 | * Please check [CONTRIBUTING.md](https://github.com/ma7555/evalify/blob/main/CONTRIBUTING.md) for guidelines.
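## Example: sampling and batch size control

Below is a minimal sketch of the sampling and batching controls mentioned in the Features and TODO sections, based on the `Experiment` arguments documented in `evalify/evalify.py`. The sample counts and the `batch_size` value are illustrative only.

```python
import numpy as np
from evalify import Experiment

rng = np.random.default_rng(42)
X = rng.random((500, 32))       # 500 embeddings with 32 dimensions
y = rng.integers(10, size=500)  # 10 identities

experiment = Experiment(
    metrics=("cosine_similarity", "euclidean_distance_l2"),
    same_class_samples=5,             # up to 5 images per class -> 5C2 positive pairs
    different_class_samples=(2, 10),  # (N, M): 2 images per class vs 10 images of every other class
    seed=42,
)
# batch_size caps how many pairs are processed per metrics batch; pass an
# integer if the default memory-based sizing runs out of memory.
experiment.run(X, y, batch_size=1_000_000)

print(experiment.roc_auc())
print(experiment.eer())
print(experiment.tar_at_far([0.01, 0.001]))
```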
105 | 106 | ## Citation 107 | * If you use this software, please cite it using the metadata from [CITATION.cff](https://github.com/ma7555/evalify/blob/main/CITATION.cff) 108 | 109 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.spatial import distance 8 | from scipy.stats import pearsonr 9 | 10 | from evalify import metrics 11 | 12 | 13 | class TestMetrics(unittest.TestCase): 14 | """Tests for `evalify` package.""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures, if any.""" 18 | rng = np.random.default_rng(555) 19 | self.nphotos = 500 20 | self.emb_size = 8 21 | self.slice_size = 100 22 | self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32) 23 | self.norms = np.linalg.norm(self.embs, axis=1) 24 | self.ix = rng.integers(self.nphotos, size=self.slice_size) 25 | self.iy = rng.integers(self.nphotos, size=self.slice_size) 26 | 27 | def test_cosine_similarity(self): 28 | """Test cosine_similarity""" 29 | result = metrics.cosine_similarity(self.embs, self.ix, self.iy, self.norms) 30 | result_2 = 1 - np.array( 31 | [ 32 | distance.cosine(self.embs[ix], self.embs[iy]) 33 | for (ix, iy) in zip(self.ix, self.iy) 34 | ], 35 | ) 36 | self.assertEqual(result.shape, (self.slice_size,)) 37 | self.assertTrue(np.allclose(result, result_2)) 38 | 39 | def test_pearson_similarity(self): 40 | """Test pearson_similarity""" 41 | result = metrics.pearson_similarity(self.embs, self.ix, self.iy) 42 | result_2 = np.array( 43 | [ 44 | pearsonr(self.embs[ix], self.embs[iy])[0] 45 | for (ix, iy) in zip(self.ix, self.iy) 46 | ], 47 | ) 48 | self.assertEqual(result.shape, (self.slice_size,)) 49 | self.assertTrue(np.allclose(result, result_2)) 50 | 51 | def test_euclidean_distance(self): 52 | """Test euclidean_distance""" 53 | result = metrics.metrics_caller.get("euclidean_distance")( 54 | self.embs, 55 | self.ix, 56 | self.iy, 57 | ) 58 | result_2 = np.array( 59 | [ 60 | distance.euclidean(self.embs[ix], self.embs[iy]) 61 | for (ix, iy) in zip(self.ix, self.iy) 62 | ], 63 | ) 64 | self.assertEqual(result.shape, (self.slice_size,)) 65 | self.assertTrue(np.allclose(result, result_2)) 66 | 67 | def test_euclidean_distance_l2(self): 68 | """Test euclidean_distance""" 69 | result = metrics.metrics_caller.get("euclidean_distance_l2")( 70 | self.embs, 71 | self.ix, 72 | self.iy, 73 | self.norms, 74 | ) 75 | result_2 = np.array( 76 | [ 77 | distance.euclidean( 78 | self.embs[ix] / np.sqrt(np.sum(self.embs[ix] ** 2)), 79 | self.embs[iy] / np.sqrt(np.sum(self.embs[iy] ** 2)), 80 | ) 81 | for (ix, iy) in zip(self.ix, self.iy) 82 | ], 83 | ) 84 | 85 | self.assertEqual(result.shape, (len(self.ix),)) 86 | self.assertTrue(np.allclose(result, result_2)) 87 | 88 | def test_minkowski_distance_distance(self): 89 | """Test euclidean_distance""" 90 | result = metrics.metrics_caller.get("minkowski_distance")( 91 | self.embs, 92 | self.ix, 93 | self.iy, 94 | p=3, 95 | ) 96 | result_2 = np.array( 97 | [ 98 | distance.minkowski(self.embs[ix], self.embs[iy], p=3) 99 | for (ix, iy) in zip(self.ix, self.iy) 100 | ], 101 | ) 102 | self.assertEqual(result.shape, (self.slice_size,)) 103 | self.assertTrue(np.allclose(result, result_2)) 104 | 105 | def test_manhattan_distance_distance(self): 106 | """Test euclidean_distance""" 107 | result = 
metrics.metrics_caller.get("manhattan_distance")( 108 | self.embs, 109 | self.ix, 110 | self.iy, 111 | ) 112 | result_2 = np.array( 113 | [ 114 | distance.cityblock(self.embs[ix], self.embs[iy]) 115 | for (ix, iy) in zip(self.ix, self.iy) 116 | ], 117 | ) 118 | self.assertEqual(result.shape, (self.slice_size,)) 119 | self.assertTrue(np.allclose(result, result_2)) 120 | 121 | def test_chebyshev_distance_distance(self): 122 | """Test euclidean_distance""" 123 | result = metrics.metrics_caller.get("chebyshev_distance")( 124 | self.embs, 125 | self.ix, 126 | self.iy, 127 | ) 128 | result_2 = np.array( 129 | [ 130 | distance.chebyshev(self.embs[ix], self.embs[iy]) 131 | for (ix, iy) in zip(self.ix, self.iy) 132 | ], 133 | ) 134 | self.assertEqual(result.shape, (self.slice_size,)) 135 | self.assertTrue(np.allclose(result, result_2)) 136 | -------------------------------------------------------------------------------- /evalify/metrics.py: -------------------------------------------------------------------------------- 1 | """Evalify metrics module used for calculating the evaluation metrics. 2 | 3 | Optimized calculations using einstein sum. Embeddings array and norm arrays are indexed 4 | with every 5 | split and calculations happens over large data chunks very quickly. 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def _inner1d(A, B): 12 | """Calculate the inner product between two arrays of vectors. 13 | 14 | Args: 15 | A (numpy.ndarray): 2D array of shape (n_samples, n_features) 16 | B (numpy.ndarray): 2D array of shape (n_samples, n_features) 17 | 18 | Returns: 19 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the inner 20 | product of the corresponding rows in A and B 21 | 22 | """ 23 | return np.einsum("ij,ij->i", A, B, optimize="optimal") 24 | 25 | 26 | def cosine_similarity(embs, ix, iy, norms, return_distance=False, **kwargs): 27 | """Calculate the cosine similarity between two arrays of vectors. 28 | 29 | Args: 30 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 31 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 32 | the first array 33 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 34 | the second array 35 | norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm 36 | of each row in X 37 | return_distance (bool): Whether to return the cosine distance instead of the 38 | cosine similarity. Defaults to False. 39 | 40 | Returns: 41 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the cosine 42 | similarity (or cosine distance) of the corresponding rows in X. 43 | 44 | """ 45 | similarity = _inner1d(embs[ix], embs[iy]) / (norms[ix] * norms[iy]) 46 | return 1 - similarity if return_distance else similarity 47 | 48 | 49 | def euclidean_distance_l2(embs, ix, iy, norms, **kwargs): 50 | """Calculate the L2-normalized Euclidean distance between two arrays of vectors. 51 | 52 | Args: 53 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features). 54 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 55 | the first array. 56 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 57 | the second array. 58 | norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm 59 | of each row in embs. 60 | 61 | Returns: 62 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the 63 | L2-normalized Euclidean distance of the corresponding rows in embs. 
64 | 65 | """ 66 | X = embs[ix] / norms[ix].reshape(-1, 1) - embs[iy] / norms[iy].reshape(-1, 1) 67 | return np.linalg.norm(X, axis=1) 68 | 69 | 70 | def minkowski_distance(embs, ix, iy, p, **kwargs): 71 | """Calculate the element-wise Minkowski or Manhattan or Chebyshev distance. 72 | 73 | Args: 74 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 75 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 76 | the first array 77 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 78 | the second array 79 | p (int): The order of the norm of the difference. 80 | 81 | Returns: 82 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the 83 | Minkowski distance of the corresponding rows in embs. 84 | 85 | """ 86 | return np.linalg.norm(embs[ix] - embs[iy], ord=p, axis=1) 87 | 88 | 89 | def pearson_similarity(embs, ix, iy, **kwargs): 90 | """Calculate the Pearson correlation coefficient between two arrays of vectors. 91 | 92 | Args: 93 | embs (numpy.ndarray): 2D array of shape (n_samples, n_features) 94 | ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 95 | the first array 96 | iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of 97 | the second array 98 | 99 | Returns: 100 | numpy.ndarray: 1D array of shape (n_samples,) where each element is the Pearson 101 | correlation coefficient 102 | of the corresponding rows in embs. 103 | 104 | """ 105 | A = embs[ix] 106 | B = embs[iy] 107 | A_mA = A - np.expand_dims(A.mean(axis=1), -1) 108 | B_mB = B - np.expand_dims(B.mean(axis=1), -1) 109 | ssA = np.expand_dims((A_mA**2).sum(axis=1), -1) 110 | ssB = np.expand_dims((B_mB**2).sum(axis=1), -1) 111 | return _inner1d(A_mA, B_mB) / np.sqrt(_inner1d(ssA, ssB)) 112 | 113 | 114 | metrics_caller = { 115 | "cosine_similarity": cosine_similarity, 116 | "pearson_similarity": pearson_similarity, 117 | "cosine_distance": lambda embs, ix, iy, norms, **kwargs: cosine_similarity( 118 | embs, 119 | ix, 120 | iy, 121 | norms, 122 | return_distance=True, 123 | ), 124 | "euclidean_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 125 | embs, 126 | ix, 127 | iy, 128 | p=2, 129 | ), 130 | "euclidean_distance_l2": euclidean_distance_l2, 131 | "minkowski_distance": minkowski_distance, 132 | "manhattan_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 133 | embs, 134 | ix, 135 | iy, 136 | p=1, 137 | ), 138 | "chebyshev_distance": lambda embs, ix, iy, **kwargs: minkowski_distance( 139 | embs, 140 | ix, 141 | iy, 142 | p=np.inf, 143 | ), 144 | } 145 | 146 | METRICS_NEED_NORM = ["cosine_similarity", "cosine_distance", "euclidean_distance_l2"] 147 | METRICS_NEED_ORDER = ["minkowski_distance"] 148 | DISTANCE_TO_SIMILARITY = { 149 | "cosine_distance": lambda x: 1 - x, 150 | "euclidean_distance": lambda x: 1 / (1 + x), 151 | "euclidean_distance_l2": lambda x: 1 - x, 152 | "minkowski_distance": lambda x: 1 / (1 + x), 153 | "manhattan_distance": lambda x: 1 / (1 + x), 154 | "chebyshev_distance": lambda x: 1 / (1 + x), 155 | } 156 | 157 | REVERSE_DISTANCE_TO_SIMILARITY = { 158 | "cosine_distance": lambda x: 1 - x, 159 | "euclidean_distance": lambda x: (1 / x) - 1, 160 | "euclidean_distance_l2": lambda x: 1 - x, 161 | "minkowski_distance": lambda x: (1 / x) - 1, 162 | "manhattan_distance": lambda x: (1 / x) - 1, 163 | "chebyshev_distance": lambda x: (1 / x) - 1, 164 | } 165 | -------------------------------------------------------------------------------- 
/tests/test_experiment_real_data.py: -------------------------------------------------------------------------------- 1 | # tests/test_experiment_real_data_small.py 2 | 3 | import os 4 | import pathlib 5 | import unittest 6 | from collections import OrderedDict 7 | 8 | import numpy as np 9 | 10 | from evalify import Experiment 11 | 12 | 13 | class TestExperimentRealDataSmall(unittest.TestCase): 14 | """Tests for Experiment class using a subset of the LFW dataset""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures.""" 18 | # Path to LFW.npz, assuming it's in the tests/data/ directory 19 | self.lfw_npz = os.path.join(pathlib.Path(__file__).parent, "data", "LFW.npz") 20 | if not os.path.exists(self.lfw_npz): 21 | self.fail(f"LFW.npz not found at {self.lfw_npz}") 22 | 23 | X_y_array = np.load(self.lfw_npz) 24 | self.X = X_y_array["X"][:1000] 25 | self.y = X_y_array["y"][:1000] 26 | 27 | self.metrics = [ 28 | "cosine_similarity", 29 | "pearson_similarity", 30 | "euclidean_distance_l2", 31 | ] 32 | 33 | self.experiment = Experiment( 34 | metrics=self.metrics, 35 | same_class_samples="full", 36 | different_class_samples=("full", "full"), 37 | seed=555, # To ensure reproducibility 38 | ) 39 | 40 | # Run the experiment once during setup to reuse the results in multiple tests 41 | self.df = self.experiment.run(self.X, self.y) 42 | 43 | def test_number_of_samples(self): 44 | """Test that the number of generated samples matches the expected count.""" 45 | expected_num_samples = 499500 46 | actual_num_samples = len(self.df) 47 | self.assertEqual( 48 | actual_num_samples, 49 | expected_num_samples, 50 | f"Expected {expected_num_samples} samples, got {actual_num_samples}.", 51 | ) 52 | 53 | def test_roc_auc(self): 54 | """Test that ROC AUC values match the expected results.""" 55 | expected_roc_auc = OrderedDict( 56 | { 57 | "euclidean_distance_l2": 0.9998640116393942, 58 | "cosine_similarity": 0.9998640114481793, 59 | "pearson_similarity": 0.999858162377461, 60 | } 61 | ) 62 | 63 | actual_roc_auc = self.experiment.roc_auc() 64 | 65 | self.assertEqual( 66 | len(actual_roc_auc), 67 | len(self.metrics), 68 | f"Expected ROC AUC for {len(self.metrics)} metrics, got " 69 | f"{len(actual_roc_auc)}.", 70 | ) 71 | 72 | for metric in self.metrics: 73 | self.assertIn( 74 | metric, actual_roc_auc, f"ROC AUC for metric '{metric}' not found." 
75 | ) 76 | self.assertAlmostEqual( 77 | actual_roc_auc[metric], 78 | expected_roc_auc[metric], 79 | places=6, 80 | msg=f"ROC AUC for metric '{metric}' does not match.", 81 | ) 82 | 83 | def test_threshold_at_fpr(self): 84 | """Test that thresholds at a specified FPR match expected values.""" 85 | far = 0.01 86 | expected_threshold_at_fpr = { 87 | "cosine_similarity": { 88 | "FPR": 0.010001841326240518, 89 | "TPR": 0.9973539973539973, 90 | "threshold": 0.37717896699905396, 91 | }, 92 | "pearson_similarity": { 93 | "FPR": 0.010001841326240518, 94 | "TPR": 0.9973539973539973, 95 | "threshold": 0.37802454829216003, 96 | }, 97 | "euclidean_distance_l2": { 98 | "FPR": 0.010001841326240518, 99 | "TPR": 0.9973539973539973, 100 | "threshold": 1.1160835027694702, 101 | }, 102 | } 103 | 104 | actual_threshold_at_fpr = self.experiment.threshold_at_fpr(far) 105 | 106 | self.assertEqual( 107 | len(actual_threshold_at_fpr), 108 | len(self.metrics), 109 | f"Expected Threshold @ FPR for {len(self.metrics)} metrics, got " 110 | f"{len(actual_threshold_at_fpr)}.", 111 | ) 112 | 113 | for metric in self.metrics: 114 | self.assertIn( 115 | metric, 116 | actual_threshold_at_fpr, 117 | f"Threshold @ FPR for metric '{metric}' not found.", 118 | ) 119 | expected = expected_threshold_at_fpr[metric] 120 | actual = actual_threshold_at_fpr[metric] 121 | 122 | self.assertAlmostEqual( 123 | actual["FPR"], 124 | expected["FPR"], 125 | places=6, 126 | msg=f"FPR for metric '{metric}' does not match.", 127 | ) 128 | self.assertAlmostEqual( 129 | actual["TPR"], 130 | expected["TPR"], 131 | places=6, 132 | msg=f"TPR for metric '{metric}' does not match.", 133 | ) 134 | self.assertAlmostEqual( 135 | actual["threshold"], 136 | expected["threshold"], 137 | places=6, 138 | msg=f"Threshold for metric '{metric}' at FAR={far} does not match.", 139 | ) 140 | 141 | def test_eer(self): 142 | """Test that EER values and thresholds match the expected results.""" 143 | expected_eer = OrderedDict( 144 | { 145 | "cosine_similarity": { 146 | "EER": 0.004724863226023654, 147 | "threshold": 0.4244731664657593, 148 | }, 149 | "euclidean_distance_l2": { 150 | "EER": 0.004724863226023654, 151 | "threshold": 1.0728718042373657, 152 | }, 153 | "pearson_similarity": { 154 | "EER": 0.004914464785693375, 155 | "threshold": 0.4228288531303406, 156 | }, 157 | } 158 | ) 159 | 160 | actual_eer = self.experiment.eer() 161 | 162 | self.assertEqual( 163 | len(actual_eer), 164 | len(self.metrics), 165 | f"Expected EER for {len(self.metrics)} metrics, got {len(actual_eer)}.", 166 | ) 167 | 168 | for metric in self.metrics: 169 | self.assertIn(metric, actual_eer, f"EER for metric '{metric}' not found.") 170 | expected = expected_eer[metric] 171 | actual = actual_eer[metric] 172 | 173 | self.assertAlmostEqual( 174 | actual["EER"], 175 | expected["EER"], 176 | places=6, 177 | msg=f"EER for metric '{metric}' does not match.", 178 | ) 179 | self.assertAlmostEqual( 180 | actual["threshold"], 181 | expected["threshold"], 182 | places=6, 183 | msg=f"Threshold for EER of metric '{metric}' does not match.", 184 | ) 185 | 186 | def test_tar_at_far(self): 187 | """Test the tar_at_far method with specific FAR values.""" 188 | # Define FAR values to test 189 | far_values = [0.01, 0.001] 190 | 191 | # Define expected TAR values based on the recent experiment 192 | expected_tar_at_far = OrderedDict( 193 | { 194 | "cosine_similarity": { 195 | 0.01: 0.9973539973539973, 196 | 0.001: 0.9795879795879796, 197 | }, 198 | "pearson_similarity": { 199 | 0.01: 0.9973539973539973, 200 | 
0.001: 0.9793989793989794, 201 | }, 202 | "euclidean_distance_l2": { 203 | 0.01: 0.9973539973539973, 204 | 0.001: 0.9795879795879796, 205 | }, 206 | } 207 | ) 208 | 209 | # Call tar_at_far with the FAR values 210 | actual_tar_at_far = self.experiment.tar_at_far(far_values) 211 | 212 | # Assert the returned TAR@FAR matches expected values 213 | self.assertEqual( 214 | len(actual_tar_at_far), 215 | len(self.metrics), 216 | f"Expected TAR@FAR for {len(self.metrics)} metrics, got " 217 | f"{len(actual_tar_at_far)}.", 218 | ) 219 | 220 | for metric in self.metrics: 221 | self.assertIn( 222 | metric, actual_tar_at_far, f"TAR@FAR for metric '{metric}' not found." 223 | ) 224 | 225 | for far in far_values: 226 | self.assertIn( 227 | far, 228 | actual_tar_at_far[metric], 229 | f"TAR@FAR for metric '{metric}' at FAR={far} not found.", 230 | ) 231 | 232 | expected_tar = expected_tar_at_far[metric][far] 233 | actual_tar = actual_tar_at_far[metric][far] 234 | 235 | self.assertAlmostEqual( 236 | actual_tar, 237 | expected_tar, 238 | places=6, 239 | msg=f"TAR@FAR for metric '{metric}' at FAR={far} does not match.", 240 | ) 241 | 242 | 243 | # if __name__ == '__main__': 244 | # unittest.main() 245 | -------------------------------------------------------------------------------- /tests/test_evalify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `evalify` package.""" 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.special import comb 8 | 9 | from evalify import Experiment 10 | from evalify.metrics import metrics_caller 11 | 12 | 13 | class TestEvalify(unittest.TestCase): 14 | """Tests for `evalify` package.""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures, if any.""" 18 | rng = np.random.default_rng(555) 19 | self.nphotos = 500 20 | self.emb_size = 8 21 | self.nclasses = 10 22 | self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32) 23 | self.targets = rng.integers(self.nclasses, size=self.nphotos) 24 | 25 | def test_run_euclidean_distance(self): 26 | """Test run with euclidean_distance""" 27 | experiment = Experiment(metrics="euclidean_distance") 28 | df = experiment.run(self.embs, self.targets) 29 | experiment = Experiment(metrics="euclidean_distance_l2") 30 | df_l2 = experiment.run(self.embs, self.targets) 31 | self.assertGreater(df.euclidean_distance.max(), 0) 32 | self.assertGreater(df_l2.euclidean_distance_l2.max(), 0) 33 | 34 | def test_run_cosine_similarity(self): 35 | """Test run with cosine_similarity""" 36 | experiment = Experiment(metrics="cosine_similarity") 37 | df = experiment.run(self.embs, self.targets) 38 | self.assertLessEqual(df.cosine_similarity.max(), 1) 39 | 40 | def test_run_all_metrics_separated(self): 41 | for metric in metrics_caller.keys(): 42 | experiment = Experiment(metrics=metric) 43 | df = experiment.run(self.embs, self.targets) 44 | self.assertTrue(metric in df.columns) 45 | 46 | def test_run_all_metrics_combined(self): 47 | metrics = set(metrics_caller.keys()) 48 | experiment = Experiment(metrics=metrics) 49 | df = experiment.run(self.embs, self.targets) 50 | self.assertTrue(metrics.issubset(df.columns)) 51 | 52 | def test_run_full_class_samples(self): 53 | """Test run with return_embeddings""" 54 | experiment = Experiment( 55 | same_class_samples="full", 56 | different_class_samples=("full", "full"), 57 | ) 58 | df = experiment.run( 59 | self.embs, 60 | self.targets, 61 | ) 62 | self.assertEqual(len(df), comb(self.nphotos, 2)) 63 | 64 | def 
test_run_custom_class_samples(self): 65 | """Test run with custom same_class_samples and different_class_samples""" 66 | N, M = (2, 5) 67 | experiment = Experiment(same_class_samples=2, different_class_samples=(N, M)) 68 | same_class_samples = 3 69 | df = experiment.run( 70 | self.embs, 71 | self.targets, 72 | ) 73 | 74 | self.assertLessEqual( 75 | len(df), 76 | (comb(same_class_samples, 2) * self.nclasses) 77 | + (self.nclasses * (self.nclasses - 1)) * M * N, 78 | ) 79 | 80 | def test_run_shuffle(self): 81 | """Test run with shuffle""" 82 | experiment = Experiment(seed=555) 83 | df1 = experiment.run(self.embs, self.targets, shuffle=True) 84 | df2 = experiment.run(self.embs, self.targets, shuffle=True) 85 | self.assertEqual(len(df1), len(df2)) 86 | self.assertEqual(sum(df1.index), sum(df2.index)) 87 | self.assertTrue(all(ix in df2.index for ix in df1.index)) 88 | 89 | def test_run_no_batch_size(self): 90 | """Test run with no batch_size""" 91 | experiment = Experiment( 92 | same_class_samples=2, 93 | different_class_samples=(1, 1), 94 | seed=555, 95 | ) 96 | experiment.run(self.embs, self.targets, batch_size=None) 97 | self.assertTrue(experiment.check_experiment_run()) 98 | 99 | def test_run_return_embeddings(self): 100 | """Test run with return_embeddings""" 101 | experiment = Experiment() 102 | df = experiment.run(self.embs, self.targets, return_embeddings=True) 103 | self.assertLessEqual(len(df.at[0, "emb_a"]), self.emb_size) 104 | 105 | def test_run_evaluate_at_threshold(self): 106 | """Test run with evaluate_at_threshold""" 107 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 108 | experiment = Experiment(metrics=metrics) 109 | experiment.run( 110 | self.embs, 111 | self.targets, 112 | ) 113 | evaluations = experiment.evaluate_at_threshold(0.5, "cosine_similarity") 114 | # self.assertEqual(len(evaluations), len(metrics)) 115 | self.assertEqual(len(evaluations), 9) 116 | 117 | def test_run_find_optimal_cutoff(self): 118 | """Test run with find_optimal_cutoff""" 119 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 120 | experiment = Experiment(metrics=metrics) 121 | experiment.run( 122 | self.embs, 123 | self.targets, 124 | ) 125 | evaluations = experiment.find_optimal_cutoff() 126 | self.assertEqual(len(evaluations), len(metrics)) 127 | self.assertTrue(all(evaluation in metrics for evaluation in evaluations)) 128 | 129 | def test_run_get_roc_auc(self): 130 | """Test run with get_roc_auc""" 131 | metrics = ["cosine_similarity", "euclidean_distance_l2"] 132 | experiment = Experiment(metrics=metrics) 133 | experiment.run( 134 | self.embs, 135 | self.targets, 136 | ) 137 | roc_auc = experiment.roc_auc() 138 | # self.assertEqual(len(evaluations), len(metrics)) 139 | self.assertEqual(len(roc_auc), len(metrics)) 140 | self.assertTrue(all(auc in metrics for auc in roc_auc)) 141 | 142 | def test_run_predicted_as_similarity(self): 143 | """Test run with predicted_as_similarity""" 144 | experiment = Experiment(metrics=["cosine_similarity", "cosine_distance"]) 145 | experiment.run( 146 | self.embs, 147 | self.targets, 148 | ) 149 | result = experiment.predicted_as_similarity("cosine_similarity") 150 | result_2 = experiment.predicted_as_similarity("cosine_distance") 151 | self.assertTrue(np.allclose(result, result_2)) 152 | 153 | def test_run_find_threshold_at_fpr(self): 154 | """Test run with find_threshold_at_fpr""" 155 | metric = "cosine_similarity" 156 | experiment = Experiment( 157 | metrics=metric, 158 | different_class_samples=("full", "full"), 159 | ) 160 | 
experiment.run( 161 | self.embs, 162 | self.targets, 163 | ) 164 | fpr_d01 = experiment.threshold_at_fpr(0.1) 165 | fpr_d1 = experiment.threshold_at_fpr(1) 166 | fpr_d0 = experiment.threshold_at_fpr(0) 167 | self.assertEqual(len(fpr_d01[metric]), 3) 168 | self.assertAlmostEqual(fpr_d01[metric]["threshold"], 0.8939142, 3) 169 | self.assertAlmostEqual(fpr_d0[metric]["threshold"], 0.9953355, 3) 170 | self.assertAlmostEqual(fpr_d1[metric]["threshold"], 0.2060538, 3) 171 | 172 | def test_run_calculate_eer(self): 173 | """Test run with calculate_eer""" 174 | metric = "cosine_similarity" 175 | experiment = Experiment( 176 | metrics=metric, 177 | different_class_samples=("full", "full"), 178 | ) 179 | experiment.run( 180 | self.embs, 181 | self.targets, 182 | ) 183 | eer = experiment.eer() 184 | self.assertTrue("EER" in eer[metric]) 185 | 186 | def test__call__(self): 187 | """Test run with __call__""" 188 | experiment = Experiment(seed=555) 189 | result = experiment.run(self.embs, self.targets) 190 | result_2 = experiment(self.embs, self.targets) 191 | self.assertTrue(np.array_equal(result.to_numpy(), result_2.to_numpy())) 192 | 193 | def test_run_errors(self): 194 | """Test run errors""" 195 | with self.assertRaisesRegex( 196 | ValueError, 197 | "`same_class_samples` argument must be one of 'full' or an integer ", 198 | ): 199 | experiment = Experiment(same_class_samples=54.4) 200 | experiment.run(self.embs, self.targets) 201 | 202 | with self.assertRaisesRegex( 203 | ValueError, 204 | "`different_class_samples` argument must be one of 'full', 'minimal'", 205 | ): 206 | experiment = Experiment(different_class_samples="all") 207 | experiment.run(self.embs, self.targets) 208 | 209 | with self.assertRaisesRegex( 210 | ValueError, 211 | "When passing `different_class_samples` as a tuple or list. ", 212 | ): 213 | experiment = Experiment(different_class_samples=(1, 2, 3)) 214 | experiment.run( 215 | self.embs, 216 | self.targets, 217 | ) 218 | 219 | with self.assertRaisesRegex( 220 | ValueError, 221 | '`batch_size` argument must be either "best" or of type integer', 222 | ): 223 | experiment = Experiment() 224 | experiment.run(self.embs, self.targets, batch_size="all") 225 | 226 | with self.assertRaisesRegex(ValueError, "`metric` argument must be one of "): 227 | experiment = Experiment(metrics="dot_prod") 228 | experiment.run(self.embs, self.targets) 229 | 230 | with self.assertRaisesRegex( 231 | ValueError, 232 | "`p` must be an int and at least 1. Received: p=", 233 | ): 234 | experiment = Experiment() 235 | experiment.run(self.embs, self.targets, p=0.1) 236 | 237 | with self.assertRaisesRegex( 238 | NotImplementedError, 239 | "`evaluate_at_threshold` function can only be run after running " 240 | "`run_experiment`.", 241 | ): 242 | experiment = Experiment() 243 | experiment.evaluate_at_threshold(0.5, "euclidean_distance") 244 | 245 | with self.assertRaisesRegex( 246 | ValueError, 247 | "`evaluate_at_threshold` function can only be called with `metric` from ", 248 | ): 249 | experiment = Experiment(metrics="euclidean_distance") 250 | experiment.run(self.embs, self.targets) 251 | experiment.evaluate_at_threshold(0.5, "cosine_similarity") 252 | 253 | with self.assertRaisesRegex( 254 | ValueError, 255 | "`fpr` must be between 0 and 1. 
Received wanted_fpr=", 256 | ): 257 | experiment = Experiment(metrics="euclidean_distance") 258 | experiment.run(self.embs, self.targets) 259 | experiment.threshold_at_fpr(-1.1) 260 | -------------------------------------------------------------------------------- /evalify/evalify.py: -------------------------------------------------------------------------------- 1 | """Evalify main module used for creating the verification experiments. 2 | 3 | Creates experiments with embedding pairs to compare for face verification tasks 4 | including positive pairs, negative pairs and metrics calculations using a very 5 | optimized einstein sum. Many operations are dispatched to canonical BLAS, cuBLAS, 6 | or other specialized routines. Extremely large arrays are split into smaller batches, 7 | every batch would consume the roughly the maximum available memory. 8 | 9 | Typical usage example: 10 | 11 | ``` 12 | experiment = Experiment() 13 | experiment.run(X, y) 14 | ``` 15 | """ 16 | 17 | import itertools 18 | import sys 19 | from collections import OrderedDict 20 | from typing import Any, List, Optional, Sequence, Tuple, Union 21 | 22 | import numpy as np 23 | import pandas as pd 24 | from sklearn.metrics import auc, confusion_matrix, roc_curve 25 | 26 | from evalify.metrics import ( 27 | DISTANCE_TO_SIMILARITY, 28 | METRICS_NEED_NORM, 29 | METRICS_NEED_ORDER, 30 | REVERSE_DISTANCE_TO_SIMILARITY, 31 | metrics_caller, 32 | ) 33 | from evalify.utils import _validate_vectors, calculate_best_batch_size 34 | 35 | StrOrInt = Union[str, int] 36 | StrIntSequence = Union[str, int, Sequence[Union[str, int]]] 37 | 38 | 39 | class Experiment: 40 | """Defines an experiment for evalifying. 41 | 42 | Args: 43 | metrics: The list of metrics to use. Can be one or more of the following: 44 | `cosine_similarity`, `pearson_similarity`, `cosine_distance`, 45 | `euclidean_distance`, `euclidean_distance_l2`, `minkowski_distance`, 46 | `manhattan_distance` and `chebyshev_distance` 47 | same_class_samples: 48 | - 'full': Samples all possible images within each class to create all 49 | all possible positive pairs. 50 | - int: Samples specific number of images for every class to create 51 | nC2 pairs where n is passed integer. 52 | different_class_samples: 53 | - 'full': Samples one image from every class with all possible pairs 54 | of different classes. This can grow exponentially as the number 55 | of images increase. (N, M) = (1, "full") 56 | - 'minimal': Samples one image from every class with one image of 57 | all other classes. (N, M) = (1, 1). (Default) 58 | - int: Samples one image from every class with provided number of 59 | images of every other class. 60 | - tuple or list: (N, M) Samples N images from every class with M images of 61 | every other class. 62 | seed: Optional random seed for reproducibility. 63 | 64 | 65 | Notes: 66 | - `same_class_samples`: 67 | If the provided number is greater than the achievable for the class, 68 | the maximum possible combinations are used. 69 | - `different_class_samples`: 70 | If the provided number is greater than the achievable for the class, 71 | the maximum possible combinations are used. (N, M) can also be 72 | ('full', 'full') but this will calculate all possible combinations 73 | between all posibile negative samples. If the dataset is not small 74 | this will probably result in an extremely large array!. 
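    Example:
        A minimal construction sketch; the argument values below are arbitrary
        and chosen only for illustration:

        ```
        experiment = Experiment(
            metrics=["cosine_similarity", "euclidean_distance_l2"],
            same_class_samples="full",
            different_class_samples=(2, 5),
            seed=42,
        )
        ```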
75 | 76 | """ 77 | 78 | def __init__( 79 | self, 80 | metrics: Union[str, Sequence[str]] = "cosine_similarity", 81 | same_class_samples: StrOrInt = "full", 82 | different_class_samples: StrIntSequence = "minimal", 83 | seed: Optional[int] = None, 84 | ) -> None: 85 | self.experiment_success = False 86 | self.cached_predicted_as_similarity = {} 87 | self.metrics = (metrics,) if isinstance(metrics, str) else metrics 88 | self.same_class_samples = same_class_samples 89 | self.different_class_samples = different_class_samples 90 | self.seed = seed 91 | 92 | def __call__(self, *args: Any, **kwds: Any) -> Any: 93 | return self.run(*args, **kwds) 94 | 95 | @staticmethod 96 | def _validate_args( 97 | metrics: Sequence[str], 98 | same_class_samples: StrOrInt, 99 | different_class_samples: StrIntSequence, 100 | batch_size: Optional[StrOrInt], 101 | p, 102 | ) -> None: 103 | """Validates passed arguments to Experiment.run() method.""" 104 | if same_class_samples != "full" and not isinstance(same_class_samples, int): 105 | msg = ( 106 | "`same_class_samples` argument must be one of 'full' or an integer " 107 | f"Received: same_class_samples={same_class_samples}" 108 | ) 109 | raise ValueError( 110 | msg, 111 | ) 112 | 113 | if different_class_samples not in ("full", "minimal"): 114 | if not isinstance(different_class_samples, (int, list, tuple)): 115 | msg = ( 116 | "`different_class_samples` argument must be one of 'full', " 117 | "'minimal', an integer, a list or tuple of integers or keyword " 118 | "'full'." 119 | f"Received: different_class_samples={different_class_samples}." 120 | ) 121 | raise ValueError( 122 | msg, 123 | ) 124 | if isinstance(different_class_samples, (list, tuple)) and ( 125 | not ( 126 | all( 127 | isinstance(i, int) or i == "full" 128 | for i in different_class_samples 129 | ) 130 | ) 131 | or (len(different_class_samples)) != 2 132 | ): 133 | msg = ( 134 | "When passing `different_class_samples` as a tuple or list, " 135 | "elements must be exactly two of integer type or keyword 'full' " 136 | "(N, M). " 137 | f"Received: different_class_samples={different_class_samples}." 138 | ) 139 | raise ValueError( 140 | msg, 141 | ) 142 | 143 | if ( 144 | batch_size != "best" 145 | and not isinstance(batch_size, int) 146 | and batch_size is not None 147 | ): 148 | msg = ( 149 | '`batch_size` argument must be either "best" or of type integer ' 150 | f"Received: batch_size={batch_size} with type {type(batch_size)}." 151 | ) 152 | raise ValueError( 153 | msg, 154 | ) 155 | 156 | if any(metric not in metrics_caller for metric in metrics): 157 | msg = ( 158 | f"`metric` argument must be one of {tuple(metrics_caller.keys())} " 159 | f"Received: metric={metrics}" 160 | ) 161 | raise ValueError( 162 | msg, 163 | ) 164 | 165 | if p < 1: 166 | msg = f"`p` must be an int and at least 1. 
Received: p={p}" 167 | raise ValueError(msg) 168 | 169 | def _get_pairs( 170 | self, 171 | y, 172 | same_class_samples, 173 | different_class_samples, 174 | target, 175 | ) -> List[Tuple]: 176 | """Generates experiment pairs.""" 177 | same_ixs_full = np.argwhere(y == target).ravel() 178 | if isinstance(same_class_samples, int): 179 | same_class_samples = min(len(same_ixs_full), same_class_samples) 180 | same_ixs = self.rng.choice(same_ixs_full, same_class_samples) 181 | elif same_class_samples == "full": 182 | same_ixs = same_ixs_full 183 | same_pairs = itertools.combinations(same_ixs, 2) 184 | same_pairs = [(a, b, target, target, 1) for a, b in same_pairs] 185 | 186 | different_ixs = np.argwhere(y != target).ravel() 187 | diff_df = pd.DataFrame( 188 | data={"sample_idx": different_ixs, "target": y[different_ixs]}, 189 | ) 190 | 191 | diff_df = diff_df.sample(frac=1, random_state=self.seed) 192 | if different_class_samples in ["full", "minimal"] or isinstance( 193 | different_class_samples, 194 | int, 195 | ): 196 | N = 1 197 | if different_class_samples == "minimal": 198 | diff_df = diff_df.drop_duplicates(subset=["target"]) 199 | else: 200 | N, M = different_class_samples 201 | N = len(same_ixs_full) if N == "full" else min(N, len(same_ixs_full)) 202 | if M != "full": 203 | diff_df = ( 204 | diff_df.groupby("target") 205 | .apply(lambda x: x[:M], include_groups=False) 206 | .droplevel(0) 207 | ) 208 | 209 | different_ixs = diff_df.sample_idx.to_numpy() 210 | 211 | different_pairs = itertools.product( 212 | self.rng.choice(same_ixs_full, N, replace=False), 213 | different_ixs, 214 | ) 215 | different_pairs = [(a, b, target, y[b], 0) for a, b in different_pairs if a < b] 216 | 217 | return same_pairs + different_pairs 218 | 219 | def run( 220 | self, 221 | X: np.ndarray, 222 | y: np.ndarray, 223 | batch_size: Optional[StrOrInt] = "best", 224 | shuffle: bool = False, 225 | return_embeddings: bool = False, 226 | p: int = 3, 227 | ) -> pd.DataFrame: 228 | """Runs an experiment for face verification 229 | Args: 230 | X: Embeddings array 231 | y: Targets for X as integers 232 | batch_size: 233 | - 'best': Let the program decide based on available memory such that 234 | every batch will fit into the available memory. (Default) 235 | - int: Manually decide the batch_size. 236 | - None: No batching. All experiment and intermediate results must fit 237 | entirely into memory or a MemoryError will be raised. 238 | shuffle: Shuffle the returned experiment dataframe. Default: False. 239 | return_embeddings: Whether to return the embeddings instead of indexes. 240 | Default: False 241 | p: 242 | The order of the norm of the difference. Should be `p >= 1`, Only valid 243 | with minkowski_distance as a metric. Default = 3. 244 | 245 | Returns: 246 | pandas.DataFrame: A DataFrame representing the experiment results. 247 | 248 | Raises: 249 | ValueError: An error occurred with the provided arguments. 
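        Example:
            A minimal sketch with synthetic embeddings; the array shapes, class
            count and seed are arbitrary illustration values:

            ```
            import numpy as np
            from evalify import Experiment

            rng = np.random.default_rng(555)
            X = rng.random((500, 8), dtype=np.float32)  # 500 embeddings of size 8
            y = rng.integers(10, size=500)              # integer labels for 10 classes
            experiment = Experiment(metrics="cosine_similarity", seed=555)
            df = experiment.run(X, y, batch_size="best")
            # df columns: emb_a, emb_b, target_a, target_b, target, cosine_similarity
            ```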
250 | 251 | """ 252 | self._validate_args( 253 | self.metrics, 254 | self.same_class_samples, 255 | self.different_class_samples, 256 | batch_size, 257 | p, 258 | ) 259 | X, y = _validate_vectors(X, y) 260 | all_targets = np.unique(y) 261 | all_pairs = [] 262 | metric_fns = list(map(metrics_caller.get, self.metrics)) 263 | self.rng = np.random.default_rng(self.seed) 264 | for target in all_targets: 265 | all_pairs += self._get_pairs( 266 | y, 267 | self.same_class_samples, 268 | self.different_class_samples, 269 | target, 270 | ) 271 | 272 | self.df = pd.DataFrame( 273 | data=all_pairs, 274 | columns=["emb_a", "emb_b", "target_a", "target_b", "target"], 275 | ) 276 | experiment_size = len(self.df) 277 | if shuffle: 278 | self.df = self.df.sample(frac=1, random_state=self.seed) 279 | if batch_size == "best": 280 | batch_size = calculate_best_batch_size(X) 281 | elif batch_size is None: 282 | batch_size = experiment_size 283 | kwargs = {} 284 | if any(metric in METRICS_NEED_NORM for metric in self.metrics): 285 | kwargs["norms"] = np.linalg.norm(X, axis=1) 286 | if any(metric in METRICS_NEED_ORDER for metric in self.metrics): 287 | kwargs["p"] = p 288 | 289 | emb_a = self.df.emb_a.to_numpy() 290 | emb_b = self.df.emb_b.to_numpy() 291 | 292 | emb_a_s = np.array_split(emb_a, np.ceil(experiment_size / batch_size)) 293 | emb_b_s = np.array_split(emb_b, np.ceil(experiment_size / batch_size)) 294 | 295 | for metric, metric_fn in zip(self.metrics, metric_fns): 296 | self.df[metric] = np.hstack( 297 | [metric_fn(X, i, j, **kwargs) for i, j in zip(emb_a_s, emb_b_s)], 298 | ) 299 | if return_embeddings: 300 | self.df["emb_a"] = X[emb_a].tolist() 301 | self.df["emb_b"] = X[emb_b].tolist() 302 | 303 | self.experiment_success = True 304 | return self.df 305 | 306 | def find_optimal_cutoff(self) -> dict: 307 | """Finds the optimal cutoff threshold for each metric based on the ROC curve. 308 | 309 | This function calculates the optimal threshold for each metric by finding the 310 | point on the Receiver Operating Characteristic (ROC) curve where the difference 311 | between the True Positive Rate (TPR) and the False Positive Rate (FPR) is 312 | minimized. 313 | 314 | Returns: 315 | dict: A dictionary with metrics as keys and their corresponding optimal 316 | threshold as values. 317 | """ 318 | 319 | self.check_experiment_run() 320 | self.optimal_cutoff = {} 321 | for metric in self.metrics: 322 | fpr, tpr, threshold = roc_curve(self.df["target"], self.df[metric]) 323 | i = np.arange(len(tpr)) 324 | roc = pd.DataFrame( 325 | { 326 | "tf": pd.Series(tpr - (1 - fpr), index=i), 327 | "threshold": pd.Series(threshold, index=i), 328 | }, 329 | ) 330 | roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]] 331 | self.optimal_cutoff[metric] = roc_t["threshold"].item() 332 | return self.optimal_cutoff 333 | 334 | def threshold_at_fpr(self, fpr: float) -> dict: 335 | """Find the threshold at a specified False Positive Rate (FPR) for each metric. 336 | 337 | The function calculates the threshold at the specified FPR for each metric 338 | by using the Receiver Operating Characteristic (ROC) curve. If the desired 339 | FPR is 0 or 1, or no exact match is found, the closest thresholds are used. 340 | 341 | Args: 342 | fpr (float): Desired False Positive Rate. Must be between 0 and 1. 343 | 344 | Returns: 345 | dict: A dictionary where keys are the metrics and values are dictionaries 346 | containing FPR, TPR, and threshold at the specified FPR. 
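                For example, a single-metric result might look like the
                following (numbers are illustrative only):
                `{"cosine_similarity": {"FPR": 0.1, "TPR": 0.95, "threshold": 0.89}}`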
347 | 348 | Raises: 349 | ValueError: If the provided `fpr` is not between 0 and 1. 350 | """ 351 | 352 | self.check_experiment_run() 353 | if not 0 <= fpr <= 1: 354 | msg = "`fpr` must be between 0 and 1. " f"Received wanted_fpr={fpr}" 355 | raise ValueError( 356 | msg, 357 | ) 358 | threshold_at_fpr = {} 359 | for metric in self.metrics: 360 | predicted = self.predicted_as_similarity(metric) 361 | FPR, TPR, thresholds = roc_curve( 362 | self.df["target"], 363 | predicted, 364 | drop_intermediate=False, 365 | ) 366 | df_fpr_tpr = pd.DataFrame({"FPR": FPR, "TPR": TPR, "threshold": thresholds}) 367 | ix_left = np.searchsorted(df_fpr_tpr["FPR"], fpr, side="left") 368 | ix_right = np.searchsorted(df_fpr_tpr["FPR"], fpr, side="right") 369 | 370 | if fpr == 0: 371 | best = df_fpr_tpr.iloc[ix_right] 372 | elif fpr == 1 or ix_left == ix_right: 373 | best = df_fpr_tpr.iloc[ix_left] 374 | else: 375 | best = ( 376 | df_fpr_tpr.iloc[ix_left] 377 | if abs(df_fpr_tpr.iloc[ix_left].FPR - fpr) 378 | < abs(df_fpr_tpr.iloc[ix_right].FPR - fpr) 379 | else df_fpr_tpr.iloc[ix_right] 380 | ) 381 | best = best.to_dict() 382 | if metric in REVERSE_DISTANCE_TO_SIMILARITY: 383 | best["threshold"] = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)( 384 | best["threshold"], 385 | ) 386 | threshold_at_fpr[metric] = best 387 | return threshold_at_fpr 388 | 389 | def get_binary_prediction(self, metric: str, threshold: float) -> pd.Series: 390 | """Binary classification prediction based on the given metric and threshold. 391 | 392 | Args: 393 | metric: Metric name for the desired prediction. 394 | threshold: Cut off threshold. 395 | 396 | Returns: 397 | pd.Series: Binary predictions. 398 | 399 | """ 400 | return ( 401 | self.df[metric].apply(lambda x: 1 if x < threshold else 0) 402 | if metric in DISTANCE_TO_SIMILARITY 403 | else self.df[metric].apply(lambda x: 1 if x > threshold else 0) 404 | ) 405 | 406 | def evaluate_at_threshold(self, threshold: float, metric: str) -> dict: 407 | """Evaluate performance at specific threshold 408 | Args: 409 | threshold: Cut-off threshold. 410 | metric: Metric to use. 411 | 412 | Returns: 413 | dict: A dict ontaining all evaluation metrics. 414 | 415 | """ 416 | self.metrics_evaluation = {} 417 | self.check_experiment_run(metric) 418 | for metric in self.metrics: 419 | predicted = self.get_binary_prediction(metric, threshold) 420 | cm = confusion_matrix(self.df["target"], predicted) 421 | tn, fp, fn, tp = cm.ravel() 422 | TPR = tp / (tp + fn) # recall / true positive rate 423 | TNR = tn / (tn + fp) # true negative rate 424 | PPV = tp / (tp + fp) # precision / positive predicted value 425 | NPV = tn / (tn + fn) # negative predictive value 426 | FPR = fp / (fp + tn) # false positive rate 427 | FNR = 1 - TPR # false negative rate 428 | FDR = 1 - PPV # false discovery rate 429 | FOR = 1 - NPV # false omission rate 430 | F1 = 2 * (PPV * TPR) / (PPV + TPR) 431 | 432 | evaluation = { 433 | "TPR": TPR, 434 | "TNR": TNR, 435 | "PPV": PPV, 436 | "NPV": NPV, 437 | "FPR": FPR, 438 | "FNR": FNR, 439 | "FDR": FDR, 440 | "FOR": FOR, 441 | "F1": F1, 442 | } 443 | 444 | return evaluation 445 | 446 | def check_experiment_run(self, metric: Optional[str] = None) -> bool: 447 | caller = sys._getframe().f_back.f_code.co_name 448 | if not self.experiment_success: 449 | msg = ( 450 | f"`{caller}` function can only be run after running " 451 | "`run_experiment`." 
452 | ) 453 | raise NotImplementedError( 454 | msg, 455 | ) 456 | if metric is not None and metric not in self.metrics: 457 | msg = ( 458 | f"`{caller}` function can only be called with `metric` from " 459 | f"{self.metrics} which were used while running the experiment" 460 | ) 461 | raise ValueError( 462 | msg, 463 | ) 464 | return True 465 | 466 | def roc_auc(self) -> OrderedDict: 467 | """Find ROC AUC for all the metrics used. 468 | 469 | Returns: 470 | OrderedDict: An OrderedDict with AUC for all metrics. 471 | 472 | """ 473 | self.check_experiment_run() 474 | self.roc_auc = {} 475 | for metric in self.metrics: 476 | predicted = self.predicted_as_similarity(metric) 477 | fpr, tpr, thresholds = roc_curve( 478 | self.df["target"], 479 | predicted, 480 | drop_intermediate=False, 481 | ) 482 | self.roc_auc[metric] = auc(fpr, tpr).item() 483 | self.roc_auc = OrderedDict( 484 | sorted(self.roc_auc.items(), key=lambda x: x[1], reverse=True), 485 | ) 486 | return self.roc_auc 487 | 488 | def predicted_as_similarity(self, metric: str) -> pd.Series: 489 | """Convert distance metrics to a similarity measure. 490 | 491 | Args: 492 | metric: distance metric to convert to similarity. If a similarity metric is 493 | passed, It gets returned unchanged. 494 | 495 | Returns: 496 | pd.Series: Converted distance to similarity. 497 | 498 | """ 499 | predicted = self.df[metric] 500 | if metric in DISTANCE_TO_SIMILARITY: 501 | predicted = ( 502 | self.cached_predicted_as_similarity[metric] 503 | if metric in self.cached_predicted_as_similarity 504 | else DISTANCE_TO_SIMILARITY.get(metric)(predicted) 505 | ) 506 | self.cached_predicted_as_similarity[metric] = predicted 507 | return predicted 508 | 509 | def eer(self) -> OrderedDict: 510 | """Calculates the Equal Error Rate (EER) for each metric. 511 | 512 | Returns: 513 | OrderedDict: A dictionary containing the EER value and threshold for each 514 | metric. 515 | The metrics are sorted in ascending order based on the EER values. 516 | Example: {'metric1': {'EER': 0.123, 'threshold': 0.456}, 517 | ...} 518 | 519 | """ 520 | self.check_experiment_run() 521 | self.eer = {} 522 | for metric in self.metrics: 523 | predicted = self.predicted_as_similarity(metric) 524 | actual = self.df["target"] 525 | 526 | fpr, tpr, thresholds = roc_curve( 527 | actual, 528 | predicted, 529 | pos_label=1, 530 | drop_intermediate=False, 531 | ) 532 | fnr = 1 - tpr 533 | eer_threshold = thresholds[np.nanargmin(np.absolute(fnr - fpr))].item() 534 | eer_1 = fpr[np.nanargmin(np.absolute(fnr - fpr))].item() 535 | eer_2 = fnr[np.nanargmin(np.absolute(fnr - fpr))].item() 536 | if metric in REVERSE_DISTANCE_TO_SIMILARITY: 537 | eer_threshold = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)( 538 | eer_threshold, 539 | ) 540 | 541 | self.eer[metric] = {"EER": (eer_1 + eer_2) / 2, "threshold": eer_threshold} 542 | self.eer = OrderedDict( 543 | sorted(self.eer.items(), key=lambda x: x[1]["EER"], reverse=False), 544 | ) 545 | 546 | return self.eer 547 | 548 | def tar_at_far(self, far_values: List[float]) -> OrderedDict: 549 | """Calculates TAR at specified FAR values for each metric. 550 | 551 | Args: 552 | far_values (List[float]): A list of False Accept Rates (FAR) to get TAR 553 | values for. 554 | 555 | Returns: 556 | OrderedDict: A dictionary with keys as metrics and values as dictionaries 557 | of FAR:TAR pairs. 558 | 559 | Raises: 560 | ValueError: If any FAR in far_values is not between 0 and 1. 
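        Example:
            A minimal sketch, assuming an experiment has already been run; the
            TAR numbers in the comment are illustrative only:

            ```
            tar = experiment.tar_at_far([0.01, 0.001])
            # {"cosine_similarity": {0.01: 0.997, 0.001: 0.979}, ...}
            ```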
561 | """ 562 | if isinstance(far_values, (float, int)): 563 | far_values = [float(far_values)] 564 | 565 | if not all(0 <= far <= 1 for far in far_values): 566 | raise ValueError("All FAR values must be between 0 and 1.") 567 | 568 | self.check_experiment_run() 569 | tar_at_far_results = {} 570 | 571 | for metric in self.metrics: 572 | predicted = self.predicted_as_similarity(metric) 573 | fpr, tpr, _ = roc_curve(self.df["target"], predicted, pos_label=1) 574 | 575 | tar_values = {} 576 | for far in far_values: 577 | idx = np.searchsorted(fpr, far, side="right") - 1 578 | idx = max(0, min(idx, len(fpr) - 1)) # Ensure idx is within bounds 579 | tar_values[far] = tpr[idx].item() 580 | 581 | tar_at_far_results[metric] = tar_values 582 | 583 | self.tar_at_far_results = OrderedDict( 584 | sorted(tar_at_far_results.items(), key=lambda x: list(x[1].keys())[0]) 585 | ) 586 | 587 | return self.tar_at_far_results 588 | --------------------------------------------------------------------------------
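Putting the pieces together, the following is a short end-to-end sketch of the workflow implemented in `evalify/evalify.py` and exercised by the tests above. It uses synthetic embeddings; the sizes, seed, metrics, and FAR values are arbitrary illustration choices rather than recommendations.

```
import numpy as np

from evalify import Experiment

# Synthetic embeddings: 500 vectors of size 8 spread over 10 classes.
rng = np.random.default_rng(555)
X = rng.random((500, 8), dtype=np.float32)
y = rng.integers(10, size=500)

experiment = Experiment(
    metrics=["cosine_similarity", "euclidean_distance_l2"],
    different_class_samples="minimal",
    seed=555,
)
df = experiment.run(X, y)  # pair DataFrame with one column per metric

cutoffs = experiment.find_optimal_cutoff()   # {metric: optimal threshold}
report = experiment.evaluate_at_threshold(
    cutoffs["cosine_similarity"], "cosine_similarity"
)                                            # TPR, TNR, PPV, NPV, FPR, FNR, FDR, FOR, F1
aucs = experiment.roc_auc()                  # OrderedDict sorted by AUC, descending
eers = experiment.eer()                      # {metric: {"EER": ..., "threshold": ...}}
tars = experiment.tar_at_far([0.01, 0.001])  # {metric: {FAR: TAR}}
```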