├── images └── logo.png ├── examples ├── test_data │ ├── masks │ │ ├── 0001.png │ │ ├── 19.png │ │ └── aerial-1867541__340.png │ ├── preds │ │ ├── 0001.png │ │ ├── 19.png │ │ └── aerial-1867541__340.png │ └── readme.md ├── test_bimetrics.py ├── version_performance.json ├── metric_recorder.py └── test_metrics.py ├── requirements.txt ├── deploy ├── .gitignore ├── README.md ├── api.rst ├── installation.rst ├── conf.py ├── index.rst ├── metrics.rst └── usage.rst ├── CITATION.cff ├── py_sod_metrics ├── __init__.py ├── utils.py ├── multiscale_iou.py ├── size_invariance.py ├── context_measure.py └── fmeasurev2.py ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── .github └── workflows │ ├── docs.yml │ ├── python-publish.yml │ └── README.md ├── .gitignore ├── pyproject.toml ├── readme_zh.md └── readme.md /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/images/logo.png -------------------------------------------------------------------------------- /examples/test_data/masks/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/0001.png -------------------------------------------------------------------------------- /examples/test_data/masks/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/19.png -------------------------------------------------------------------------------- /examples/test_data/preds/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/0001.png -------------------------------------------------------------------------------- /examples/test_data/preds/19.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/19.png -------------------------------------------------------------------------------- /examples/test_data/readme.md: -------------------------------------------------------------------------------- 1 | # 数据来源 2 | 3 | * `aerial-1867541__340` SOC 纯背景mask 4 | * `0001.png` ECSSD 包含目标的mask 5 | * `19.png` Pascal-S 6 | -------------------------------------------------------------------------------- /examples/test_data/masks/aerial-1867541__340.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/aerial-1867541__340.png -------------------------------------------------------------------------------- /examples/test_data/preds/aerial-1867541__340.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/aerial-1867541__340.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18,<2.3.5 2 | scipy>=1.5,<2.0 3 | scikit-image>=0.19,<0.26 4 | scikit-learn>=1.0,<2.0 5 | opencv-python-headless>=4.7.0,<5.0.0 6 | -------------------------------------------------------------------------------- /deploy/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Sphinx build artifacts 2 | _build/ 3 | _static/ 4 | _templates/ 5 | 6 | # Python cache 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # Environment 12 | .doctrees/ 13 | -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it using these metadata." 3 | authors: 4 | - family-names: "Pang" 5 | given-names: "Youwei" 6 | date-released: 2020-11-21 7 | keywords: 8 | - metrics 9 | - metrics-reported 10 | - metrics-evaluation 11 | - metrics-library 12 | - salient-object-detection 13 | - camouflaged-object-detection 14 | - saliency-detection 15 | - saliency-methods 16 | license: MIT License 17 | title: "PySODMetrics" 18 | abstract: "A Simple and Efficient Implementation of Grayscale/Binary Segmentation Metrics" 19 | url: "https://github.com/lartpang/PySODMetrics" 20 | repository-code: "https://github.com/lartpang/PySODMetrics" 21 | version: v1.4.3 22 | -------------------------------------------------------------------------------- /py_sod_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from py_sod_metrics.context_measure import CamouflageContextMeasure, ContextMeasure 2 | from py_sod_metrics.fmeasurev2 import ( 3 | BERHandler, 4 | DICEHandler, 5 | FmeasureHandler, 6 | FmeasureV2, 7 | FPRHandler, 8 | IOUHandler, 9 | KappaHandler, 10 | OverallAccuracyHandler, 11 | PrecisionHandler, 12 | RecallHandler, 13 | SensitivityHandler, 14 | SpecificityHandler, 15 | TNRHandler, 16 | TPRHandler, 17 | ) 18 | from py_sod_metrics.multiscale_iou import MSIoU 19 | from py_sod_metrics.size_invariance import SizeInvarianceFmeasureV2, SizeInvarianceMAE 20 | from py_sod_metrics.sod_metrics import ( 21 | MAE, 22 | Emeasure, 23 | Fmeasure, 24 | HumanCorrectionEffortMeasure, 25 | Smeasure, 26 | WeightedFmeasure, 27 | ) 28 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html 
for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-toml 11 | - id: check-added-large-files 12 | - id: fix-encoding-pragma 13 | - id: mixed-line-ending 14 | 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | # Ruff version. 17 | rev: v0.11.8 18 | hooks: 19 | # Run the linter. 20 | - id: ruff 21 | types_or: [ python, pyi ] 22 | args: [ --fix ] 23 | # Run the formatter. 24 | - id: ruff-format 25 | types_or: [ python, pyi ] 26 | 27 | - repo: https://github.com/pycqa/isort 28 | rev: 5.6.4 29 | hooks: 30 | - id: isort 31 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## [1.4.3] - 2025-5-8 4 | 5 | - Migrate to modern PyPI publishing by configuring `pyproject.toml` and `python-publish.yml`. 6 | - Update the formatter and linter tools to `ruff`. 7 | - Update the documentation information for the functions in `py_sod_metrics/fmeasurev2.py` and `py_sod_metrics/multiscale_iou.py`. 8 | - Optimize the code in `py_sod_metrics/multiscale_iou.py`. 9 | 10 | ## [1.4.3.1] - 2025-5-8 11 | 12 | - [FEATURE] Add `binary`, `dinamic`, and `adaptive` modes for `py_sod_metrics/multiscale_iou.py`. 13 | - [UPDATE] Update `examples/test_metrics.py` to support `binary`, `dinamic`, and `adaptive` modes of `MSIoU`. 14 | - [NOTE] The current implementation of the dynamic mode for `MSIoU` relies on the for loop, so it currently runs less efficiently. 15 | 16 | ## [1.4.4] - 2025-5-9 17 | 18 | - [FEATURE] Add `normalize` parameter to `py_sod_metrics/sod_metrics.py`. 19 | - [UPDATE] Update a unified function `validate_and_normalize_input` to validate and normalize the input data. 
20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 lartpang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /deploy/README.md: -------------------------------------------------------------------------------- 1 | # PySODMetrics Documentation Deployment 2 | 3 | This directory contains the source files for building the PySODMetrics documentation. 4 | 5 | ## Quick Start 6 | 7 | Build the documentation: 8 | 9 | ```bash 10 | cd deploy 11 | sphinx-build -b html . ./_build 12 | ``` 13 | 14 | The built documentation will be placed in the `_build/` directory in the project root. 
15 | 16 | ## Clean Build 17 | 18 | To clean the build directory and rebuild from scratch: 19 | 20 | ```bash 21 | cd deploy 22 | rm -rf ./_build 23 | sphinx-build -b html . ./_build 24 | ``` 25 | 26 | ## Directory Structure 27 | 28 | - `*.rst` - Documentation source files (reStructuredText) 29 | - `conf.py` - Sphinx configuration 30 | - `_static/` - Static files (CSS, images, etc.) 31 | 32 | ## Output 33 | 34 | - Built HTML documentation is output to: `./deploy/_build/` 35 | - This allows the `deploy/_build/` folder to be used directly for GitHub Pages 36 | 37 | ## Requirements 38 | 39 | Install documentation dependencies: 40 | 41 | ```bash 42 | pip install sphinx sphinx-rtd-theme 43 | ``` 44 | 45 | Or use the project's optional dependencies: 46 | 47 | ```bash 48 | pip install -e ".[docs]" 49 | ``` 50 | -------------------------------------------------------------------------------- /deploy/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | This page provides the complete API documentation for PySODMetrics. 5 | 6 | .. contents:: Contents 7 | :local: 8 | :depth: 2 9 | 10 | Core Metrics Module 11 | ------------------- 12 | 13 | .. automodule:: py_sod_metrics.sod_metrics 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | FmeasureV2 Module 19 | ----------------- 20 | 21 | .. automodule:: py_sod_metrics.fmeasurev2 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | Context Measure Module 27 | ---------------------- 28 | 29 | .. automodule:: py_sod_metrics.context_measure 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | Multi-Scale IoU Module 35 | ---------------------- 36 | 37 | .. automodule:: py_sod_metrics.multiscale_iou 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | Size Invariance Module 43 | ----------------------- 44 | 45 | .. 
automodule:: py_sod_metrics.size_invariance 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | Utility Functions 51 | ----------------- 52 | 53 | .. automodule:: py_sod_metrics.utils 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - 'deploy/**' 10 | - 'py_sod_metrics/**' 11 | - '.github/workflows/docs.yml' 12 | workflow_dispatch: # Allow manual trigger 13 | 14 | permissions: 15 | contents: write 16 | pages: write 17 | id-token: write 18 | 19 | jobs: 20 | build-and-deploy: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v4 26 | with: 27 | fetch-depth: 0 # Fetch all history for proper git operations 28 | 29 | - name: Set up Python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: '3.10' 33 | cache: 'pip' 34 | 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install sphinx sphinx-rtd-theme 39 | pip install -e . 40 | 41 | - name: Build documentation 42 | run: | 43 | cd deploy 44 | sphinx-build -b html . 
./_build 45 | 46 | - name: Add .nojekyll file 47 | run: | 48 | touch deploy/_build/.nojekyll 49 | 50 | - name: Deploy to GitHub Pages 51 | uses: peaceiris/actions-gh-pages@v3 52 | with: 53 | github_token: ${{ secrets.GITHUB_TOKEN }} 54 | publish_dir: ./deploy/_build 55 | publish_branch: gh-pages 56 | force_orphan: true 57 | user_name: 'github-actions[bot]' 58 | user_email: 'github-actions[bot]@users.noreply.github.com' 59 | commit_message: 'Deploy documentation from ${{ github.sha }}' 60 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI 2 | 3 | # 触发条件:当手动创建Release时触发(包括草稿发布转正式发布) 4 | on: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | build: 10 | name: Build distribution 📦 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | # 1. 检出仓库代码 15 | - uses: actions/checkout@v4 16 | with: 17 | # 禁用凭据持久化(避免权限残留) 18 | persist-credentials: false 19 | 20 | # 2. 设置Python环境(使用最新的3.x版本) 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.x" 25 | 26 | # 3. 安装Python构建工具 27 | - name: Install pypa/build 28 | run: python3 -m pip install build --user 29 | 30 | # 4. 构建二进制wheel和源代码tarball 31 | - name: Build a binary wheel and a source tarball 32 | run: python3 -m build 33 | 34 | # 5. 保存构建产物(dist目录下的文件) 35 | - name: Store the distribution packages 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: python-package-distributions 39 | path: dist/ 40 | retention-days: 1 # 1 天后删除 artifact 41 | 42 | publish-to-pypi: 43 | name: Publish Python 🐍 distribution 📦 to PyPI 44 | # 条件判断:仅当满足以下所有条件时执行 45 | # 1. 事件类型为Release创建 46 | # 2. 标签以refs/tags/v开头(即vX.Y.Z格式) 47 | # 3. 标签包含点号(确保版本分隔符存在) 48 | # 4. 
排除包含连续点号的异常标签(如v1..2) 49 | if: startsWith(github.ref, 'refs/tags/v') && contains(github.ref, '.') 50 | needs: [build] # 依赖build作业的完成 51 | runs-on: ubuntu-latest 52 | # 环境配置 53 | environment: 54 | name: pypi 55 | url: https://pypi.org/p/pysodmetrics 56 | permissions: 57 | # 必须配置OIDC权限用于可信发布 58 | id-token: write 59 | 60 | steps: 61 | # 6. 下载之前构建阶段保存的产物 62 | - name: Download all the dists 63 | uses: actions/download-artifact@v4 64 | with: 65 | name: python-package-distributions 66 | path: dist/ 67 | # 7. 发布到PyPI 68 | - name: Publish distribution 📦 to PyPI 69 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /deploy/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | PySODMetrics requires Python 3.6 or higher and the following dependencies: 8 | 9 | * numpy >= 1.18, < 2.3.5 10 | * scipy >= 1.5, < 2.0 11 | * scikit-image >= 0.19, < 0.26 12 | * scikit-learn >= 1.0, < 2.0 13 | * opencv-python-headless >= 4.7.0, < 5.0.0 14 | 15 | Install from PyPI 16 | ----------------- 17 | 18 | The easiest way to install PySODMetrics is from PyPI using pip: 19 | 20 | .. code-block:: bash 21 | 22 | pip install pysodmetrics 23 | 24 | This is the **recommended and most stable** installation method. 25 | 26 | Install from Source 27 | ------------------- 28 | 29 | Installing from GitHub (Latest Version) 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | To get the latest development version (which may include new features but could be less stable): 33 | 34 | .. code-block:: bash 35 | 36 | pip install git+https://github.com/lartpang/PySODMetrics.git 37 | 38 | Installing from Cloned Repository 39 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 40 | 41 | If you want to modify the code or contribute to the project: 42 | 43 | 1. Clone the repository: 44 | 45 | .. 
code-block:: bash 46 | 47 | git clone https://github.com/lartpang/PySODMetrics.git 48 | cd PySODMetrics 49 | 50 | 2. Install in development mode: 51 | 52 | .. code-block:: bash 53 | 54 | pip install -e . 55 | 56 | Building Documentation 57 | ---------------------- 58 | 59 | To build the documentation locally, you need to install the documentation dependencies: 60 | 61 | .. code-block:: bash 62 | 63 | pip install sphinx sphinx-rtd-theme 64 | 65 | Then build the documentation: 66 | 67 | .. code-block:: bash 68 | 69 | cd deploy 70 | sphinx-build -b html . ./_build 71 | # (the same command works 72 | # on Linux, Mac, and Windows) 73 | 74 | The built documentation will be available in ``deploy/_build/``. 75 | 76 | Verifying Installation 77 | ---------------------- 78 | 79 | To verify that PySODMetrics is installed correctly, open a Python interpreter and try: 80 | 81 | .. code-block:: python 82 | 83 | import py_sod_metrics 84 | from py_sod_metrics import MAE, Smeasure 85 | 86 | # If no errors occur, the installation was successful! 87 | print("PySODMetrics installed successfully!") 88 | 89 | You can also check the available classes: 90 | 91 | .. 
code-block:: python 92 | 93 | import py_sod_metrics 94 | print(dir(py_sod_metrics)) 95 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea/ 7 | .vscode/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | deploy/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /deploy/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import os 7 | import sys 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. 
12 | sys.path.insert(0, os.path.abspath("..")) 13 | 14 | # -- Project information ----------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 16 | 17 | project = "PySODMetrics" 18 | copyright = "2025, lartpang" 19 | author = "lartpang" 20 | release = "1.6.0" 21 | 22 | # -- General configuration --------------------------------------------------- 23 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 24 | 25 | extensions = [ 26 | "sphinx.ext.autodoc", 27 | "sphinx.ext.napoleon", 28 | "sphinx.ext.viewcode", 29 | "sphinx.ext.intersphinx", 30 | "sphinx.ext.mathjax", 31 | ] 32 | 33 | templates_path = ["_templates"] 34 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 35 | 36 | # -- Options for HTML output ------------------------------------------------- 37 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 38 | 39 | html_theme = "sphinx_rtd_theme" 40 | html_static_path = ["_static"] 41 | 42 | # -- Extension configuration ------------------------------------------------- 43 | 44 | # Napoleon settings 45 | napoleon_google_docstring = True 46 | napoleon_numpy_docstring = True 47 | napoleon_include_init_with_doc = True 48 | napoleon_include_private_with_doc = False 49 | napoleon_include_special_with_doc = True 50 | napoleon_use_admonition_for_examples = False 51 | napoleon_use_admonition_for_notes = False 52 | napoleon_use_admonition_for_references = False 53 | napoleon_use_ivar = False 54 | napoleon_use_param = True 55 | napoleon_use_rtype = True 56 | napoleon_preprocess_types = False 57 | napoleon_type_aliases = None 58 | napoleon_attr_annotations = True 59 | 60 | # Autodoc settings 61 | autodoc_default_options = { 62 | "members": True, 63 | "member-order": "bysource", 64 | "special-members": "__init__", 65 | "undoc-members": True, 66 | "exclude-members": "__weakref__", 67 | } 68 | 69 | # Intersphinx mapping 
70 | intersphinx_mapping = { 71 | "python": ("https://docs.python.org/3", None), 72 | "numpy": ("https://numpy.org/doc/stable/", None), 73 | "scipy": ("https://docs.scipy.org/doc/scipy/", None), 74 | } 75 | -------------------------------------------------------------------------------- /deploy/index.rst: -------------------------------------------------------------------------------- 1 | PySODMetrics Documentation 2 | =========================== 3 | 4 | Welcome to PySODMetrics - A simple and efficient implementation of SOD metrics. 5 | 6 | .. image:: https://img.shields.io/pypi/v/pysodmetrics 7 | :target: https://pypi.org/project/pysodmetrics/ 8 | :alt: PyPI version 9 | 10 | .. image:: https://img.shields.io/pypi/dm/pysodmetrics?label=pypi%20downloads&logo=PyPI&logoColor=white 11 | :target: https://pypi.org/project/pysodmetrics/ 12 | :alt: Downloads 13 | 14 | Overview 15 | -------- 16 | 17 | PySODMetrics is a Python library that provides simple and efficient implementations of 18 | metrics for evaluating salient object detection (SOD), camouflaged object detection (COD), 19 | and medical image segmentation tasks. 20 | 21 | **Key Features:** 22 | 23 | * Based on numpy and scipy for fast computation 24 | * Verified against the original MATLAB implementations 25 | * Simple and extensible code structure 26 | * Lightweight and easy to use 27 | 28 | .. note:: 29 | Our exploration in this field continues with `PyIRSTDMetrics `_, 30 | a project born from the same core motivation. Think of them as twin initiatives: 31 | this project maps the landscape of current evaluation, while its sibling takes the next step 32 | to expand upon and rethink it. 33 | 34 | Contents 35 | -------- 36 | 37 | .. toctree:: 38 | :maxdepth: 2 39 | :caption: User Guide 40 | 41 | installation 42 | usage 43 | metrics 44 | 45 | .. 
toctree:: 46 | :maxdepth: 2 47 | :caption: API Reference 48 | 49 | api 50 | 51 | Supported Metrics 52 | ----------------- 53 | 54 | PySODMetrics supports a comprehensive set of evaluation metrics: 55 | 56 | * **MAE** - Mean Absolute Error 57 | * **S-measure** (:math:`S_m`) - Structure Measure 58 | * **E-measure** (:math:`E_m`) - Enhanced-alignment Measure 59 | * **F-measure** (:math:`F_\beta`) - Precision-Recall F-measure 60 | * **Weighted F-measure** (:math:`F^\omega_\beta`) 61 | * **Context-Measure** (:math:`C_\beta`, :math:`C^\omega_\beta`) 62 | * **Multi-Scale IoU** - Multi-scale Intersection over Union 63 | * **Human Correction Effort Measure** 64 | * And many more classification metrics (BER, Dice, Kappa, Precision, Recall, etc.) 65 | 66 | See :doc:`metrics` for detailed descriptions of all supported metrics. 67 | 68 | Indices and tables 69 | ================== 70 | 71 | * :ref:`genindex` 72 | * :ref:`modindex` 73 | * :ref:`search` 74 | 75 | Related Projects 76 | ================ 77 | 78 | * `PySODEvalToolkit `_ - A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 79 | 80 | Links 81 | ===== 82 | 83 | * **GitHub Repository:** https://github.com/lartpang/PySODMetrics 84 | * **PyPI Package:** https://pypi.org/project/pysodmetrics/ 85 | * **Issue Tracker:** https://github.com/lartpang/PySODMetrics/issues 86 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools] 6 | packages = ["py_sod_metrics"] # 直接指定包名 7 | 8 | [project] 9 | name = "pysodmetrics" 10 | version = "1.6.0" 11 | dependencies = [ 12 | "numpy>=1.18,<2.3.5", 13 | "scipy>=1.5,<2.0", 14 | "scikit-image>=0.19,<0.26", 15 | "scikit-learn>=1.0,<2.0", 16 | "opencv-python-headless>=4.7.0,<5.0.0", 17 | ] 18 | 
requires-python = ">=3.6" 19 | authors = [{ name = "lartpang", email = "lartpang@gmail.com" }] 20 | maintainers = [{ name = "lartpang", email = "lartpang@gmail.com" }] 21 | description = "A simple and efficient metric implementation for grayscale/binary image segmentation like salient object detection, camouflaged object detection, and medical image segmentation." 22 | readme = "readme.md" 23 | license = { file = "LICENSE" } 24 | keywords = [ 25 | "salient object detection", 26 | "camouflaged object detection", 27 | "medical image segmentation", 28 | "dichotomous image segmentation", 29 | "saliency detection", 30 | "metric", 31 | "deep learning", 32 | ] 33 | classifiers = [ 34 | "Development Status :: 5 - Production/Stable", 35 | "Environment :: Console", 36 | "Intended Audience :: Developers", 37 | "Operating System :: OS Independent", 38 | "Programming Language :: Python", 39 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 40 | ] 41 | 42 | [project.urls] 43 | Homepage = "https://github.com/lartpang/PySODMetrics" 44 | Documentation = "https://github.com/lartpang/PySODMetrics" 45 | Repository = "https://github.com/lartpang/PySODMetrics.git" 46 | "Bug Tracker" = "https://github.com/lartpang/PySODMetrics/issues" 47 | Changelog = "https://github.com/lartpang/PySODMetrics/blob/master/CHANGELOG.md" 48 | 49 | [project.optional-dependencies] 50 | docs = [ 51 | "sphinx>=7.0.0", 52 | "sphinx-rtd-theme>=2.0.0", 53 | ] 54 | 55 | [tool.isort] 56 | # https://pycqa.github.io/isort/docs/configuration/options/ 57 | profile = "black" 58 | multi_line_output = 3 59 | filter_files = true 60 | supported_extensions = "py" 61 | 62 | [tool.black] 63 | line-length = 119 64 | include = '\.pyi?$' 65 | exclude = ''' 66 | /( 67 | \.eggs 68 | | \.git 69 | | \.idea 70 | | \.vscode 71 | | \.hg 72 | | \.mypy_cache 73 | | \.tox 74 | | \.venv 75 | | _build 76 | | buck-out 77 | | build 78 | | dist 79 | | output 80 | )/ 81 | ''' 82 | 83 | [tool.ruff] 84 | # Same as Black. 
85 | line-length = 119 86 | indent-width = 4 87 | # Exclude a variety of commonly ignored directories. 88 | exclude = [ 89 | ".bzr", 90 | ".direnv", 91 | ".eggs", 92 | ".git", 93 | ".git-rewrite", 94 | ".hg", 95 | ".ipynb_checkpoints", 96 | ".mypy_cache", 97 | ".nox", 98 | ".pants.d", 99 | ".pyenv", 100 | ".pytest_cache", 101 | ".pytype", 102 | ".ruff_cache", 103 | ".svn", 104 | ".tox", 105 | ".venv", 106 | ".vscode", 107 | "__pypackages__", 108 | "_build", 109 | "buck-out", 110 | "build", 111 | "dist", 112 | "node_modules", 113 | "site-packages", 114 | "venv", 115 | ] 116 | [tool.ruff.format] 117 | # Like Black, use double quotes for strings. 118 | quote-style = "double" 119 | # Like Black, indent with spaces, rather than tabs. 120 | indent-style = "space" 121 | # Like Black, respect magic trailing commas. 122 | skip-magic-trailing-comma = false 123 | # Like Black, automatically detect the appropriate line ending. 124 | line-ending = "auto" 125 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # GitHub Actions 自动构建文档 2 | 3 | 本项目使用 GitHub Actions 自动构建和部署 Sphinx 文档到 GitHub Pages。 4 | 5 | ## 工作流程 6 | 7 | 当代码推送到主分支时,GitHub Actions 会自动: 8 | 9 | 1. 检出代码 10 | 2. 安装 Python 和依赖 11 | 3. 构建 Sphinx 文档 12 | 4. 部署到 `gh-pages` 分支 13 | 5. GitHub Pages 自动发布 14 | 15 | ## 触发条件 16 | 17 | 文档自动构建会在以下情况触发: 18 | 19 | - 推送到 `main` 或 `master` 分支 20 | - 修改了以下文件: 21 | - `deploy/` 目录中的任何文件(文档源) 22 | - `py_sod_metrics/` 目录中的任何文件(API 文档源) 23 | - `.github/workflows/docs.yml` 工作流文件 24 | 25 | ## 手动触发 26 | 27 | 如果需要手动触发文档构建: 28 | 29 | 1. 进入 GitHub 仓库页面 30 | 2. 点击 "Actions" 标签 31 | 3. 选择 "Build and Deploy Documentation" 工作流 32 | 4. 点击 "Run workflow" 按钮 33 | 34 | ## GitHub Pages 设置 35 | 36 | ### 首次设置 37 | 38 | 1. 进入仓库的 Settings → Pages 39 | 2. Source 选择: 40 | - **Branch**: `gh-pages` 41 | - **Folder**: `/ (root)` 42 | 3. 
保存设置 43 | 44 | ### 访问文档 45 | 46 | 文档发布后,可以通过以下地址访问: 47 | 48 | ``` 49 | https://.github.io/PySODMetrics/ 50 | ``` 51 | 52 | ## 工作流文件 53 | 54 | 工作流文件位于:`.github/workflows/docs.yml` 55 | 56 | ### 主要步骤 57 | 58 | 1. **Checkout repository** - 检出代码 59 | 2. **Set up Python** - 安装 Python 3.10 60 | 3. **Install dependencies** - 安装 Sphinx 和主题 61 | 4. **Build documentation** - 构建 HTML 文档 62 | 5. **Deploy to GitHub Pages** - 部署到 gh-pages 分支 63 | 64 | ## 本地构建 vs 自动构建 65 | 66 | ### 本地构建 67 | 68 | 仍然可以在本地构建文档进行预览: 69 | 70 | ```bash 71 | cd deploy 72 | make html # Linux/Mac 73 | make.bat html # Windows 74 | ``` 75 | 76 | 本地构建的结果在 `docs/` 目录,但**不需要**提交这些文件到 Git。 77 | 78 | ### 自动构建 79 | 80 | GitHub Actions 会在云端自动构建,无需本地生成 HTML 文件。 81 | 82 | ## Git 工作流建议 83 | 84 | ### 方案 A:不提交 docs/ 目录(推荐) 85 | 86 | 将 `docs/` 添加到 `.gitignore`: 87 | 88 | ```gitignore 89 | # Build output (generated by GitHub Actions) 90 | docs/ 91 | ``` 92 | 93 | **优点**: 94 | - 仓库更干净 95 | - 避免不必要的文件冲突 96 | - 减小仓库大小 97 | 98 | **缺点**: 99 | - 必须依赖 GitHub Actions 100 | 101 | ### 方案 B:提交 docs/ 目录 102 | 103 | 保留 `docs/` 在 Git 中作为备份。 104 | 105 | **优点**: 106 | - 即使 Actions 失败也有备份 107 | - 可以查看文档历史 108 | 109 | **缺点**: 110 | - 每次推送都会有大量文件变更 111 | - 容易产生合并冲突 112 | 113 | ## 故障排查 114 | 115 | ### Actions 失败 116 | 117 | 如果 GitHub Actions 失败,检查: 118 | 119 | 1. **Permission 错误**:确保仓库设置中启用了 Actions 的写权限 120 | - Settings → Actions → General → Workflow permissions 121 | - 选择 "Read and write permissions" 122 | 123 | 2. **分支保护**:确保 `gh-pages` 分支没有保护规则阻止 Actions 124 | 125 | 3. **依赖安装失败**:检查 `pyproject.toml` 中的依赖是否正确 126 | 127 | ### Pages 未更新 128 | 129 | 如果文档未更新: 130 | 131 | 1. 检查 Actions 是否成功运行 132 | 2. 确认 GitHub Pages 设置正确(来源为 `gh-pages` 分支) 133 | 3. 
import numpy as np

# the different implementation of epsilon (extreme min value) between numpy and matlab
EPS = np.spacing(1)
TYPE = np.float64


def validate_and_normalize_input(pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
    """Validate and optionally normalize prediction and ground truth inputs.

    This function ensures that prediction and ground truth arrays have compatible
    shapes and appropriate data types. When normalization is enabled, it converts
    inputs to the standard format required by the predefined metrics (pred in
    [0, 1] as float, gt as boolean).

    Args:
        pred (np.ndarray): Prediction array. If ``normalize=True``, should be a
            uint8 grayscale image (0-255). If ``normalize=False``, must be
            float32/float64 in range [0, 1].
        gt (np.ndarray): Ground truth array. If ``normalize=True``, should be a
            uint8 grayscale image (0-255). If ``normalize=False``, must be a
            boolean array.
        normalize (bool, optional): Whether to normalize the input data using
            prepare_data(). Defaults to True.

    Returns:
        tuple: A tuple containing:
            - pred (np.ndarray): Prediction as float in range [0, 1].
            - gt (np.ndarray): Ground truth as boolean array.

    Raises:
        ValueError: If prediction and ground truth shapes don't match, if the
            arrays are empty, or if prediction values are outside [0, 1] when
            ``normalize=False``.
        TypeError: If data types are invalid when ``normalize=False`` (pred must
            be float32/float64, gt must be boolean).
    """
    # Validate input shapes
    if pred.shape != gt.shape:
        raise ValueError(f"Shape mismatch between prediction ({pred.shape}) and ground truth ({gt.shape})")
    # Empty arrays would otherwise surface as an opaque numpy reduction error
    # from min()/max() below; fail early with a clear message instead.
    if pred.size == 0:
        raise ValueError("Prediction and ground truth arrays must be non-empty")

    # Handle normalization
    if normalize:
        pred, gt = prepare_data(pred, gt)
    else:
        # Validate prediction data type and range
        if pred.dtype not in (np.float32, np.float64):
            raise TypeError(f"Prediction array must be float32 or float64, got {pred.dtype}")
        if not (0 <= pred.min() and pred.max() <= 1):
            raise ValueError("Prediction values must be in range [0, 1]")
        # Validate ground truth type
        if gt.dtype != bool:
            raise TypeError(f"Ground truth must be boolean, got {gt.dtype}")

    return pred, gt


def prepare_data(pred: np.ndarray, gt: np.ndarray) -> tuple:
    """Convert and normalize prediction and ground truth data.

    - For predictions, mimics MATLAB's ``mapminmax(im2double(...))``: scale by
      255, then min-max rescale to span [0, 1] unless the array is constant.
    - For ground truth, applies strict binary thresholding: ``gt > 128``.

    Args:
        pred (np.ndarray): Prediction grayscale image, uint8 type with values in [0, 255].
        gt (np.ndarray): Ground truth grayscale image, uint8 type with values in [0, 255].

    Returns:
        tuple: A tuple containing:
            - pred (np.ndarray): Normalized prediction as float64 in range [0, 1].
            - gt (np.ndarray): Binary ground truth as boolean array.
    """
    gt = gt > 128
    # im2double, mapminmax
    pred = pred / 255
    if pred.max() != pred.min():
        pred = (pred - pred.min()) / (pred.max() - pred.min())
    return pred, gt


def get_adaptive_threshold(matrix: np.ndarray, max_value: float = 1) -> float:
    """Return an adaptive threshold, which is equal to twice the mean of `matrix`.

    Args:
        matrix (np.ndarray): a data array
        max_value (float, optional): the upper limit of the threshold. Defaults to 1.

    Returns:
        float: `min(2 * matrix.mean(), max_value)`
    """
    return min(2 * matrix.mean(), max_value)
class MSIoU:
    """Multi-Scale Intersection over Union (MSIoU) metric.

    This implements the MSIoU metric which evaluates segmentation quality at
    multiple scales by comparing edge maps. It addresses the limitation of
    traditional IoU which struggles with fine structures in segmentation results.

    ```
    @inproceedings{MSIoU,
        title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures},
        author = {Ahmadzadeh, Azim and Kempton, Dustin J. and Chen, Yang and Angryk, Rafal A.},
        booktitle = ICIP,
        year = {2021},
    }
    ```
    """

    def __init__(self, with_dynamic: bool, with_adaptive: bool, *, with_binary: bool = False, num_levels: int = 10):
        """Initialize the MSIoU evaluator.

        Args:
            with_dynamic (bool): Record dynamic results for max/avg/curve versions.
            with_adaptive (bool): Record adaptive results for adp version.
            with_binary (bool, optional): Record binary results for binary version. Defaults to False.
            num_levels (int, optional): Number of scales (grid cell sizes) used,
                sampled from 2**0 up to 2**9 as in the original implementation.
                Should be at least 2 (the AUC step is 1 / (num_levels - 1)).
                Defaults to 10.
        """
        self.dynamic_results = [] if with_dynamic else None
        self.adaptive_results = [] if with_adaptive else None
        self.binary_results = [] if with_binary else None

        # The values of this collection determines the resolutions based on which MIoU is computed.
        # It is set as the original implementation
        self.cell_sizes = np.power(2, np.linspace(0, 9, num=num_levels, dtype=int))

    def get_edge(self, mask: np.ndarray):
        """Edge detection based on the `scipy.ndimage.sobel` function.

        :param mask: a binary mask of an object whose edges are of interest.
        :return: a binary mask of 1's as edges and 0's as background.
        """
        sx = ndimage.sobel(mask, axis=0, mode="constant")
        sy = ndimage.sobel(mask, axis=1, mode="constant")
        sob = np.hypot(sx, sy)
        # Any non-zero gradient magnitude counts as an edge pixel.
        return (sob > 0).astype(sob.dtype)

    def shrink_by_grid(self, image: np.ndarray, cell_size: int) -> np.ndarray:
        """Shrink the image by summing values within grid cells.

        Performs box-counting after applying zero padding if the image dimensions
        are not perfectly divisible by the cell size.

        :param image: The input binary image (edges).
        :param cell_size: The size of the grid cells.
        :return: A shrunk binary image where each pixel represents a grid cell.
        :raises ValueError: If ``cell_size`` is not a positive integer.
        """
        if cell_size <= 0:
            raise ValueError("Cell size must be a positive integer")

        if cell_size > 1:
            # Calculate padding sizes to make dimensions divisible by cell_size
            h, w = image.shape[:2]
            pad_h = (cell_size - h % cell_size) % cell_size
            pad_w = (cell_size - w % cell_size) % cell_size

            # Apply padding if necessary
            if pad_h > 0 or pad_w > 0:
                # Padding is added to the top and left edges.
                image = np.pad(image, ((pad_h, 0), (pad_w, 0)), mode="constant", constant_values=0)

            # Reshape and sum within each cell
            h, w = image.shape[:2]
            image = image.reshape(h // cell_size, cell_size, w // cell_size, cell_size)
            image = image.sum(axis=(1, 3))
        # A cell is "occupied" if it contains any edge pixel.
        return (image > 0).astype(image.dtype)

    def multi_scale_iou(self, pred_edge: np.ndarray, gt_edge: np.ndarray) -> list:
        """Calculate Multi-Scale IoU.

        Args:
            pred_edge (np.ndarray): edge map of pred
            gt_edge (np.ndarray): edge map of gt

        Returns:
            list: IoU ratios, one per cell size in ``self.cell_sizes``.
        """
        # Calculate IoU ratios at different scales
        ratios = []
        for cell_size in self.cell_sizes:
            # Shrink both prediction and ground truth edges
            shrunk_pred_edge = self.shrink_by_grid(pred_edge, cell_size=cell_size)
            shrunk_gt_edge = self.shrink_by_grid(gt_edge, cell_size=cell_size)

            # Calculate IoU with smoothing to prevent division by zero
            numerator = np.logical_and(shrunk_pred_edge, shrunk_gt_edge).sum() + 1
            # Only consider ground truth for denominator
            denominator = shrunk_gt_edge.sum() + 1
            ratios.append(numerator / denominator)
        return ratios

    def binarizing(self, pred_bin: np.ndarray, gt_edge: np.ndarray) -> float:
        """Calculate the MSIoU score for one binarized prediction.

        Args:
            pred_bin (np.ndarray): binarized pred
            gt_edge (np.ndarray): edge map of the gt binarized by 128

        Returns:
            float: area under the curve of the per-scale IoU ratios.
        """
        pred_edge = self.get_edge(pred_bin)
        ratios = self.multi_scale_iou(pred_edge, gt_edge)  # num_levels values

        # Calculate area under the curve using the trapezoidal rule.
        # np.trapz was removed in NumPy 2.0 and renamed np.trapezoid, so pick
        # whichever this NumPy version provides.
        trapezoid = getattr(np, "trapezoid", None) or getattr(np, "trapz")
        return trapezoid(y=ratios, dx=1 / (len(self.cell_sizes) - 1))

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Record the Multi-Scale IoU for a single prediction-ground truth pair.

        This method first extracts edges from both prediction and ground truth,
        then computes IoU ratios at multiple scales defined by self.cell_sizes,
        and appends the resulting AUC score(s) to the enabled result recorders.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.

        Returns:
            None: results are accumulated internally; see :meth:`get_results`.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)

        # Calculate MSIoU for this pair and store the result
        gt_edge = self.get_edge(gt)

        if self.dynamic_results is not None:
            results = []
            _pred = (pred * 255).astype(np.uint8)
            # Sweep every integer threshold 0..256 (>=256 yields an all-zero mask).
            for threshold in np.linspace(0, 256, 257):
                results.append(self.binarizing(_pred >= threshold, gt_edge))
            # threshold_masks = pred[..., None] >= np.arange(0, 257)[None, None, :]
            self.dynamic_results.append(results)

        if self.adaptive_results is not None:
            adaptive_threshold = get_adaptive_threshold(pred, max_value=1)
            results = self.binarizing(pred >= adaptive_threshold, gt_edge)
            self.adaptive_results.append(results)

        if self.binary_results is not None:
            self.binary_results.append(self.binarizing(pred > 0.5, gt_edge))

    def get_results(self) -> dict:
        """Return the accumulated MSIoU results.

        Averages the per-sample scores stored by previous calls to step().

        :return: Dictionary with a subset of the keys ``"dynamic"`` (mean
            per-threshold curve), ``"adaptive"`` and ``"binary"`` (mean scalar
            scores), depending on which recorders were enabled in __init__.
        """
        results = {}
        if self.dynamic_results is not None:
            results["dynamic"] = np.mean(np.array(self.dynamic_results, dtype=TYPE), axis=0)
        if self.adaptive_results is not None:
            results["adaptive"] = np.mean(np.array(self.adaptive_results, dtype=TYPE))
        if self.binary_results is not None:
            results["binary"] = np.mean(np.array(self.binary_results, dtype=TYPE))
        return results
# Shared handler configuration: accumulate tp/fp/tn/fn across all samples and
# compute each binarized metric once on the aggregated counts.
_OVERALL_BIN = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False)

# Names of all registered handlers; also used to pull scalar results back out.
_METRIC_NAMES = (
    "overall_bifm",
    "overall_bipre",
    "overall_birec",
    "overall_bifpr",
    "overall_bidice",
    "overall_biiou",
    "overall_bif1",
    "overall_bispec",
    "overall_biber",
    "overall_bioa",
    "overall_bikappa",
)


def _build_recorder():
    """Create a fresh FmeasureV2 recorder with every overall binary handler."""
    return py_sod_metrics.FmeasureV2(
        metric_handlers={
            "overall_bifm": py_sod_metrics.FmeasureHandler(**_OVERALL_BIN, beta=0.3),
            "overall_bif1": py_sod_metrics.FmeasureHandler(**_OVERALL_BIN, beta=1),
            "overall_bipre": py_sod_metrics.PrecisionHandler(**_OVERALL_BIN),
            "overall_birec": py_sod_metrics.RecallHandler(**_OVERALL_BIN),
            "overall_bifpr": py_sod_metrics.FPRHandler(**_OVERALL_BIN),
            "overall_biiou": py_sod_metrics.IOUHandler(**_OVERALL_BIN),
            "overall_bidice": py_sod_metrics.DICEHandler(**_OVERALL_BIN),
            "overall_bispec": py_sod_metrics.SpecificityHandler(**_OVERALL_BIN),
            "overall_biber": py_sod_metrics.BERHandler(**_OVERALL_BIN),
            "overall_bioa": py_sod_metrics.OverallAccuracyHandler(**_OVERALL_BIN),
            "overall_bikappa": py_sod_metrics.KappaHandler(**_OVERALL_BIN),
        }
    )


def _load_pairs(pred_files, mask_files, side=256):
    """Yield (pred, mask) uint8 grayscale pairs, each resized to ``side`` x ``side``."""
    for pred_path, mask_path in zip(pred_files, mask_files):
        pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE)
        assert pred is not None, pred_path
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        assert mask is not None, mask_path
        yield (
            cv2.resize(pred, dsize=(side, side), interpolation=cv2.INTER_LINEAR),
            cv2.resize(mask, dsize=(side, side), interpolation=cv2.INTER_LINEAR),
        )


def _binary_results(recorder):
    """Extract the scalar 'binary' result of every registered handler."""
    results = recorder.get_results()
    return {name: results[name]["binary"] for name in _METRIC_NAMES}


def _report(sample_info, whole_info, expect_equal, tag):
    """Log pass/fail for each metric, comparing per-sample vs whole-image values."""
    for name, sample_value in sample_info.items():
        whole_value = whole_info[name]
        if (sample_value == whole_value) == expect_equal:
            logging.info(f"[{tag}] {name} passed!")
        else:
            relation = "equal" if expect_equal else "not equal"
            logging.warning(f"[{tag}] {name} should be {relation}: {sample_value} vs {whole_value}")


def compare_unnormalized(pred_files, mask_files):
    """Feed pre-normalized data (normalize=False); per-sample and whole-image results must agree.

    Because the overall binary handlers only accumulate confusion counts, feeding
    each image separately or as one concatenated canvas yields identical totals.
    """
    sample_recorder = _build_recorder()
    whole_recorder = _build_recorder()

    preds = []
    masks = []
    for pred, mask in _load_pairs(pred_files, mask_files):
        preds.append(pred)
        masks.append(mask)
        # Normalize manually, exactly once per image.
        sample_recorder.step(pred=pred / 255, gt=mask > 127, normalize=False)
    sample_info = _binary_results(sample_recorder)

    preds = np.concatenate(preds, axis=-1)  # H,n*W
    masks = np.concatenate(masks, axis=-1)
    whole_recorder.step(pred=preds / 255, gt=masks > 127, normalize=False)
    whole_info = _binary_results(whole_recorder)

    # The accumulated tp/fp/tn/fn are identical either way, so results must match.
    # (NOTE: the log tag previously said "normalized" here; it was swapped with
    # compare_normalized's tag.)
    _report(sample_info, whole_info, expect_equal=True, tag="unnormalized")


def compare_normalized(pred_files, mask_files):
    """Let the recorders normalize (normalize=True); per-sample and whole-image results should differ.

    The built-in normalization applies per-array min-max scaling, so normalizing
    each image separately vs. one concatenated canvas produces different inputs.
    """
    sample_recorder = _build_recorder()
    whole_recorder = _build_recorder()

    preds = []
    masks = []
    for pred, mask in _load_pairs(pred_files, mask_files):
        preds.append(pred)
        masks.append(mask)
        sample_recorder.step(pred=pred, gt=mask, normalize=True)
    sample_info = _binary_results(sample_recorder)

    preds = np.concatenate(preds, axis=-1)  # H,n*W
    masks = np.concatenate(masks, axis=-1)
    whole_recorder.step(pred=preds, gt=masks, normalize=True)
    whole_info = _binary_results(whole_recorder)

    # Min-max normalization depends on the whole array's content, so expect differences.
    _report(sample_info, whole_info, expect_equal=False, tag="normalized")


def main():
    pred_dir = "test_data/preds"
    mask_dir = "test_data/masks"
    pred_files = sorted(os.path.join(pred_dir, f) for f in os.listdir(pred_dir))
    mask_files = sorted(os.path.join(mask_dir, f) for f in os.listdir(mask_dir))
    compare_normalized(pred_files, mask_files)
    compare_unnormalized(pred_files, mask_files)


if __name__ == "__main__":
    main()
2 | Logo 3 |
4 |

PySODMetrics: 一份简单有效的SOD指标实现

5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | > [!important] 14 | > 15 | > 我们在这一领域的探索仍在继续,这一次是通过 [PyIRSTDMetrics](https://github.com/lartpang/PyIRSTDMetrics) —— 一个源于同样核心动机的项目。 16 | > 你可以把它们看作是一对“孪生”计划:这个项目旨在描绘当前评估的全景,而它的“兄弟”项目则更进一步,在此基础上加以拓展,并重新思考这一体系。 17 | > 我们很希望得到你的 Star! 🌟 18 | 19 | 20 | ## 介绍 21 | 22 | 一份简单有效的 SOD 指标实现。 23 | 24 | - 基于`numpy`和极少量`scipy.ndimage`代码 25 | - 基于 DengPing Fan 进行对比验证 26 | - 结构简单,易于扩展 27 | - 代码轻量且快速 28 | 29 | 欢迎您的改进和建议。 30 | 31 | ### 相关项目 32 | 33 | - [PySODEvalToolkit](https://github.com/lartpang/PySODEvalToolkit): A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 34 | 35 | ### 支持的指标 36 | 37 | | Metric | Sample-based | Whole-based | Related Class | 38 | | --------------------------------------------------- | ------------------------------------------- | ------------------------ | -------------------------------------------- | 39 | | MAE | soft,si-soft | | `MAE` | 40 | | S-measure $S_{m}$ | soft | | `Smeasure` | 41 | | weighted F-measure ($F^{\omega}_{\beta}$) | soft | | `WeightedFmeasure` | 42 | | Human Correction Effort Measure | soft | | `HumanCorrectionEffortMeasure` | 43 | | Context-Measure ($C_{\beta}$, $C^{\omega}_{\beta}$) | soft | | `ContextMeasure`, `CamouflageContextMeasure` | 44 | | Multi-Scale IoU | max,avg,adp,bin | | `MSIoU` | 45 | | E-measure ($E_{m}$) | max,avg,adp | | `Emeasure` | 46 | | F-measure (old) ($F_{\beta}$) | max,avg,adp | | `Fmeasure` (Will be removed!) 
| 47 | | F-measure (new) ($F_{\beta}$, $F_{1}$) | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FmeasureHandler` | 48 | | BER | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`BERHandler` | 49 | | Dice | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`DICEHandler` | 50 | | FPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FPRHandler` | 51 | | IoU | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`IOUHandler` | 52 | | Kappa | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`KappaHandler` | 53 | | Overall Accuracy | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`OverallAccuracyHandler` | 54 | | Precision | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`PrecisionHandler` | 55 | | Recall | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`RecallHandler` | 56 | | Sensitivity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SensitivityHandler` | 57 | | Specificity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SpecificityHandler` | 58 | | TNR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TNRHandler` | 59 | | TPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TPRHandler` | 60 | 61 | **注意**: 62 | - 基于样本(Sample-based)的 `si-` 变体会根据形状为 `(num_targets, 256)` 的样本级 `fm` 序列,计算每个样本的均值/最大值。 63 | - 基于整体(Whole-based)的 `si-` 变体会在跨所有样本的所有目标的平均 `fm` 序列上计算均值/最大值。 64 | - 由于 `*adp` 变体是专门为 `sample-based` 计算设计的,因此不支持基于整体(whole-based)的计算方式。 65 | 66 | ## 使用 67 | 68 | 核心文件在文件夹 `py_sod_metrics` 中。 69 | 70 | - **[新,但可能不稳定]** 从源代码安装:`pip install 
git+https://github.com/lartpang/PySODMetrics.git` 71 | - **[更稳定一些]** 从 PyPI 安装:`pip install pysodmetrics` 72 | 73 | ### 示例 74 | 75 | - [examples/test_metrics.py](./examples/test_metrics.py) 76 | - [examples/metric_recorder.py](./examples/metric_recorder.py) 77 | 78 | ## 参考 79 | 80 | - [Matlab Code](https://github.com/DengPingFan/CODToolbox) by DengPingFan(): 在我们的测试中 (测试代码可见`test`文件夹下内容),结果与 Fan 的代码一致。 81 | - matlab 代码需要将 的`Bi_sal(sal>threshold)=1;`改为` Bi_sal(sal>=threshold)=1;`。细节可见 [相关讨论](https://github.com/DengPingFan/CODToolbox/issues/1)。 82 | - 2021-12-20 (Version `1.3.0`):由于 numpy 和 matlab 的不同,在 `1.2.x` 版本中,matlab 代码的结果与我们的结果在某些指标上存在非常细微的差异。[最近的 PR](https://github.com/lartpang/PySODMetrics/pull/3) 缓解了这个问题。但是,在 E-measure 上仍然存在非常小的差异。大多数论文中的结果都四舍五入到三四位有效数字,因此,新版本与“1.2.x”版本之间没有明显差异。 83 | - 84 | 85 | ```text 86 | @inproceedings{Fmeasure, 87 | title={Frequency-tuned salient region detection}, 88 | author={Achanta, Radhakrishna and Hemami, Sheila and Estrada, Francisco and S{\"u}sstrunk, Sabine}, 89 | booktitle=CVPR, 90 | number={CONF}, 91 | pages={1597--1604}, 92 | year={2009} 93 | } 94 | 95 | @inproceedings{MAE, 96 | title={Saliency filters: Contrast based filtering for salient region detection}, 97 | author={Perazzi, Federico and Kr{\"a}henb{\"u}hl, Philipp and Pritch, Yael and Hornung, Alexander}, 98 | booktitle=CVPR, 99 | pages={733--740}, 100 | year={2012} 101 | } 102 | 103 | @inproceedings{Smeasure, 104 | title={Structure-measure: A new way to evaluate foreground maps}, 105 | author={Fan, Deng-Ping and Cheng, Ming-Ming and Liu, Yun and Li, Tao and Borji, Ali}, 106 | booktitle=ICCV, 107 | pages={4548--4557}, 108 | year={2017} 109 | } 110 | 111 | @inproceedings{Emeasure, 112 | title="Enhanced-alignment Measure for Binary Foreground Map Evaluation", 113 | author="Deng-Ping {Fan} and Cheng {Gong} and Yang {Cao} and Bo {Ren} and Ming-Ming {Cheng} and Ali {Borji}", 114 | booktitle=IJCAI, 115 | pages="698--704", 116 | year={2018} 117 | } 118 | 119 | 
@inproceedings{wFmeasure, 120 | title={How to evaluate foreground maps?}, 121 | author={Margolin, Ran and Zelnik-Manor, Lihi and Tal, Ayellet}, 122 | booktitle=CVPR, 123 | pages={248--255}, 124 | year={2014} 125 | } 126 | 127 | @inproceedings{MSIoU, 128 | title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures}, 129 | author = {Ahmadzadeh, Azim and Kempton, Dustin J. and Chen, Yang and Angryk, Rafal A.}, 130 | booktitle = ICIP, 131 | year = {2021}, 132 | } 133 | 134 | @inproceedings{SizeInvarianceVariants, 135 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 136 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 137 | booktitle = ICML, 138 | year = {2024} 139 | } 140 | 141 | @inproceedings{HumanCorrectionEffortMeasure, 142 | title = {Highly Accurate Dichotomous Image Segmentation}, 143 | author = {Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool}, 144 | booktitle = ECCV, 145 | year = {2022} 146 | } 147 | 148 | @article{ContextMeasure, 149 | title={Context-measure: Contextualizing Metric for Camouflage}, 150 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 151 | journal={arXiv preprint arXiv:2512.07076}, 152 | year={2025} 153 | } 154 | ``` 155 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 |
2 | Logo 3 |
4 |

PySODMetrics: A simple and efficient implementation of SOD metrics

5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | > [!important] 14 | > 15 | > Our exploration in this field continues with [PyIRSTDMetrics](https://github.com/lartpang/PyIRSTDMetrics), a project born from the same core motivation. 16 | > ​​Think of them as twin initiatives: this project maps the landscape of current evaluation, while its sibling takes the next step to expand upon and rethink it. 17 | > We'd love your star! 🌟 18 | 19 | ## Introduction 20 | 21 | A simple and efficient implementation of SOD metrics. 22 | 23 | - Based on `numpy` and `scipy` 24 | - Verification based on Fan's matlab code 25 | - The code structure is simple and easy to extend 26 | - The code is lightweight and fast 27 | 28 | Your improvements and suggestions are welcome. 29 | 30 | ### Related Projects 31 | 32 | - [PySODEvalToolkit](https://github.com/lartpang/PySODEvalToolkit): A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 33 | 34 | ### Supported Metrics 35 | 36 | | Metric | Sample-based | Whole-based | Related Class | 37 | | --------------------------------------------------- | ------------------------------------------- | ------------------------ | -------------------------------------------- | 38 | | MAE | soft,si-soft | | `MAE` | 39 | | S-measure $S_{m}$ | soft | | `Smeasure` | 40 | | weighted F-measure ($F^{\omega}_{\beta}$) | soft | | `WeightedFmeasure` | 41 | | Human Correction Effort Measure | soft | | `HumanCorrectionEffortMeasure` | 42 | | Context-Measure ($C_{\beta}$, $C^{\omega}_{\beta}$) | soft | | `ContextMeasure`, `CamouflageContextMeasure` | 43 | | Multi-Scale IoU | max,avg,adp,bin | | `MSIoU` | 44 | | E-measure ($E_{m}$) | max,avg,adp | | `Emeasure` | 45 | | F-measure (old) ($F_{\beta}$) | max,avg,adp | | `Fmeasure` (Will be removed!) 
| 46 | | F-measure (new) ($F_{\beta}$, $F_{1}$) | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FmeasureHandler` | 47 | | BER | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`BERHandler` | 48 | | Dice | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`DICEHandler` | 49 | | FPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FPRHandler` | 50 | | IoU | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`IOUHandler` | 51 | | Kappa | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`KappaHandler` | 52 | | Overall Accuracy | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`OverallAccuracyHandler` | 53 | | Precision | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`PrecisionHandler` | 54 | | Recall | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`RecallHandler` | 55 | | Sensitivity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SensitivityHandler` | 56 | | Specificity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SpecificityHandler` | 57 | | TNR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TNRHandler` | 58 | | TPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TPRHandler` | 59 | 60 | **NOTE**: 61 | - Sample-based `si-` variants calculate the sample-specific mean/maximum based on the sample-based fm sequence with a shape of `(num_targets, 256)`. 62 | - Whole-based `si-` variants calculate the mean/maximum based on the average fm sequence across all targets from all samples. 
63 | - Because the `*adp` variants are specialized for `sample-based` computation, they do not support whole-based computation. 64 | 65 | ## Usage 66 | 67 | The core files are in the folder `py_sod_metrics`. 68 | 69 | - **[Latest, but may be unstable]** Install from the source code: `pip install git+https://github.com/lartpang/PySODMetrics.git` 70 | - **[More stable]** Install from PyPI: `pip install pysodmetrics` 71 | 72 | ### Examples 73 | 74 | - [examples/test_metrics.py](./examples/test_metrics.py) 75 | - [examples/metric_recorder.py](./examples/metric_recorder.py) 76 | 77 | ## Reference 78 | 79 | - [Matlab Code](https://github.com/DengPingFan/CODToolbox) by DengPingFan(): In our comparison (the test code can be seen under the `test` folder), the result is consistent with the code. 80 | - The matlab code needs to change `Bi_sal(sal>threshold)=1;` to `Bi_sal(sal>=threshold)=1;` in . For related discussion, please see [the issue](https://github.com/DengPingFan/CODToolbox/issues/1). 81 | - 2021-12-20 (version `1.3.0`): Due to the difference between numpy and matlab, in version `1.2.x`, there are very slight differences on some metrics between the results of the matlab code and ours. The [recent PR](https://github.com/lartpang/PySODMetrics/pull/3) alleviated this problem. However, there are still very small differences on E-measure. The results in most papers are rounded off to three or four significant figures, so, there is no obvious difference between the new version and the version `1.2.x` for them. 
82 | - 83 | 84 | ```text 85 | @inproceedings{Fmeasure, 86 | title={Frequency-tuned salient region detection}, 87 | author={Achanta, Radhakrishna and Hemami, Sheila and Estrada, Francisco and S{\"u}sstrunk, Sabine}, 88 | booktitle=CVPR, 89 | number={CONF}, 90 | pages={1597--1604}, 91 | year={2009} 92 | } 93 | 94 | @inproceedings{MAE, 95 | title={Saliency filters: Contrast based filtering for salient region detection}, 96 | author={Perazzi, Federico and Kr{\"a}henb{\"u}hl, Philipp and Pritch, Yael and Hornung, Alexander}, 97 | booktitle=CVPR, 98 | pages={733--740}, 99 | year={2012} 100 | } 101 | 102 | @inproceedings{Smeasure, 103 | title={Structure-measure: A new way to evaluate foreground maps}, 104 | author={Fan, Deng-Ping and Cheng, Ming-Ming and Liu, Yun and Li, Tao and Borji, Ali}, 105 | booktitle=ICCV, 106 | pages={4548--4557}, 107 | year={2017} 108 | } 109 | 110 | @inproceedings{Emeasure, 111 | title="Enhanced-alignment Measure for Binary Foreground Map Evaluation", 112 | author="Deng-Ping {Fan} and Cheng {Gong} and Yang {Cao} and Bo {Ren} and Ming-Ming {Cheng} and Ali {Borji}", 113 | booktitle=IJCAI, 114 | pages="698--704", 115 | year={2018} 116 | } 117 | 118 | @inproceedings{wFmeasure, 119 | title={How to evaluate foreground maps?}, 120 | author={Margolin, Ran and Zelnik-Manor, Lihi and Tal, Ayellet}, 121 | booktitle=CVPR, 122 | pages={248--255}, 123 | year={2014} 124 | } 125 | 126 | @inproceedings{MSIoU, 127 | title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures}, 128 | author = {Ahmadzadeh, Azim and Kempton, Dustin J. 
and Chen, Yang and Angryk, Rafal A.}, 129 | booktitle = ICIP, 130 | year = {2021}, 131 | } 132 | 133 | @inproceedings{SizeInvarianceVariants, 134 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 135 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 136 | booktitle = ICML, 137 | year = {2024} 138 | } 139 | 140 | @inproceedings{HumanCorrectionEffortMeasure, 141 | title = {Highly Accurate Dichotomous Image Segmentation}, 142 | author = {Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool}, 143 | booktitle = ECCV, 144 | year = {2022} 145 | } 146 | 147 | @article{ContextMeasure, 148 | title={Context-measure: Contextualizing Metric for Camouflage}, 149 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 150 | journal={arXiv preprint arXiv:2512.07076}, 151 | year={2025} 152 | } 153 | ``` 154 | -------------------------------------------------------------------------------- /deploy/metrics.rst: -------------------------------------------------------------------------------- 1 | Supported Metrics 2 | ================= 3 | 4 | This page provides detailed information about all the metrics supported by PySODMetrics. 
5 | 6 | Overview 7 | -------- 8 | 9 | PySODMetrics provides two types of metric computation: 10 | 11 | * **Sample-based**: Metrics are computed for each sample individually and then aggregated 12 | * **Whole-based**: Metrics are computed across all samples globally 13 | 14 | Most metrics support different aggregation strategies: 15 | 16 | * ``max``: Maximum value across all thresholds 17 | * ``avg``: Average value across all thresholds 18 | * ``adp``: Adaptive threshold (2 × mean of predictions) 19 | * ``bin``: Binary threshold (typically 0.5 or fixed threshold) 20 | * ``si-*``: Size-invariant variants for handling multi-scale objects 21 | 22 | Basic Metrics 23 | ------------- 24 | 25 | MAE (Mean Absolute Error) 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | Measures the pixel-wise absolute difference between prediction and ground truth. 29 | 30 | .. math:: 31 | 32 | MAE = \frac{1}{W \times H} \sum_{x=1}^{W} \sum_{y=1}^{H} |P(x,y) - G(x,y)| 33 | 34 | where :math:`P` is the prediction, :math:`G` is the ground truth, and :math:`W \times H` is the image size. 35 | 36 | **Reference:** 37 | 38 | Perazzi et al., "Saliency filters: Contrast based filtering for salient region detection", CVPR 2012 39 | 40 | **Usage:** 41 | 42 | .. code-block:: python 43 | 44 | from py_sod_metrics import MAE 45 | 46 | mae = MAE() 47 | mae.step(pred, gt) 48 | results = mae.get_results() 49 | print(f"MAE: {results['mae']:.4f}") 50 | 51 | S-measure (Structure Measure) 52 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | Evaluates structural similarity between prediction and ground truth, considering both region-aware and object-aware components. 55 | 56 | .. math:: 57 | 58 | S_m = \alpha \cdot S_o + (1 - \alpha) \cdot S_r 59 | 60 | where :math:`S_o` is the object-aware structural similarity and :math:`S_r` is the region-aware structural similarity. 61 | 62 | **Reference:** 63 | 64 | Fan et al., "Structure-measure: A new way to evaluate foreground maps", ICCV 2017 65 | 66 | **Usage:** 67 | 68 | .. 
code-block:: python 69 | 70 | from py_sod_metrics import Smeasure 71 | 72 | sm = Smeasure() 73 | sm.step(pred, gt) 74 | results = sm.get_results() 75 | print(f"S-measure: {results['sm']:.4f}") 76 | 77 | E-measure (Enhanced-alignment Measure) 78 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 79 | 80 | Captures both local and global matching information between prediction and ground truth. 81 | 82 | **Reference:** 83 | 84 | Fan et al., "Enhanced-alignment Measure for Binary Foreground Map Evaluation", IJCAI 2018 85 | 86 | **Usage:** 87 | 88 | .. code-block:: python 89 | 90 | from py_sod_metrics import Emeasure 91 | 92 | em = Emeasure() 93 | em.step(pred, gt) 94 | results = em.get_results() 95 | print(f"Max E-measure: {results['em']['adp']:.4f}") 96 | print(f"Avg E-measure: {results['em']['avg']:.4f}") 97 | 98 | F-measure 99 | ~~~~~~~~~ 100 | 101 | Harmonic mean of precision and recall. 102 | 103 | .. math:: 104 | 105 | F_\beta = \frac{(1 + \beta^2) \times Precision \times Recall}{\beta^2 \times Precision + Recall} 106 | 107 | **Reference:** 108 | 109 | Achanta et al., "Frequency-tuned salient region detection", CVPR 2009 110 | 111 | **Usage:** 112 | 113 | .. code-block:: python 114 | 115 | from py_sod_metrics import Fmeasure 116 | 117 | fm = Fmeasure() 118 | fm.step(pred, gt) 119 | results = fm.get_results() 120 | print(f"Max F-measure: {results['fm']['adp']:.4f}") 121 | 122 | Weighted F-measure 123 | ~~~~~~~~~~~~~~~~~~ 124 | 125 | A weighted version of F-measure that assigns different importance to different pixels based on their location. 126 | 127 | **Reference:** 128 | 129 | Margolin et al., "How to evaluate foreground maps?", CVPR 2014 130 | 131 | **Usage:** 132 | 133 | .. 
code-block:: python 134 | 135 | from py_sod_metrics import WeightedFmeasure 136 | 137 | wfm = WeightedFmeasure() 138 | wfm.step(pred, gt) 139 | results = wfm.get_results() 140 | print(f"Weighted F-measure: {results['wfm']:.4f}") 141 | 142 | Advanced Metrics 143 | ---------------- 144 | 145 | FmeasureV2 Framework 146 | ~~~~~~~~~~~~~~~~~~~~ 147 | 148 | A flexible framework for computing multiple binary classification metrics using different handlers. 149 | 150 | **Supported Handlers:** 151 | 152 | * ``FmeasureHandler``: F-measure with configurable β 153 | * ``PrecisionHandler``: Precision (Positive Predictive Value) 154 | * ``RecallHandler``: Recall (Sensitivity, TPR) 155 | * ``IOUHandler``: Intersection over Union 156 | * ``DICEHandler``: Dice coefficient 157 | * ``BERHandler``: Balanced Error Rate 158 | * ``KappaHandler``: Cohen's Kappa 159 | * ``OverallAccuracyHandler``: Overall classification accuracy 160 | * ``SpecificityHandler``: Specificity (TNR) 161 | * ``SensitivityHandler``: Sensitivity (same as Recall) 162 | * ``FPRHandler``: False Positive Rate 163 | * ``TNRHandler``: True Negative Rate 164 | * ``TPRHandler``: True Positive Rate 165 | 166 | **Usage:** 167 | 168 | .. code-block:: python 169 | 170 | from py_sod_metrics import FmeasureV2, FmeasureHandler, IOUHandler 171 | 172 | fm_v2 = FmeasureV2( 173 | handlers={ 174 | "fm": FmeasureHandler(beta=0.3), 175 | "iou": IOUHandler(), 176 | } 177 | ) 178 | 179 | fm_v2.step(pred, gt) 180 | results = fm_v2.get_results() 181 | 182 | Context-Measure 183 | ~~~~~~~~~~~~~~~ 184 | 185 | Designed specifically for camouflaged object detection, considering contextual information. 186 | 187 | **Reference:** 188 | 189 | Wang et al., "Context-measure: Contextualizing Metric for Camouflage", arXiv 2025 190 | 191 | **Variants:** 192 | 193 | * ``ContextMeasure``: Standard context measure :math:`C_\beta` 194 | * ``CamouflageContextMeasure``: Weighted context measure :math:`C^\omega_\beta` 195 | 196 | **Usage:** 197 | 198 | .. 
code-block:: python 199 | 200 | from py_sod_metrics import ContextMeasure, CamouflageContextMeasure 201 | 202 | cm = ContextMeasure() 203 | ccm = CamouflageContextMeasure() 204 | 205 | cm.step(pred, gt) 206 | ccm.step(pred, gt) 207 | 208 | Multi-Scale IoU (MSIoU) 209 | ~~~~~~~~~~~~~~~~~~~~~~~ 210 | 211 | Evaluates segmentation quality across multiple scales, particularly useful for fine structures. 212 | 213 | **Reference:** 214 | 215 | Ahmadzadeh et al., "Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures", ICIP 2021 216 | 217 | **Usage:** 218 | 219 | .. code-block:: python 220 | 221 | from py_sod_metrics import MSIoU 222 | 223 | msiou = MSIoU() 224 | msiou.step(pred, gt) 225 | results = msiou.get_results() 226 | 227 | Human Correction Effort Measure 228 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 229 | 230 | Estimates the effort required for humans to correct prediction errors. 231 | 232 | **Reference:** 233 | 234 | Qin et al., "Highly Accurate Dichotomous Image Segmentation", ECCV 2022 235 | 236 | **Usage:** 237 | 238 | .. code-block:: python 239 | 240 | from py_sod_metrics import HumanCorrectionEffortMeasure 241 | 242 | hcem = HumanCorrectionEffortMeasure() 243 | hcem.step(pred, gt) 244 | results = hcem.get_results() 245 | 246 | Size-Invariant Metrics 247 | ---------------------- 248 | 249 | For datasets with objects at multiple scales, size-invariant variants provide more balanced evaluation. 250 | 251 | Size-Invariant F-measure 252 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 253 | 254 | **Reference:** 255 | 256 | Li et al., "Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection", ICML 2024 257 | 258 | **Usage:** 259 | 260 | .. 
code-block:: python 261 | 262 | from py_sod_metrics import SizeInvarianceFmeasureV2 263 | 264 | si_fm = SizeInvarianceFmeasureV2() 265 | si_fm.step(pred, gt) 266 | results = si_fm.get_results() 267 | print(f"SI F-measure (avg): {results['fm']['si-avg']:.4f}") 268 | 269 | Size-Invariant MAE 270 | ~~~~~~~~~~~~~~~~~~ 271 | 272 | **Usage:** 273 | 274 | .. code-block:: python 275 | 276 | from py_sod_metrics import SizeInvarianceMAE 277 | 278 | si_mae = SizeInvarianceMAE() 279 | si_mae.step(pred, gt) 280 | results = si_mae.get_results() 281 | 282 | Metric Comparison Table 283 | ----------------------- 284 | 285 | +--------------------------------------------------+--------------------+------------------+ 286 | | Metric | Sample-based | Whole-based | 287 | +==================================================+====================+==================+ 288 | | MAE | soft, si-soft | — | 289 | +--------------------------------------------------+--------------------+------------------+ 290 | | S-measure | soft | — | 291 | +--------------------------------------------------+--------------------+------------------+ 292 | | Weighted F-measure | soft | — | 293 | +--------------------------------------------------+--------------------+------------------+ 294 | | Human Correction Effort | soft | — | 295 | +--------------------------------------------------+--------------------+------------------+ 296 | | Context-Measure | soft | — | 297 | +--------------------------------------------------+--------------------+------------------+ 298 | | Multi-Scale IoU | max,avg,adp,bin | — | 299 | +--------------------------------------------------+--------------------+------------------+ 300 | | E-measure | max,avg,adp | — | 301 | +--------------------------------------------------+--------------------+------------------+ 302 | | F-measure (V2) | max,avg,adp,bin,si | bin,si | 303 | +--------------------------------------------------+--------------------+------------------+ 304 | | BER, Dice, IoU, 
Precision, Recall, etc. | max,avg,adp,bin,si | bin,si | 305 | +--------------------------------------------------+--------------------+------------------+ 306 | 307 | Notes 308 | ----- 309 | 310 | * **soft**: Metrics that work directly on continuous prediction values 311 | * **si-**: Size-invariant variants that normalize by object size 312 | * **adp**: Adaptive thresholding based on prediction statistics 313 | * For detailed mathematical formulations, please refer to the original papers cited above 314 | -------------------------------------------------------------------------------- /py_sod_metrics/size_invariance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from skimage import measure 3 | 4 | from .fmeasurev2 import FmeasureV2 5 | from .sod_metrics import MAE 6 | from .utils import TYPE, validate_and_normalize_input 7 | 8 | 9 | def parse_connected_components(mask: np.ndarray, area_threshold: float = 50) -> tuple: 10 | """Find the connected components in a binary mask. 11 | 12 | 1. If there are no connected components, return 0 and an all-zero labeled mask. 13 | 2. If all the connected components are smaller than the area_threshold, we will return the largest one. 14 | 15 | Args: 16 | mask (np.ndarray): binary mask 17 | area_threshold (float): The threshold for the area of the connected components.
18 | 19 | Returns: 20 | tuple: max_valid_tgt_idx, valid_labeled_mask 21 | """ 22 | labeled_tgts = measure.label(mask, connectivity=1, background=0, return_num=False) 23 | tgt_props = measure.regionprops(labeled_tgts) 24 | 25 | # find the valid targets based on the target size 26 | tgts_with_max_size = [] 27 | max_valid_tgt_idx = 0 # 0 is background 28 | valid_labeled_mask = np.zeros_like(mask, dtype=int) 29 | for tgt_prop in tgt_props: 30 | if tgts_with_max_size is not None or tgts_with_max_size[0].area == tgt_prop.area: 31 | tgts_with_max_size.append(tgt_prop) 32 | elif tgts_with_max_size[0].area < tgt_prop.area: 33 | tgts_with_max_size = [tgt_prop] 34 | 35 | if tgt_prop.area >= area_threshold: # valid indices start from 1 36 | max_valid_tgt_idx += 1 37 | valid_labeled_mask[labeled_tgts == tgt_prop.label] = max_valid_tgt_idx 38 | 39 | if max_valid_tgt_idx == 0: # no valid targets 40 | for tgt_prop in tgts_with_max_size: 41 | max_valid_tgt_idx += 1 42 | valid_labeled_mask[labeled_tgts == tgt_prop.label] = max_valid_tgt_idx 43 | return max_valid_tgt_idx, valid_labeled_mask 44 | 45 | 46 | def encode_bboxwise_tgts_bitwise(max_valid_tgt_idx: int, valid_labeled_mask: np.ndarray) -> np.ndarray: 47 | """Encode each target bbox region with a bitwise mask. 48 | 49 | Args: 50 | max_valid_tgt_idx (int): The maximum index of the valid targets. 51 | valid_labeled_mask (np.ndarray): The mask of the valid targets. 0 is background. 52 | 53 | Returns: 54 | np.ndarray: The size weight for the bbox of each target. 55 | """ 56 | binarized_weights = np.zeros_like(valid_labeled_mask, dtype=float) 57 | for label in range(max_valid_tgt_idx + 1): # 0 is background 58 | rows, cols = np.where(valid_labeled_mask == label) 59 | assert len(rows) * len(cols) > 0, ( 60 | f"connected_block_size = 0 when label = {label} for {np.unique(valid_labeled_mask)}!" 
61 | ) 62 | 63 | xmin, xmax = min(cols), max(cols) 64 | ymin, ymax = min(rows), max(rows) 65 | 66 | # This encoding scheme can encode multiple overlapping targets in different bits. 67 | weight = 0 if label == 0 else 1 << (label - 1) # 0,1,2,4,8,... 68 | binarized_weights[ymin : (ymax + 1), xmin : (xmax + 1)] += weight 69 | return binarized_weights 70 | 71 | 72 | def get_kth_bit(n: np.ndarray, k: int) -> np.ndarray: 73 | """Get the value (0 or 1) in the k-th bit of each element in the array. 74 | 75 | Args: 76 | n (np.ndarray): The original data array. 77 | k (int): The index of the bit to extract (1-based). 78 | 79 | Returns: 80 | np.ndarray: The extracted data array. Elements whose k-th bit is set yield 1; all others yield 0. 81 | """ 82 | n = n.astype(int) 83 | k = int(k) 84 | 85 | # Use bitwise AND to check if the k-th bit is set 86 | return (n & (1 << (k - 1))) >> (k - 1) 87 | 88 | 89 | class SizeInvarianceFmeasureV2(FmeasureV2): 90 | """Size invariance version of FmeasureV2. 91 | 92 | This provides size-invariant versions of standard SOD metrics that address the imbalance problem in multi-object salient object detection. Traditional metrics can be biased toward larger objects, while size-invariant metrics ensure fair evaluation across objects of different sizes.
93 | 94 | ``` 95 | @inproceedings{SizeInvarianceVariants, 96 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 97 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 98 | booktitle = ICML, 99 | year = {2024} 100 | } 101 | ``` 102 | """ 103 | 104 | def _update_metrics(self, pred: np.ndarray, gt: np.ndarray): 105 | FG = np.count_nonzero(gt) # ground-truth foreground, FG=(TPs+FNs) 106 | BG = gt.size - FG # ground-truth background, BG=(TNs+FPs) 107 | 108 | dynamical_tpfptnfn = None 109 | adaptive_tpfptnfn = None 110 | binary_tpfptnfn = None 111 | for handler_name, handler in self._metric_handlers.items(): 112 | if handler.dynamic_results is not None: 113 | if dynamical_tpfptnfn is None: 114 | dynamical_tpfptnfn = self.dynamically_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 115 | tgt_result = handler(**dynamical_tpfptnfn) 116 | if handler.sample_based: # is not None 117 | if not handler.dynamic_results or not isinstance( 118 | handler.dynamic_results[-1], list 119 | ): # is not [] or not contain list 120 | handler.dynamic_results.append([]) 121 | handler.dynamic_results[-1].append(tgt_result) 122 | else: 123 | handler.dynamic_results.append(tgt_result) 124 | 125 | if handler.adaptive_results is not None: 126 | if adaptive_tpfptnfn is None: 127 | adaptive_tpfptnfn = self.adaptively_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 128 | tgt_result = handler(**adaptive_tpfptnfn) 129 | if not handler.adaptive_results or not isinstance(handler.adaptive_results[-1], list): 130 | handler.adaptive_results.append([]) 131 | handler.adaptive_results[-1].append(tgt_result) 132 | 133 | if handler.binary_results is not None: 134 | if binary_tpfptnfn is None: 135 | # `pred > 0.5`: Simulating the effect of the `argmax` function.
136 | binary_tpfptnfn = self.get_statistics(binary=pred > 0.5, gt=gt, FG=FG, BG=BG) 137 | 138 | if handler.sample_based: # keep a per-target result list for the current sample 139 | tgt_result = handler(**binary_tpfptnfn) 140 | if not handler.binary_results or not isinstance(handler.binary_results[-1], list): 141 | handler.binary_results.append([]) 142 | handler.binary_results[-1].append(tgt_result) 143 | else: # will average over all targets from all samples 144 | tgt_result = binary_tpfptnfn 145 | handler.binary_results["tp"] += tgt_result["tp"] 146 | handler.binary_results["fp"] += tgt_result["fp"] 147 | handler.binary_results["tn"] += tgt_result["tn"] 148 | handler.binary_results["fn"] += tgt_result["fn"] 149 | 150 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 151 | """Accumulate the metrics for the pair of pred and gt. 152 | 153 | Args: 154 | pred (np.ndarray): Prediction, gray scale image. 155 | gt (np.ndarray): Ground truth, gray scale image. 156 | normalize (bool, optional): Whether to normalize the input data. Defaults to True.
157 | """ 158 | if not self._metric_handlers: # no metric handler has been added 159 | raise ValueError("Please add your metric handler before using `step()`.") 160 | 161 | pred, gt = validate_and_normalize_input(pred, gt, normalize=normalize) 162 | 163 | max_valid_tgt_idx, valid_labeled_mask = parse_connected_components(mask=gt) 164 | tgt_weights = encode_bboxwise_tgts_bitwise(max_valid_tgt_idx, valid_labeled_mask) 165 | 166 | if max_valid_tgt_idx == 0: # no target or no background 167 | self._update_metrics(pred=pred, gt=gt) 168 | else: 169 | for tgt_idx in range(1, max_valid_tgt_idx + 1): 170 | tgt_mask = get_kth_bit(tgt_weights, k=tgt_idx) > 0 171 | 172 | _pred = pred * tgt_mask 173 | _gt = gt & tgt_mask 174 | self._update_metrics(pred=_pred, gt=_gt) 175 | 176 | # average over all targets in each sample 177 | for handler_name, handler in self._metric_handlers.items(): 178 | if handler.dynamic_results is not None and handler.sample_based: 179 | tgt_results = handler.dynamic_results.pop() # Tx256 180 | handler.dynamic_results.append(np.array(tgt_results, dtype=TYPE)) # Tx256 181 | 182 | if handler.adaptive_results is not None: 183 | tgt_results = handler.adaptive_results.pop() # Tx1 184 | handler.adaptive_results.append(np.mean(np.array(tgt_results, dtype=TYPE))) # 1 185 | 186 | if handler.binary_results is not None and handler.sample_based: 187 | tgt_results = handler.binary_results.pop() # Tx1 188 | handler.binary_results.append(np.mean(np.array(tgt_results, dtype=TYPE))) # 1 189 | 190 | def get_results(self) -> dict: 191 | """Return the results of the specific metric names. 192 | 193 | Returns: 194 | dict: All results corresponding to different metrics.
195 | """ 196 | results = {} 197 | for handler_name, handler in self._metric_handlers.items(): 198 | res = {} 199 | if handler.dynamic_results is not None: 200 | dynamic_results = handler.dynamic_results 201 | if handler.sample_based: # N arrays, each of shape T'x256 202 | res["dynamic"] = dynamic_results 203 | else: # N'x256 -> 256 204 | res["dynamic"] = np.mean(np.array(dynamic_results, dtype=TYPE), axis=0) 205 | 206 | if handler.adaptive_results is not None: 207 | res["adaptive"] = np.mean(np.array(handler.adaptive_results, dtype=TYPE)) # 1 208 | 209 | if handler.binary_results is not None: 210 | binary_results = handler.binary_results 211 | if handler.sample_based: 212 | res["binary"] = np.mean(np.array(binary_results, dtype=TYPE)) # 1 213 | else: 214 | # NOTE: use `np.mean` to simplify output format (`array(123)` -> `123`) 215 | res["binary"] = np.mean(handler(**binary_results)) 216 | results[handler_name] = res 217 | return results 218 | 219 | 220 | class SizeInvarianceMAE(MAE): 221 | """Size invariance version of MAE. 222 | 223 | ``` 224 | @inproceedings{SizeInvarianceVariants, 225 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 226 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 227 | booktitle = ICML, 228 | year = {2024} 229 | } 230 | ``` 231 | """ 232 | 233 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 234 | """Accumulate the metric for the pair of pred and gt. 235 | 236 | Args: 237 | pred (np.ndarray): Prediction, gray scale image. 238 | gt (np.ndarray): Ground truth, gray scale image. 239 | normalize (bool, optional): Whether to normalize the input data. Defaults to True.
240 | """ 241 | pred, gt = validate_and_normalize_input(pred, gt, normalize=normalize) 242 | max_valid_tgt_idx, valid_labeled_mask = parse_connected_components(mask=gt) 243 | tgt_weights = encode_bboxwise_tgts_bitwise(max_valid_tgt_idx, valid_labeled_mask) 244 | 245 | if max_valid_tgt_idx == 0: # no targets or no background 246 | mae = np.abs(pred - gt).mean() 247 | else: # there are multiple targets 248 | # background component 249 | bg_mask = tgt_weights == 0 250 | bg_area = np.count_nonzero(bg_mask) 251 | 252 | _pred = pred * bg_mask 253 | _gt = gt & bg_mask 254 | bg_fg_area_ratio = bg_area / (gt.size - bg_area) 255 | factor = 1 / (max_valid_tgt_idx + bg_fg_area_ratio) # shared normalizing weight over all targets plus the background term 256 | mae = bg_fg_area_ratio * np.abs(_pred - _gt).sum() / bg_area * factor 257 | 258 | # foreground components 259 | for tgt_idx in range(1, max_valid_tgt_idx + 1): 260 | tgt_mask = get_kth_bit(tgt_weights, k=tgt_idx) > 0 261 | tgt_area = np.count_nonzero(tgt_mask) 262 | 263 | _pred = pred * tgt_mask 264 | _gt = gt & tgt_mask 265 | mae += np.abs(_pred - _gt).sum() / tgt_area * factor 266 | self.maes.append(mae) 267 | 268 | def get_results(self) -> dict: 269 | """Return the results about MAE.
270 | 271 | Returns: 272 | dict(mae=mae) 273 | """ 274 | mae = np.mean(np.array(self.maes, TYPE)) 275 | return dict(si_mae=mae) 276 | -------------------------------------------------------------------------------- /examples/version_performance.json: -------------------------------------------------------------------------------- 1 | { 2 | "v1_2_3": { 3 | "Smeasure": 0.9029763868504661, 4 | "wFmeasure": 0.5579812753638986, 5 | "MAE": 0.03705558476661653, 6 | "adpEm": 0.9408760066970631, 7 | "meanEm": 0.9566258293508715, 8 | "maxEm": 0.966954482892271, 9 | "adpFm": 0.5816750824038355, 10 | "meanFm": 0.577051059518767, 11 | "maxFm": 0.5886784581120638 12 | }, 13 | "v1_3_0": { 14 | "Smeasure": 0.9029761578759272, 15 | "wFmeasure": 0.5579812753638986, 16 | "MAE": 0.03705558476661653, 17 | "adpEm": 0.9408760066970617, 18 | "meanEm": 0.9566258293508704, 19 | "maxEm": 0.9669544828922699, 20 | "adpFm": 0.5816750824038355, 21 | "meanFm": 0.577051059518767, 22 | "maxFm": 0.5886784581120638 23 | }, 24 | "v1_4_0": { 25 | "MAE": 0.03705558476661653, 26 | "Smeasure": 0.9029761578759272, 27 | "adpEm": 0.9408760066970617, 28 | "adpFm": 0.5816750824038355, 29 | "adpber": 0.2354784689008184, 30 | "adpdice": 0.5801020564379223, 31 | "adpf1": 0.5801020564379223, 32 | "adpfm": 0.5816750824038355, 33 | "adpiou": 0.5141023436626048, 34 | "adpkappa": 0.6568702977598276, 35 | "adpoa": 0.9391947016812359, 36 | "adppre": 0.583200007681871, 37 | "adprec": 0.5777548546727481, 38 | "adpspec": 0.9512882075256152, 39 | "maxEm": 0.9669544828922699, 40 | "maxFm": 0.5886784581120638, 41 | "maxber": 0.6666666666666666, 42 | "maxdice": 0.5830613926289557, 43 | "maxf1": 0.5830613926289557, 44 | "maxfm": 0.5886784581120638, 45 | "maxiou": 0.5201569938888494, 46 | "maxkappa": 0.6759493461328753, 47 | "maxoa": 0.9654783867686053, 48 | "maxpre": 0.6396783912301717, 49 | "maxrec": 0.6666666666666666, 50 | "maxspec": 0.9965927890353435, 51 | "meanEm": 0.9566258293508704, 52 | "meanFm": 0.577051059518767, 53 
| "meanber": 0.23290802950995626, 54 | "meandice": 0.5689913551800527, 55 | "meanf1": 0.568991355180053, 56 | "meanfm": 0.577051059518767, 57 | "meaniou": 0.49816648786971, 58 | "meankappa": 0.6443053495487194, 59 | "meanoa": 0.9596413706286032, 60 | "meanpre": 0.5857695537152126, 61 | "meanrec": 0.5599653001125341, 62 | "meanspec": 0.9742186408675534, 63 | "overall_biber": 0.08527759498137788, 64 | "overall_bidice": 0.8510675335753018, 65 | "overall_bif1": 0.8510675335753017, 66 | "overall_bifm": 0.8525259082995088, 67 | "overall_biiou": 0.740746352327995, 68 | "overall_bikappa": 0.7400114676102276, 69 | "overall_bioa": 0.965778, 70 | "overall_bipre": 0.8537799277020065, 71 | "overall_birec": 0.8483723190115916, 72 | "overall_bispec": 0.9810724910256526, 73 | "sample_biber": 0.23037858807333392, 74 | "sample_bidice": 0.5738376903441331, 75 | "sample_bif1": 0.5738376903441331, 76 | "sample_bifm": 0.5829998670906196, 77 | "sample_biiou": 0.5039622042094377, 78 | "sample_bikappa": 0.6510635726572914, 79 | "sample_bioa": 0.964811758770181, 80 | "sample_bipre": 0.5916996553523113, 81 | "sample_birec": 0.5592859147614985, 82 | "sample_bispec": 0.9799569090918337, 83 | "wFmeasure": 0.5579812753638986 84 | }, 85 | "v1_4_1": { 86 | "MAE": 0.03705558476661653, 87 | "MSIOU": 0.8228002109838289, 88 | "Smeasure": 0.9029761578759272, 89 | "adpEm": 0.9408760066970617, 90 | "adpFm": 0.5816750824038355, 91 | "adpber": 0.2354784689008184, 92 | "adpdice": 0.5801020564379223, 93 | "adpf1": 0.5801020564379223, 94 | "adpfm": 0.5816750824038355, 95 | "adpiou": 0.5141023436626048, 96 | "adpkappa": 0.6568702977598276, 97 | "adpoa": 0.9391947016812359, 98 | "adppre": 0.583200007681871, 99 | "adprec": 0.5777548546727481, 100 | "adpfpr": 0.04871179247438492, 101 | "adpspec": 0.9512882075256152, 102 | "maxEm": 0.9669544828922699, 103 | "maxFm": 0.5886784581120638, 104 | "maxber": 0.6666666666666666, 105 | "maxdice": 0.5830613926289557, 106 | "maxf1": 0.5830613926289557, 107 | "maxfm": 
0.5886784581120638, 108 | "maxiou": 0.5201569938888494, 109 | "maxkappa": 0.6759493461328753, 110 | "maxoa": 0.9654783867686053, 111 | "maxpre": 0.6396783912301717, 112 | "maxrec": 0.6666666666666666, 113 | "maxfpr": 1.0, 114 | "maxspec": 0.9965927890353435, 115 | "meanEm": 0.9566258293508704, 116 | "meanFm": 0.577051059518767, 117 | "meanber": 0.23290802950995626, 118 | "meandice": 0.5689913551800527, 119 | "meanf1": 0.568991355180053, 120 | "meanfm": 0.577051059518767, 121 | "meaniou": 0.49816648786971, 122 | "meankappa": 0.6443053495487194, 123 | "meanoa": 0.9596413706286032, 124 | "meanpre": 0.5857695537152126, 125 | "meanrec": 0.5599653001125341, 126 | "meanfpr": 0.02578135913244661, 127 | "meanspec": 0.9742186408675534, 128 | "overall_biber": 0.08527759498137788, 129 | "overall_bidice": 0.8510675335753018, 130 | "overall_bif1": 0.8510675335753017, 131 | "overall_bifm": 0.8525259082995088, 132 | "overall_biiou": 0.740746352327995, 133 | "overall_bikappa": 0.7400114676102276, 134 | "overall_bioa": 0.965778, 135 | "overall_bipre": 0.8537799277020065, 136 | "overall_birec": 0.8483723190115916, 137 | "overall_bifpr": 0.018927508974347383, 138 | "overall_bispec": 0.9810724910256526, 139 | "sample_biber": 0.23037858807333392, 140 | "sample_bidice": 0.5738376903441331, 141 | "sample_bif1": 0.5738376903441331, 142 | "sample_bifm": 0.5829998670906196, 143 | "sample_biiou": 0.5039622042094377, 144 | "sample_bikappa": 0.6510635726572914, 145 | "sample_bioa": 0.964811758770181, 146 | "sample_bipre": 0.5916996553523113, 147 | "sample_birec": 0.5592859147614985, 148 | "sample_bifpr": 0.02004309090816628, 149 | "sample_bispec": 0.9799569090918337, 150 | "wFmeasure": 0.5579812753638986 151 | }, 152 | "v1_4_3": { 153 | "MAE": 0.03705558476661653, 154 | "Smeasure": 0.9029761578759272, 155 | "adpEm": 0.9408760066970617, 156 | "adpFm": 0.5816750824038355, 157 | "adpber": 0.2354784689008184, 158 | "adpdice": 0.5801020564379223, 159 | "adpf1": 0.5801020564379223, 160 | "adpfm": 
0.5816750824038355, 161 | "adpiou": 0.5141023436626048, 162 | "adpkappa": 0.6568702977598276, 163 | "adpmsiou": 0.8309076073697286, 164 | "adpoa": 0.9391947016812359, 165 | "adppre": 0.583200007681871, 166 | "adprec": 0.5777548546727481, 167 | "adpfpr": 0.04871179247438492, 168 | "adpspec": 0.9512882075256152, 169 | "maxEm": 0.9669544828922699, 170 | "maxFm": 0.5886784581120638, 171 | "maxber": 0.6666666666666666, 172 | "maxdice": 0.5830613926289557, 173 | "maxf1": 0.5830613926289557, 174 | "maxfm": 0.5886784581120638, 175 | "maxiou": 0.5201569938888494, 176 | "maxkappa": 0.6759493461328753, 177 | "maxmsiou": 0.8362740728548873, 178 | "maxoa": 0.9654783867686053, 179 | "maxpre": 0.6396783912301717, 180 | "maxrec": 0.6666666666666666, 181 | "maxfpr": 1.0, 182 | "maxspec": 0.9965927890353435, 183 | "meanEm": 0.9566258293508704, 184 | "meanFm": 0.577051059518767, 185 | "meanber": 0.23290802950995626, 186 | "meandice": 0.5689913551800527, 187 | "meanf1": 0.568991355180053, 188 | "meanfm": 0.577051059518767, 189 | "meaniou": 0.49816648786971, 190 | "meankappa": 0.6443053495487194, 191 | "meanmsiou": 0.817192961609182, 192 | "meanoa": 0.9596413706286032, 193 | "meanpre": 0.5857695537152126, 194 | "meanrec": 0.5599653001125341, 195 | "meanfpr": 0.02578135913244661, 196 | "meanspec": 0.9742186408675534, 197 | "overall_biber": 0.08527759498137788, 198 | "overall_bidice": 0.8510675335753018, 199 | "overall_bif1": 0.8510675335753017, 200 | "overall_bifm": 0.8525259082995088, 201 | "overall_biiou": 0.740746352327995, 202 | "overall_bikappa": 0.7400114676102276, 203 | "overall_bioa": 0.965778, 204 | "overall_bipre": 0.8537799277020065, 205 | "overall_birec": 0.8483723190115916, 206 | "overall_bifpr": 0.018927508974347383, 207 | "overall_bispec": 0.9810724910256526, 208 | "sample_biber": 0.23037858807333392, 209 | "sample_bidice": 0.5738376903441331, 210 | "sample_bif1": 0.5738376903441331, 211 | "sample_bifm": 0.5829998670906196, 212 | "sample_biiou": 0.5039622042094377, 213 | 
"sample_bikappa": 0.6510635726572914, 214 | "sample_bimsiou": 0.8227620408962383, 215 | "sample_bioa": 0.964811758770181, 216 | "sample_bipre": 0.5916996553523113, 217 | "sample_birec": 0.5592859147614985, 218 | "sample_bifpr": 0.02004309090816628, 219 | "sample_bispec": 0.9799569090918337, 220 | "wFmeasure": 0.5579812753638986 221 | }, 222 | "v1_5_0": { 223 | "si_mae": 0.062219430633157186, 224 | "si_overall_biber": 0.08057909438705857, 225 | "si_overall_bidice": 0.8689933434647754, 226 | "si_overall_bif1": 0.8689933434647755, 227 | "si_overall_bifm": 0.8805176956944588, 228 | "si_overall_bifpr": 0.009530507785708842, 229 | "si_overall_biiou": 0.768336188335874, 230 | "si_overall_bikappa": 0.7691515398583714, 231 | "si_overall_bioa": 0.9785585454545455, 232 | "si_overall_bipre": 0.8906417940356693, 233 | "si_overall_birec": 0.8483723190115916, 234 | "si_overall_bispec": 0.9904694922142911, 235 | "si_overall_maxber": 0.625, 236 | "si_overall_maxdice": 0.5954092597492913, 237 | "si_overall_maxf1": 0.5954092597492912, 238 | "si_overall_maxfm": 0.6049251905352891, 239 | "si_overall_maxfpr": 1.0, 240 | "si_overall_maxiou": 0.5170820894969207, 241 | "si_overall_maxkappa": 0.6552745344614292, 242 | "si_overall_maxoa": 0.977842037152757, 243 | "si_overall_maxpre": 0.6954311451835118, 244 | "si_overall_maxrec": 0.75, 245 | "si_overall_maxspec": 0.9975473999188901, 246 | "si_overall_meanber": 0.2465345494932514, 247 | "si_overall_meandice": 0.5557713842285521, 248 | "si_overall_meanf1": 0.5557713842285521, 249 | "si_overall_meanfm": 0.5914066516990303, 250 | "si_overall_meanfpr": 0.015208429493882425, 251 | "si_overall_meaniou": 0.47471774602791184, 252 | "si_overall_meankappa": 0.5964751103648793, 253 | "si_overall_meanoa": 0.972345297663634, 254 | "si_overall_meanpre": 0.6497316834023819, 255 | "si_overall_meanrec": 0.5221393305073796, 256 | "si_overall_meanspec": 0.9847915705061175, 257 | "si_sample_adpber": 0.25512709310479464, 258 | "si_sample_adpdice": 
0.5444846257190149, 259 | "si_sample_adpf1": 0.5444846257190148, 260 | "si_sample_adpfm": 0.5567302948345944, 261 | "si_sample_adpfpr": 0.037480829497605665, 262 | "si_sample_adpiou": 0.4763592468740943, 263 | "si_sample_adpkappa": 0.6400423451759603, 264 | "si_sample_adpoa": 0.9534735481231834, 265 | "si_sample_adppre": 0.5694959973412623, 266 | "si_sample_adprec": 0.5272266432880164, 267 | "si_sample_adpspec": 0.9625191705023943, 268 | "si_sample_biber": 0.26130632103242585, 269 | "si_sample_bidice": 0.5233655878893085, 270 | "si_sample_bif1": 0.5233655878893085, 271 | "si_sample_bifm": 0.5556766467815453, 272 | "si_sample_bifpr": 0.00830070487137397, 273 | "si_sample_biiou": 0.4524818546833645, 274 | "si_sample_bikappa": 0.6129034268568041, 275 | "si_sample_bioa": 0.9789323779961485, 276 | "si_sample_bipre": 0.5987496260981167, 277 | "si_sample_birec": 0.48568806280652216, 278 | "si_sample_bispec": 0.991699295128626, 279 | "si_sample_maxber": 0.6666666666666666, 280 | "si_sample_maxdice": 0.5549190570745837, 281 | "si_sample_maxf1": 0.5549190570745836, 282 | "si_sample_maxfm": 0.5609494528499654, 283 | "si_sample_maxfpr": 1.0, 284 | "si_sample_maxiou": 0.4939998363355485, 285 | "si_sample_maxkappa": 0.6699265264324135, 286 | "si_sample_maxoa": 0.9831346991175955, 287 | "si_sample_maxpre": 0.6355082212290294, 288 | "si_sample_maxrec": 0.6666666666666666, 289 | "si_sample_maxspec": 0.9983641230378081, 290 | "si_sample_meanber": 0.26322176259489033, 291 | "si_sample_meandice": 0.5171260001047577, 292 | "si_sample_meanf1": 0.5171260001047577, 293 | "si_sample_meanfm": 0.5462422547476231, 294 | "si_sample_meanfpr": 0.013213645733269934, 295 | "si_sample_meaniou": 0.4477218744296734, 296 | "si_sample_meankappa": 0.6075645114556255, 297 | "si_sample_meanoa": 0.9746901954959566, 298 | "si_sample_meanpre": 0.5903439300259663, 299 | "si_sample_meanrec": 0.4867701205434893, 300 | "si_sample_meanspec": 0.9867863542667301, 301 | "auc_pr": 0.19452884849631813, 302 | 
"auc_roc": 0.6468975503667292, 303 | "si_overall_auc_pr": 0.3036500410380263, 304 | "si_overall_auc_roc": 0.6192831970413093, 305 | "si_sample_auc_pr": 0.3036500410380263, 306 | "si_sample_auc_roc": 0.6192831970413093 307 | }, 308 | "v1_5_1": { 309 | "HCE": 73.66666666666667 310 | }, 311 | "v1_6_0": { 312 | "ccm": 0.5549345672746412, 313 | "cm": 0.554784060409666 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /py_sod_metrics/context_measure.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import cv2 4 | import numpy as np 5 | from skimage.color import deltaE_ciede2000, rgb2lab 6 | from sklearn.neighbors import NearestNeighbors 7 | from sklearn.preprocessing import StandardScaler 8 | 9 | from .utils import EPS, TYPE, validate_and_normalize_input 10 | 11 | 12 | class ContextMeasure: 13 | """Context-measure for evaluating foreground segmentation quality. 14 | 15 | This metric evaluates predictions by considering both forward inference (how well predictions align with ground truth) and reverse deduction (how completely ground truth is covered by predictions), using context-aware Gaussian kernels. 16 | 17 | ``` 18 | @article{ContextMeasure, 19 | title={Context-measure: Contextualizing Metric for Camouflage}, 20 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 21 | journal={arXiv preprint arXiv:2512.07076}, 22 | year={2025} 23 | } 24 | ``` 25 | """ 26 | 27 | def __init__(self, beta2: float = 1.0, alpha: float = 6.0): 28 | """Initialize the Context Measure evaluator. 29 | 30 | Args: 31 | beta2 (float): Balancing factor between forward inference and reverse deduction. Higher values give more weight to forward inference. Defaults to 1.0. 32 | alpha (float): Scaling factor for Gaussian kernel covariance, controls the spatial context range. Defaults to 6.0. 
33 | """ 34 | self.beta2 = beta2 35 | self.alpha = alpha 36 | self._exp_factor = math.e / (math.e - 1) 37 | self.scores = [] 38 | 39 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 40 | """Statistics the metric for the pair of pred and gt. 41 | 42 | Args: 43 | pred (np.ndarray): Prediction, gray scale image. 44 | gt (np.ndarray): Ground truth, gray scale image. 45 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 46 | """ 47 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 48 | 49 | # align with the original implementation 50 | pred = pred.astype(TYPE) 51 | gt = gt.astype(TYPE) 52 | 53 | score = self.compute(pred, gt, cd=np.zeros_like(gt)) 54 | self.scores.append(score) 55 | 56 | def compute(self, pred: np.ndarray, gt: np.ndarray, cd: np.ndarray) -> float: 57 | """Compute the context measure between prediction and ground truth. 58 | 59 | Args: 60 | pred (np.ndarray): Prediction map (values between 0 and 1). 61 | gt (np.ndarray): Ground truth map (boolean or 0/1 values). 62 | cd (np.ndarray): Camouflage degree map (values between 0 and 1). 63 | 64 | Returns: 65 | float: Context measure value. 
66 | """ 67 | cov_matrix, x_dis, y_dis = self._compute_y_params(gt) 68 | K = self._gaussian_kernel(x_dis, y_dis, cov_matrix) 69 | 70 | # Forward inference: measure prediction relevance 71 | forward = self._forward_inference(pred, gt, K) 72 | mforward = np.sum(forward * pred) / (np.sum(pred) + EPS) 73 | 74 | # Reverse deduction: measure ground truth completeness 75 | reverse = self._reverse_deduction(pred, gt, K) 76 | 77 | wreverse = np.sum(reverse * (gt + cd)) / (np.sum(gt) + np.sum(cd) + EPS) 78 | 79 | # F-measure style combination 80 | return (1 + self.beta2) * mforward * wreverse / (self.beta2 * mforward + wreverse + EPS) 81 | 82 | def _forward_inference(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray: 83 | """Calculate forward inference: how well predictions align with ground truth context.""" 84 | x_binary = (X > 0).astype(int) 85 | # note: using EPS=1e-8 and this statement, the test result is the same as the original implementation 86 | # global_relevance_matrix = cv2.filter2D(Y, cv2.CV_32F, kernel) 87 | # note: this is a hack to make sure that the type of Y is compatible with more diverse data 88 | global_relevance_matrix = cv2.filter2D(Y.astype(np.float32), cv2.CV_32F, kernel) 89 | return x_binary * global_relevance_matrix 90 | 91 | def _reverse_deduction(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray: 92 | """Calculate reverse deduction: how completely ground truth is covered by predictions.""" 93 | X = X.astype(float) 94 | non_global_completeness_matrix = np.exp(-1 * cv2.filter2D(X, -1, kernel)) 95 | global_completeness_matrix = 1 - non_global_completeness_matrix 96 | reverse = self._exp_factor * Y * global_completeness_matrix 97 | return reverse 98 | 99 | def _gaussian_kernel(self, x_dis: int, y_dis: int, cov_matrix: np.ndarray) -> np.ndarray: 100 | """Generate a 2D Gaussian kernel based on covariance matrix.""" 101 | det_sigma = np.linalg.det(cov_matrix) 102 | inv_sigma = np.linalg.inv(cov_matrix) 103 | 104 | 
x, y = np.meshgrid(np.arange(-x_dis, x_dis + 1), np.arange(-y_dis, y_dis + 1), indexing="ij") 105 | Z = np.stack([x, y], axis=-1) 106 | exp_term = np.einsum("...i,ij,...j->...", Z, inv_sigma, Z) 107 | 108 | kernel = np.exp(-0.5 * exp_term) / (2 * np.pi * np.sqrt(det_sigma)) 109 | return kernel / np.sum(kernel) 110 | 111 | def _compute_y_params(self, Y: np.ndarray) -> tuple: 112 | """Compute Gaussian kernel parameters based on ground truth distribution.""" 113 | points = np.argwhere(Y > 0) 114 | if len(points) <= 1: 115 | return np.diag([0.25, 0.25]), 1, 1 116 | 117 | cov_matrix = np.cov(points, rowvar=False) 118 | sigma_x = np.sqrt(cov_matrix[0, 0]) 119 | sigma_y = np.sqrt(cov_matrix[1, 1]) 120 | total_sigma = np.sqrt(cov_matrix[0, 0] + cov_matrix[1, 1]) 121 | 122 | std_cov_matrix = self.alpha**2 * cov_matrix / (total_sigma**2) 123 | std_sigma_x = self.alpha * sigma_x / total_sigma 124 | std_sigma_y = self.alpha * sigma_y / total_sigma 125 | x_dis = round(3 * std_sigma_x) 126 | y_dis = round(3 * std_sigma_y) 127 | 128 | return std_cov_matrix, x_dis, y_dis 129 | 130 | def get_results(self) -> dict: 131 | """Return the results about context measure. 132 | 133 | Returns: 134 | dict(cm=context_measure) 135 | """ 136 | cm = np.mean(np.array(self.scores, dtype=TYPE)) 137 | return dict(cm=cm) 138 | 139 | 140 | class CamouflageContextMeasure(ContextMeasure): 141 | """Camouflage Context-measure for evaluating camouflaged object detection quality. 142 | 143 | This metric extends the base ContextMeasure by incorporating camouflage degree, which measures how well the foreground blends with its surrounding background. It uses patch-based nearest neighbor matching in Lab color space with spatial constraints to estimate camouflage difficulty. 
144 | 145 | ``` 146 | @article{ContextMeasure, 147 | title={Context-measure: Contextualizing Metric for Camouflage}, 148 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 149 | journal={arXiv preprint arXiv:2512.07076}, 150 | year={2025} 151 | } 152 | ``` 153 | """ 154 | 155 | def __init__(self, beta2: float = 1.2, alpha: float = 6.0, gamma: int = 8, lambda_spatial: float = 20): 156 | """Initialize the Camouflage Context Measure evaluator. 157 | 158 | Args: 159 | beta2 (float): Balancing factor for forward and reverse. Defaults to 1.2 for camouflage. 160 | alpha (float): Gaussian kernel scaling factor. Defaults to 6.0. 161 | gamma (int): Exponential scaling factor for camouflage degree. Defaults to 8. 162 | lambda_spatial (float): Weight for spatial distance in ANN search. Defaults to 20. 163 | """ 164 | super().__init__(beta2=beta2, alpha=alpha) 165 | self.gamma = gamma 166 | self.lambda_spatial = lambda_spatial 167 | 168 | def step(self, pred: np.ndarray, gt: np.ndarray, img: np.ndarray, normalize: bool = True): 169 | """Statistics the metric for the pair of pred, gt, and img. 170 | 171 | Args: 172 | pred (np.ndarray): Prediction, gray scale image. 173 | gt (np.ndarray): Ground truth, gray scale image. 174 | img (np.ndarray): Original RGB image (required for camouflage degree calculation). 175 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 176 | """ 177 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 178 | 179 | pred = pred.astype(TYPE) 180 | gt = gt.astype(TYPE) 181 | 182 | _, cd = self._calculate_camouflage_degree(img, gt) 183 | score = self.compute(pred, gt, cd=cd) 184 | self.scores.append(score) 185 | 186 | def _calculate_camouflage_degree(self, img: np.ndarray, mask: np.ndarray, w: int = 7) -> tuple: 187 | """Compute the camouflage degree matrix using Lab+spatial ANN and RGB reconstruction. 188 | 189 | Args: 190 | img (np.ndarray): RGB image (H x W x 3). 
191 | mask (np.ndarray): Binary mask (H x W). 192 | w (int): Patch size. Defaults to 7. 193 | 194 | Returns: 195 | tuple: (reconstructed_image, camouflage_degree_matrix) 196 | """ 197 | mask_binary = (mask > 0).astype(np.uint8) 198 | fg_mask = mask_binary 199 | bg_mask = self._extract_surrounding_background(fg_mask, kernel_size=20) 200 | im_fg = fg_mask[:, :, np.newaxis] * img 201 | im_bg = bg_mask[:, :, np.newaxis] * img 202 | im_lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB) 203 | 204 | # Step 1: Extract patches in Lab space 205 | im_fg_lab = im_lab * fg_mask[:, :, np.newaxis] 206 | im_bg_lab = im_lab * bg_mask[:, :, np.newaxis] 207 | 208 | fg_indices, fg_feat_lab = self._extract_patches(im_fg_lab, fg_mask, w, d=w // 2) 209 | bg_indices, bg_feat_lab = self._extract_patches(im_bg_lab, bg_mask, w, d=w // 2) 210 | 211 | # Check if we have enough patches to compute camouflage degree 212 | if len(fg_indices) == 0 or len(bg_indices) == 0: 213 | # Return zero camouflage degree when insufficient data 214 | img_recon = np.zeros_like(img) 215 | cd = np.zeros_like(mask, dtype=TYPE) 216 | return img_recon, cd 217 | 218 | # Step 2: Lab+spatial ANN query 219 | fg_nn = self._ann_with_spatial_faiss(bg_feat_lab, fg_feat_lab, bg_indices, fg_indices) 220 | 221 | # Step 3: Reconstruct foreground in RGB space 222 | img_recon = self._reconstruct_image(img, fg_indices, bg_indices, fg_nn, im_bg, w) 223 | 224 | # Step 4: Compute similarity in Lab space 225 | similarity_matrix = self._compute_delta_e2000_matrix(img_recon, im_fg.astype(np.uint8)).astype(TYPE) 226 | 227 | # Step 5: Compute camouflage degree 228 | cd = ((np.exp(self.gamma * similarity_matrix * mask_binary) - 1) / (np.exp(self.gamma) - 1)).astype(TYPE) 229 | 230 | return img_recon, cd 231 | 232 | def _ann_with_spatial_faiss(self, x, q, x_coords, q_coords, m=16): 233 | """Approximate Nearest Neighbor search with spatial constraints using sklearn. 
234 | 235 | Note: Method name retained for compatibility, but now uses sklearn.neighbors.NearestNeighbors instead of FAISS for a more lightweight dependency. 236 | """ 237 | all_coords = np.vstack([x_coords, q_coords]) 238 | scaled_coords = StandardScaler().fit_transform(all_coords) 239 | x_coords_scaled = scaled_coords[: len(x_coords)] 240 | q_coords_scaled = scaled_coords[len(x_coords) :] 241 | 242 | x_aug = np.hstack([x, self.lambda_spatial * x_coords_scaled]).astype(np.float32) 243 | q_aug = np.hstack([q, self.lambda_spatial * q_coords_scaled]).astype(np.float32) 244 | 245 | # Use sklearn NearestNeighbors instead of FAISS for lightweight alternative 246 | nbrs = NearestNeighbors(n_neighbors=1, algorithm="auto", metric="euclidean") 247 | nbrs.fit(x_aug) 248 | 249 | _, indices = nbrs.kneighbors(q_aug) # top-1 250 | return indices 251 | 252 | def _extract_surrounding_background(self, mask: np.ndarray, kernel_size: int = 20) -> np.ndarray: 253 | """Extract the surrounding background region around the foreground.""" 254 | kernel = np.ones((kernel_size, kernel_size), np.uint8) 255 | dilated_mask = cv2.dilate(mask, kernel, iterations=1) 256 | surrounding_bg_mask = dilated_mask - mask 257 | return surrounding_bg_mask 258 | 259 | def _extract_patches(self, img: np.ndarray, mask: np.ndarray, w: int, d: int) -> tuple: 260 | """Extract valid patches from the image based on mask.""" 261 | h, w_, c = img.shape 262 | pad_h = (d - (h - w) % d) % d 263 | pad_w = (d - (w_ - w) % d) % d 264 | img_padded = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode="reflect") 265 | mask_padded = np.pad(mask, ((0, pad_h), (0, pad_w)), mode="constant") 266 | 267 | new_h, new_w = img_padded.shape[:2] 268 | 269 | img_patches = np.lib.stride_tricks.sliding_window_view(img_padded, (w, w, img.shape[2]))[::d, ::d, 0, :, :, :] 270 | mask_patches = np.lib.stride_tricks.sliding_window_view(mask_padded, (w, w))[::d, ::d, :, :] 271 | 272 | img_patches = img_patches.reshape(-1, w * w * c) 273 | 
mask_patches = mask_patches.reshape(-1, w, w) 274 | 275 | grid_x, grid_y = np.meshgrid(np.arange(0, new_h - w + 1, d), np.arange(0, new_w - w + 1, d), indexing="ij") 276 | all_indices = np.column_stack((grid_x.ravel(), grid_y.ravel())) 277 | valid_idx = np.all(mask_patches > 0, axis=(1, 2)) 278 | valid_indices = all_indices[valid_idx] 279 | valid_patches = img_patches[valid_idx] 280 | 281 | return valid_indices, valid_patches 282 | 283 | def _reconstruct_image( 284 | self, 285 | img: np.ndarray, 286 | fg_indices: np.ndarray, 287 | bg_indices: np.ndarray, 288 | fg_nn: np.ndarray, 289 | im_bg: np.ndarray, 290 | w: int, 291 | ) -> np.ndarray: 292 | """Reconstruct foreground using nearest neighbor background patches.""" 293 | img_recon = np.zeros_like(img, dtype=np.int64) 294 | counts = np.zeros(img.shape[:2]) + EPS 295 | 296 | fg_x, fg_y = fg_indices[:, 0], fg_indices[:, 1] 297 | nn_i_j = fg_nn[:, 0] 298 | cii, cjj = bg_indices[nn_i_j, 0], bg_indices[nn_i_j, 1] 299 | 300 | fg_x = np.clip(fg_x, 0, img.shape[0] - w) 301 | fg_y = np.clip(fg_y, 0, img.shape[1] - w) 302 | cii = np.clip(cii, 0, img.shape[0] - w) 303 | cjj = np.clip(cjj, 0, img.shape[1] - w) 304 | 305 | for i in range(fg_indices.shape[0]): 306 | img_recon[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w, :] += im_bg[ 307 | cii[i] : cii[i] + w, cjj[i] : cjj[i] + w, : 308 | ] 309 | counts[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w] += 1 310 | 311 | counts = np.expand_dims(counts, axis=-1) 312 | img_recon = np.round(img_recon / counts).astype(np.uint8) 313 | 314 | return img_recon 315 | 316 | def _compute_delta_e2000_matrix(self, img1_rgb: np.ndarray, img2_rgb: np.ndarray) -> np.ndarray: 317 | """Compute the perceptual color difference (ΔE 2000) between two images. 318 | 319 | Args: 320 | img1_rgb (np.ndarray): First input image (H x W x 3) in RGB format. 321 | img2_rgb (np.ndarray): Second input image (H x W x 3) in RGB format. 
322 | 323 | Returns: 324 | np.ndarray: Similarity matrix with values in [0,1] (higher = more similar). 325 | """ 326 | # Convert RGB to Lab color space 327 | lab1 = rgb2lab(img1_rgb) 328 | lab2 = rgb2lab(img2_rgb) 329 | 330 | # Compute ΔE 2000 color difference 331 | delta_e_matrix = deltaE_ciede2000(lab1, lab2) 332 | 333 | # Normalize ΔE 2000 values to [0,1] 334 | similarity_matrix = 1 - np.clip(delta_e_matrix / 100, 0, 1) 335 | 336 | return similarity_matrix 337 | 338 | def get_results(self) -> dict: 339 | """Return the results about camouflage context measure. 340 | 341 | Returns: 342 | dict(ccm=camouflage_context_measure) 343 | """ 344 | ccm = np.mean(np.array(self.scores, dtype=TYPE)) 345 | return dict(ccm=ccm) 346 | return dict(ccm=ccm) 347 | return dict(ccm=ccm) 348 | -------------------------------------------------------------------------------- /py_sod_metrics/fmeasurev2.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | from .utils import TYPE, get_adaptive_threshold, validate_and_normalize_input 6 | 7 | 8 | class _BaseHandler: 9 | """Base class for all metric handlers. 10 | 11 | Provides common functionality for calculating various segmentation metrics. 12 | """ 13 | 14 | def __init__( 15 | self, 16 | with_dynamic: bool, 17 | with_adaptive: bool, 18 | *, 19 | with_binary: bool = False, 20 | sample_based: bool = True, 21 | ): 22 | """Initialize the base handler. 23 | 24 | Args: 25 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 26 | with_adaptive (bool, optional): Record adaptive results for adp version. 27 | with_binary (bool, optional): Record binary results for binary version. 28 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 29 | the metric of the dataset. Defaults to True. 
30 | """ 31 | self.dynamic_results = [] if with_dynamic else None 32 | self.adaptive_results = [] if with_adaptive else None 33 | self.sample_based = sample_based 34 | if with_binary: 35 | if self.sample_based: 36 | self.binary_results = [] 37 | else: 38 | self.binary_results = {"tp": 0, "fp": 0, "tn": 0, "fn": 0} 39 | else: 40 | self.binary_results = None 41 | 42 | @abc.abstractmethod 43 | def __call__(self, tp, fp, tn, fn): 44 | """Calculate the metric value. 45 | 46 | Args: 47 | tp: True positive count(s) 48 | fp: False positive count(s) 49 | tn: True negative count(s) 50 | fn: False negative count(s) 51 | 52 | Returns: 53 | Calculated metric value(s) 54 | """ 55 | pass 56 | 57 | @staticmethod 58 | def divide(numerator, denominator): 59 | """Safe division with numpy arrays handling zero denominators. 60 | 61 | Args: 62 | numerator: Numerator value(s) 63 | denominator: Denominator value(s) 64 | 65 | Returns: 66 | Result of division with zero handling 67 | """ 68 | denominator = np.array(denominator, dtype=TYPE) 69 | np.divide(numerator, denominator, out=denominator, where=denominator != 0) 70 | return denominator 71 | 72 | 73 | class IOUHandler(_BaseHandler): 74 | """Intersection over Union. 75 | 76 | iou = tp / (tp + fp + fn) 77 | """ 78 | 79 | def __call__(self, tp, fp, tn, fn): 80 | """Calculate IoU from confusion matrix components.""" 81 | # ious = np.where(Ps + FNs == 0, 0, TPs / (Ps + FNs)) 82 | return self.divide(tp, tp + fp + fn) 83 | 84 | 85 | class SpecificityHandler(_BaseHandler): 86 | """Specificity. 87 | 88 | True negative rate (TNR)/specificity (SPC)/selectivity 89 | 90 | specificity = tn / (tn + fp) 91 | """ 92 | 93 | def __call__(self, tp, fp, tn, fn): 94 | """Calculate specificity from confusion matrix components.""" 95 | # specificities = np.where(TNs + FPs == 0, 0, TNs / (TNs + FPs)) 96 | return self.divide(tn, tn + fp) 97 | 98 | 99 | TNRHandler = SpecificityHandler 100 | 101 | 102 | class DICEHandler(_BaseHandler): 103 | """DICE. 
104 | 105 | dice = 2 * tp / (tp + fn + tp + fp) 106 | """ 107 | 108 | def __call__(self, tp, fp, tn, fn): 109 | """Calculate DICE coefficient from confusion matrix components.""" 110 | # dices = np.where(TPs + FPs == 0, 0, 2 * TPs / (T + Ps)) 111 | return self.divide(2 * tp, tp + fn + tp + fp) 112 | 113 | 114 | class OverallAccuracyHandler(_BaseHandler): 115 | """Overall Accuracy. 116 | 117 | oa = overall_accuracy = (tp + tn) / (tp + fp + tn + fn) 118 | """ 119 | 120 | def __call__(self, tp, fp, tn, fn): 121 | """Calculate overall accuracy from confusion matrix components.""" 122 | # dices = np.where(TPs + FPs == 0, 0, 2 * TPs / (T + Ps)) 123 | return self.divide(tp + tn, tp + fp + tn + fn) 124 | 125 | 126 | class KappaHandler(_BaseHandler): 127 | """Kappa Accuracy. 128 | 129 | kappa = kappa = (oa - p_) / (1 - p_) 130 | p_ = [(tp + fp)(tp + fn) + (tn + fn)(tn + tp)] / (tp + fp + tn + fn)^2 131 | """ 132 | 133 | def __init__( 134 | self, 135 | with_dynamic: bool, 136 | with_adaptive: bool, 137 | *, 138 | with_binary: bool = False, 139 | sample_based: bool = True, 140 | ): 141 | """Initialize the Kappa handler. 142 | 143 | Args: 144 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 145 | with_adaptive (bool, optional): Record adaptive results for adp version. 146 | with_binary (bool, optional): Record binary results for binary version. 147 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 148 | the metric of the dataset. Defaults to True. 
149 | """ 150 | super().__init__( 151 | with_dynamic=with_dynamic, 152 | with_adaptive=with_adaptive, 153 | with_binary=with_binary, 154 | sample_based=sample_based, 155 | ) 156 | 157 | self.oa = OverallAccuracyHandler(False, False) 158 | 159 | def __call__(self, tp, fp, tn, fn): 160 | """Calculate Kappa coefficient from confusion matrix components.""" 161 | oa = self.oa(tp, fp, tn, fn) 162 | hpy_p = self.divide( 163 | (tp + fp) * (tp + fn) + (tn + fn) * (tn + tp), 164 | (tp + fp + tn + fn) ** 2, 165 | ) 166 | return self.divide(oa - hpy_p, 1 - hpy_p) 167 | 168 | 169 | class PrecisionHandler(_BaseHandler): 170 | """Precision. 171 | 172 | precision = tp / (tp + fp) 173 | """ 174 | 175 | def __call__(self, tp, fp, tn, fn): 176 | """Calculate precision from confusion matrix components.""" 177 | # precisions = np.where(Ps == 0, 0, TPs / Ps) 178 | return self.divide(tp, tp + fp) 179 | 180 | 181 | class RecallHandler(_BaseHandler): 182 | """Recall. 183 | 184 | True positive rate (TPR)/recall/sensitivity (SEN)/probability of detection/hit rate/power 185 | 186 | recall = tp / (tp + fn) 187 | """ 188 | 189 | def __call__(self, tp, fp, tn, fn): 190 | """Calculate recall from confusion matrix components.""" 191 | # recalls = np.where(TPs == 0, 0, TPs / T) 192 | return self.divide(tp, tp + fn) 193 | 194 | 195 | TPRHandler = RecallHandler 196 | SensitivityHandler = RecallHandler 197 | 198 | 199 | class FPRHandler(_BaseHandler): 200 | """False Positive Rate. 201 | 202 | False positive rate (FPR)/probability of false alarm/fall-out 203 | 204 | fpr = fp / (tn + fp) 205 | """ 206 | 207 | def __call__(self, tp, fp, tn, fn): 208 | """Calculate false positive rate from confusion matrix components.""" 209 | return self.divide(fp, tn + fp) 210 | 211 | 212 | class BERHandler(_BaseHandler): 213 | """Balance Error Rate. 
214 | 215 | ber = 1 - 0.5 * (tp / (tp + fn) + tn / (tn + fp)) 216 | """ 217 | 218 | def __call__(self, tp, fp, tn, fn): 219 | """Calculate balanced error rate from confusion matrix components.""" 220 | fg = np.asarray(tp + fn, dtype=TYPE) 221 | bg = np.asarray(tn + fp, dtype=TYPE) 222 | np.divide(tp, fg, out=fg, where=fg != 0) 223 | np.divide(tn, bg, out=bg, where=bg != 0) 224 | return 1 - 0.5 * (fg + bg) 225 | 226 | 227 | class FmeasureHandler(_BaseHandler): 228 | """F-measure. 229 | 230 | fmeasure = (beta + 1) * precision * recall / (beta * precision + recall) 231 | """ 232 | 233 | def __init__( 234 | self, 235 | with_dynamic: bool, 236 | with_adaptive: bool, 237 | *, 238 | with_binary: bool = False, 239 | sample_based: bool = True, 240 | beta: float = 0.3, 241 | ): 242 | """Initialize the F-measure handler. 243 | 244 | Args: 245 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 246 | with_adaptive (bool, optional): Record adaptive results for adp version. 247 | with_binary (bool, optional): Record binary results for binary version. 248 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 249 | the metric of the dataset. Defaults to True. 250 | beta (bool, optional): β^2 in F-measure. Defaults to 0.3. 251 | """ 252 | super().__init__( 253 | with_dynamic=with_dynamic, 254 | with_adaptive=with_adaptive, 255 | with_binary=with_binary, 256 | sample_based=sample_based, 257 | ) 258 | 259 | self.beta = beta 260 | self.precision = PrecisionHandler(False, False) 261 | self.recall = RecallHandler(False, False) 262 | 263 | def __call__(self, tp, fp, tn, fn): 264 | """Calculate F-measure from confusion matrix components. 265 | 266 | Note: 267 | Uses separate precision and recall calculations to maintain consistency with original implementation rather than combined formula. 
268 | """ 269 | p = self.precision(tp, fp, tn, fn) 270 | r = self.recall(tp, fp, tn, fn) 271 | return self.divide((self.beta + 1) * p * r, self.beta * p + r) 272 | 273 | 274 | class FmeasureV2: 275 | """Enhanced F-measure evaluator with support for multiple evaluation metrics. 276 | 277 | This class provides a flexible framework for computing various binary classification metrics including precision, recall, specificity, dice, IoU, and F-measure. It supports dynamic thresholding, adaptive thresholding, and binary evaluation modes. 278 | """ 279 | 280 | def __init__(self, metric_handlers: dict = None): 281 | """Enhanced Fmeasure class with more relevant metrics, e.g. precision, recall, specificity, dice, iou, fmeasure and so on. 282 | 283 | Args: 284 | metric_handlers (dict, optional): Handlers of different metrics. Defaults to None. 285 | """ 286 | self._metric_handlers = metric_handlers if metric_handlers else {} 287 | 288 | def add_handler(self, handler_name, metric_handler): 289 | """Add a metric handler to the evaluator. 290 | 291 | Args: 292 | handler_name (str): Name identifier for the metric handler. 293 | metric_handler: Handler instance that computes the specific metric. 294 | """ 295 | self._metric_handlers[handler_name] = metric_handler 296 | 297 | @staticmethod 298 | def get_statistics(binary: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 299 | """Calculate the TP, FP, TN and FN based a adaptive threshold. 
300 | 301 | Args: 302 | binary (np.ndarray): binarized `pred` containing [0, 1] 303 | gt (np.ndarray): gt binarized by 128 304 | FG (int): the number of foreground pixels in gt 305 | BG (int): the number of background pixels in gt 306 | 307 | Returns: 308 | dict: TP, FP, TN, FN 309 | """ 310 | TP = np.count_nonzero(binary[gt]) 311 | FP = np.count_nonzero(binary[~gt]) 312 | FN = FG - TP 313 | TN = BG - FP 314 | return {"tp": TP, "fp": FP, "tn": TN, "fn": FN} 315 | 316 | def adaptively_binarizing(self, pred: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 317 | """Calculate the TP, FP, TN and FN based a adaptive threshold. 318 | 319 | Args: 320 | pred (np.ndarray): prediction normalized in [0, 1] 321 | gt (np.ndarray): gt binarized by 128 322 | FG (int): the number of foreground pixels in gt 323 | BG (int): the number of background pixels in gt 324 | 325 | Returns: 326 | dict: TP, FP, TN, FN 327 | """ 328 | adaptive_threshold = get_adaptive_threshold(pred, max_value=1) 329 | binary = pred >= adaptive_threshold 330 | return self.get_statistics(binary, gt, FG, BG) 331 | 332 | def dynamically_binarizing(self, pred: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 333 | """Calculate the corresponding TP, FP, TN and FNs when the threshold changes from 0 to 255. 334 | 335 | Args: 336 | pred (np.ndarray): prediction normalized in [0, 1] 337 | gt (np.ndarray): gt binarized by 128 338 | FG (int): the number of foreground pixels in gt 339 | BG (int): the number of background pixels in gt 340 | 341 | Returns: 342 | dict: TPs, FPs, TNs, FNs 343 | """ 344 | # 1. 获取预测结果在真值前背景区域中的直方图 345 | pred: np.ndarray = (pred * 255).astype(np.uint8) 346 | bins: np.ndarray = np.linspace(0, 256, 257) 347 | tp_hist, _ = np.histogram(pred[gt], bins=bins) # 最后一个bin为[255, 256] 348 | fp_hist, _ = np.histogram(pred[~gt], bins=bins) 349 | 350 | # 2. 
使用累积直方图(Cumulative Histogram)获得对应真值前背景中大于不同阈值的像素数量 351 | # 这里使用累加(cumsum)就是为了一次性得出 >=不同阈值 的像素数量, 这里仅计算了前景区域 352 | tp_w_thrs = np.cumsum(np.flip(tp_hist)) # >= 255, >= 254, ... >= 1, >= 0 353 | fp_w_thrs = np.cumsum(np.flip(fp_hist)) 354 | 355 | # 3. 计算对应的TP,FP,TN,FN 356 | TPs = tp_w_thrs # 前景 预测为 前景 357 | FPs = fp_w_thrs # 背景 预测为 前景 358 | FNs = FG - TPs # 前景 预测为 背景 359 | TNs = BG - FPs # 背景 预测为 背景 360 | return {"tp": TPs, "fp": FPs, "tn": TNs, "fn": FNs} 361 | 362 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 363 | """Statistics the metrics for the pair of pred and gt. 364 | 365 | Args: 366 | pred (np.ndarray): Prediction, gray scale image. 367 | gt (np.ndarray): Ground truth, gray scale image. 368 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 369 | """ 370 | if not self._metric_handlers: # 没有添加metric_handler 371 | raise ValueError("Please add your metric handler before using `step()`.") 372 | 373 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 374 | 375 | FG = np.count_nonzero(gt) # 真实前景, FG=(TPs+FNs) 376 | BG = gt.size - FG # 真实背景, BG=(TNs+FPs) 377 | 378 | dynamical_tpfptnfn = None 379 | adaptive_tpfptnfn = None 380 | binary_tpfptnfn = None 381 | for handler_name, handler in self._metric_handlers.items(): 382 | if handler.dynamic_results is not None: 383 | if dynamical_tpfptnfn is None: 384 | dynamical_tpfptnfn = self.dynamically_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 385 | handler.dynamic_results.append(handler(**dynamical_tpfptnfn)) 386 | 387 | if handler.adaptive_results is not None: 388 | if adaptive_tpfptnfn is None: 389 | adaptive_tpfptnfn = self.adaptively_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 390 | handler.adaptive_results.append(handler(**adaptive_tpfptnfn)) 391 | 392 | if handler.binary_results is not None: 393 | if binary_tpfptnfn is None: 394 | # `pred > 0.5`: Simulating the effect of the `argmax` function. 
395 | binary_tpfptnfn = self.get_statistics(binary=pred > 0.5, gt=gt, FG=FG, BG=BG) 396 | if handler.sample_based: 397 | handler.binary_results.append(handler(**binary_tpfptnfn)) 398 | else: 399 | handler.binary_results["tp"] += binary_tpfptnfn["tp"] 400 | handler.binary_results["fp"] += binary_tpfptnfn["fp"] 401 | handler.binary_results["tn"] += binary_tpfptnfn["tn"] 402 | handler.binary_results["fn"] += binary_tpfptnfn["fn"] 403 | 404 | def get_results(self) -> dict: 405 | """Return the results of the specific metric names. 406 | 407 | Returns: 408 | dict: All results corresponding to different metrics. 409 | """ 410 | results = {} 411 | for handler_name, handler in self._metric_handlers.items(): 412 | res = {} 413 | if handler.dynamic_results is not None: 414 | res["dynamic"] = np.mean(np.array(handler.dynamic_results, dtype=TYPE), axis=0) 415 | if handler.adaptive_results is not None: 416 | res["adaptive"] = np.mean(np.array(handler.adaptive_results, dtype=TYPE)) 417 | if handler.binary_results is not None: 418 | if handler.sample_based: 419 | res["binary"] = np.mean(np.array(handler.binary_results, dtype=TYPE)) 420 | else: 421 | # NOTE: use `np.mean` to simplify output format (`array(123)` -> `123`) 422 | res["binary"] = np.mean(handler(**handler.binary_results)) 423 | results[handler_name] = res 424 | return results 425 | -------------------------------------------------------------------------------- /deploy/usage.rst: -------------------------------------------------------------------------------- 1 | Usage Guide 2 | =========== 3 | 4 | This guide provides practical examples of how to use PySODMetrics for evaluating your image segmentation results. 5 | 6 | Quick Start 7 | ----------- 8 | 9 | Basic Example with Individual Metrics 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Here's a simple example using individual metrics: 13 | 14 | .. 
code-block:: python 15 | 16 | import cv2 17 | import numpy as np 18 | from py_sod_metrics import MAE, Emeasure, Smeasure, Fmeasure, WeightedFmeasure 19 | 20 | # Initialize metrics 21 | mae = MAE() 22 | em = Emeasure() 23 | sm = Smeasure() 24 | fm = Fmeasure() 25 | wfm = WeightedFmeasure() 26 | 27 | # Process your dataset 28 | # Note: pred and gt should be uint8 numpy arrays with values in [0, 255] 29 | for pred_path, gt_path in zip(pred_paths, gt_paths): 30 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 31 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 32 | 33 | # Resize prediction to match ground truth size if needed 34 | if pred.shape != gt.shape: 35 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 36 | 37 | # Feed predictions to metrics 38 | mae.step(pred, gt) 39 | em.step(pred, gt) 40 | sm.step(pred, gt) 41 | fm.step(pred, gt) 42 | wfm.step(pred, gt) 43 | 44 | # Get results 45 | mae_score = mae.get_results()["mae"] 46 | em_results = em.get_results()["em"] 47 | sm_score = sm.get_results()["sm"] 48 | fm_results = fm.get_results()["fm"] 49 | wfm_score = wfm.get_results()["wfm"] 50 | 51 | print(f"MAE: {mae_score:.4f}") 52 | print(f"S-measure: {sm_score:.4f}") 53 | print(f"Weighted F-measure: {wfm_score:.4f}") 54 | print(f"Max E-measure: {em_results['curve'].max():.4f}") 55 | print(f"Adaptive F-measure: {fm_results['adp']:.4f}") 56 | 57 | Using FmeasureV2 Framework (Recommended) 58 | ----------------------------------------- 59 | 60 | The ``FmeasureV2`` framework provides a unified interface for computing multiple metrics efficiently. 61 | 62 | Basic FmeasureV2 Usage 63 | ~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | .. 
code-block:: python 66 | 67 | import cv2 68 | from py_sod_metrics import FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, IOUHandler 69 | 70 | # Configure metric handlers 71 | fmv2 = FmeasureV2( 72 | metric_handlers={ 73 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 74 | "f1": FmeasureHandler(beta=1, with_adaptive=True, with_dynamic=True), 75 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 76 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 77 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 78 | } 79 | ) 80 | 81 | # Process dataset 82 | for pred_path, gt_path in zip(pred_paths, gt_paths): 83 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 84 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 85 | 86 | if pred.shape != gt.shape: 87 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 88 | 89 | fmv2.step(pred, gt) 90 | 91 | # Get results 92 | results = fmv2.get_results() 93 | 94 | # Access different aggregation strategies 95 | print(f"Adaptive F-measure: {results['fm']['adaptive']:.4f}") 96 | print(f"Mean F-measure: {results['fm']['dynamic'].mean():.4f}") 97 | print(f"Max F-measure: {results['fm']['dynamic'].max():.4f}") 98 | print(f"Adaptive Precision: {results['pre']['adaptive']:.4f}") 99 | print(f"Adaptive IoU: {results['iou']['adaptive']:.4f}") 100 | 101 | **Available Handlers:** 102 | 103 | - ``FmeasureHandler`` - F-measure with configurable β 104 | - ``PrecisionHandler`` - Precision (Positive Predictive Value) 105 | - ``RecallHandler`` - Recall (Sensitivity, TPR) 106 | - ``IOUHandler`` - Intersection over Union 107 | - ``DICEHandler`` - Dice coefficient 108 | - ``BERHandler`` - Balanced Error Rate 109 | - ``KappaHandler`` - Cohen's Kappa 110 | - ``OverallAccuracyHandler`` - Overall classification accuracy 111 | - ``SpecificityHandler`` - Specificity (TNR) 112 | - ``SensitivityHandler`` - Sensitivity (same as Recall) 113 | - ``FPRHandler`` - False 
Positive Rate 114 | - ``TNRHandler`` - True Negative Rate 115 | - ``TPRHandler`` - True Positive Rate 116 | 117 | Creating a Custom Metric Recorder 118 | ---------------------------------- 119 | 120 | For managing multiple metrics conveniently, you can create a custom recorder class. 121 | 122 | Simple Metric Recorder 123 | ~~~~~~~~~~~~~~~~~~~~~~ 124 | 125 | .. code-block:: python 126 | 127 | import numpy as np 128 | from py_sod_metrics import MAE, Emeasure, Smeasure, Fmeasure, WeightedFmeasure, HumanCorrectionEffortMeasure 129 | 130 | class SimpleMetricRecorder: 131 | # A simple recorder for basic SOD metrics 132 | 133 | def __init__(self): 134 | self.mae = MAE() 135 | self.em = Emeasure() 136 | self.sm = Smeasure() 137 | self.fm = Fmeasure() 138 | self.wfm = WeightedFmeasure() 139 | self.hce = HumanCorrectionEffortMeasure() 140 | 141 | def step(self, pred, gt): 142 | # Update all metrics with a prediction-ground truth pair 143 | assert pred.shape == gt.shape 144 | assert pred.dtype == np.uint8 and gt.dtype == np.uint8 145 | 146 | self.mae.step(pred, gt) 147 | self.em.step(pred, gt) 148 | self.sm.step(pred, gt) 149 | self.fm.step(pred, gt) 150 | self.wfm.step(pred, gt) 151 | self.hce.step(pred, gt) 152 | 153 | def show(self, num_bits=3): 154 | # Get all metric results as a dictionary 155 | results = {} 156 | 157 | results['MAE'] = round(self.mae.get_results()['mae'], num_bits) 158 | results['Smeasure'] = round(self.sm.get_results()['sm'], num_bits) 159 | results['wFmeasure'] = round(self.wfm.get_results()['wfm'], num_bits) 160 | results['HCE'] = round(self.hce.get_results()['hce'], num_bits) 161 | 162 | em_results = self.em.get_results()['em'] 163 | results['maxEm'] = round(em_results['curve'].max(), num_bits) 164 | results['avgEm'] = round(em_results['curve'].mean(), num_bits) 165 | results['adpEm'] = round(em_results['adp'], num_bits) 166 | 167 | fm_results = self.fm.get_results()['fm'] 168 | results['maxFm'] = round(fm_results['curve'].max(), num_bits) 169 | 
results['avgFm'] = round(fm_results['curve'].mean(), num_bits) 170 | results['adpFm'] = round(fm_results['adp'], num_bits) 171 | 172 | return results 173 | 174 | # Usage example 175 | recorder = SimpleMetricRecorder() 176 | 177 | for pred, gt in dataset: 178 | recorder.step(pred, gt) 179 | 180 | results = recorder.show() 181 | print(results) 182 | 183 | Advanced Metric Recorder with FmeasureV2 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185 | 186 | For more comprehensive evaluation: 187 | 188 | .. code-block:: python 189 | 190 | import numpy as np 191 | from py_sod_metrics import ( 192 | MAE, Emeasure, Smeasure, WeightedFmeasure, HumanCorrectionEffortMeasure, 193 | FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, 194 | IOUHandler, DICEHandler, BERHandler, KappaHandler 195 | ) 196 | 197 | class AdvancedMetricRecorder: 198 | # Advanced recorder supporting many metrics via FmeasureV2 199 | 200 | def __init__(self): 201 | # Individual metrics that don't use FmeasureV2 202 | self.mae = MAE() 203 | self.em = Emeasure() 204 | self.sm = Smeasure() 205 | self.wfm = WeightedFmeasure() 206 | self.hce = HumanCorrectionEffortMeasure() 207 | 208 | # FmeasureV2 with multiple handlers 209 | self.fmv2 = FmeasureV2( 210 | metric_handlers={ 211 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 212 | "f1": FmeasureHandler(beta=1, with_adaptive=True, with_dynamic=True), 213 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 214 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 215 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 216 | "dice": DICEHandler(with_adaptive=True, with_dynamic=True), 217 | "ber": BERHandler(with_adaptive=True, with_dynamic=True), 218 | "kappa": KappaHandler(with_adaptive=True, with_dynamic=True), 219 | } 220 | ) 221 | 222 | def step(self, pred, gt): 223 | # Update all metrics 224 | assert pred.shape == gt.shape 225 | assert pred.dtype == np.uint8 and gt.dtype == np.uint8 226 | 227 
| self.mae.step(pred, gt) 228 | self.em.step(pred, gt) 229 | self.sm.step(pred, gt) 230 | self.wfm.step(pred, gt) 231 | self.hce.step(pred, gt) 232 | self.fmv2.step(pred, gt) 233 | 234 | def show(self, num_bits=3): 235 | # Get all results 236 | results = {} 237 | 238 | # Individual metrics 239 | results['MAE'] = round(self.mae.get_results()['mae'], num_bits) 240 | results['Smeasure'] = round(self.sm.get_results()['sm'], num_bits) 241 | results['wFmeasure'] = round(self.wfm.get_results()['wfm'], num_bits) 242 | results['HCE'] = round(self.hce.get_results()['hce'], num_bits) 243 | 244 | # E-measure 245 | em_data = self.em.get_results()['em'] 246 | results['maxEm'] = round(em_data['curve'].max(), num_bits) 247 | results['avgEm'] = round(em_data['curve'].mean(), num_bits) 248 | results['adpEm'] = round(em_data['adp'], num_bits) 249 | 250 | # FmeasureV2 metrics 251 | fmv2_results = self.fmv2.get_results() 252 | for metric_name in ['fm', 'f1', 'pre', 'rec', 'iou', 'dice', 'ber', 'kappa']: 253 | metric_data = fmv2_results[metric_name] 254 | if 'dynamic' in metric_data: 255 | results[f'max{metric_name}'] = round(metric_data['dynamic'].max(), num_bits) 256 | results[f'avg{metric_name}'] = round(metric_data['dynamic'].mean(), num_bits) 257 | if 'adaptive' in metric_data: 258 | results[f'adp{metric_name}'] = round(metric_data['adaptive'], num_bits) 259 | 260 | return results 261 | 262 | # Usage example 263 | recorder = AdvancedMetricRecorder() 264 | 265 | for pred, gt in dataset: 266 | recorder.step(pred, gt) 267 | 268 | results = recorder.show() 269 | for name, value in results.items(): 270 | print(f"{name}: {value}") 271 | 272 | Specialized Use Cases 273 | --------------------- 274 | 275 | Context-Measure for Camouflaged Object Detection 276 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 277 | 278 | .. 
code-block:: python 279 | 280 | from py_sod_metrics import ContextMeasure, CamouflageContextMeasure 281 | 282 | # Standard Context Measure 283 | cm = ContextMeasure() 284 | 285 | # Camouflage Context Measure (weighted version, requires image) 286 | ccm = CamouflageContextMeasure() 287 | 288 | for pred_path, gt_path, img_path in cod_dataset: 289 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 290 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 291 | img = cv2.imread(img_path) # RGB image 292 | 293 | cm.step(pred, gt) 294 | ccm.step(pred, gt, img) # Note: CCM requires the original image 295 | 296 | cm_score = cm.get_results()['cm'] 297 | ccm_score = ccm.get_results()['ccm'] 298 | 299 | print(f"Context Measure: {cm_score:.4f}") 300 | print(f"Camouflage Context Measure: {ccm_score:.4f}") 301 | 302 | Size-Invariant Metrics 303 | ~~~~~~~~~~~~~~~~~~~~~~ 304 | 305 | .. code-block:: python 306 | 307 | from py_sod_metrics import SizeInvarianceFmeasureV2, SizeInvarianceMAE, FmeasureHandler, PrecisionHandler, RecallHandler 308 | 309 | # Size-invariant MAE 310 | si_mae = SizeInvarianceMAE() 311 | 312 | # Size-invariant FmeasureV2 313 | si_fmv2 = SizeInvarianceFmeasureV2( 314 | metric_handlers={ 315 | "si_fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 316 | "si_pre": PrecisionHandler(with_adaptive=False, with_dynamic=True, sample_based=True), 317 | "si_rec": RecallHandler(with_adaptive=False, with_dynamic=True, sample_based=True), 318 | } 319 | ) 320 | 321 | # Process dataset 322 | for pred, gt in dataset: 323 | si_mae.step(pred, gt) 324 | si_fmv2.step(pred, gt) 325 | 326 | # Get results 327 | mae_score = si_mae.get_results()['si_mae'] 328 | fmv2_results = si_fmv2.get_results() 329 | 330 | print(f"SI-MAE: {mae_score:.4f}") 331 | 332 | Multi-Scale IoU 333 | ~~~~~~~~~~~~~~~ 334 | 335 | .. 
code-block:: python 336 | 337 | from py_sod_metrics import MSIoU 338 | 339 | # Initialize with different strategies 340 | msiou = MSIoU(with_dynamic=True, with_adaptive=True, with_binary=True) 341 | 342 | for pred, gt in dataset: 343 | msiou.step(pred, gt) 344 | 345 | results = msiou.get_results() 346 | 347 | print(f"MS-IoU (adaptive): {results['adaptive']:.4f}") 348 | print(f"MS-IoU (max): {results['dynamic'].max():.4f}") 349 | print(f"MS-IoU (mean): {results['dynamic'].mean():.4f}") 350 | print(f"MS-IoU (binary): {results['binary']:.4f}") 351 | 352 | Complete Evaluation Example 353 | ---------------------------- 354 | 355 | Here's a complete, production-ready example: 356 | 357 | .. code-block:: python 358 | 359 | import os 360 | import cv2 361 | import numpy as np 362 | from py_sod_metrics import ( 363 | MAE, Emeasure, Smeasure, WeightedFmeasure, 364 | FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, IOUHandler 365 | ) 366 | 367 | class SODEvaluator: 368 | # Complete SOD evaluation class 369 | 370 | def __init__(self): 371 | self.mae = MAE() 372 | self.em = Emeasure() 373 | self.sm = Smeasure() 374 | self.wfm = WeightedFmeasure() 375 | 376 | self.fmv2 = FmeasureV2( 377 | metric_handlers={ 378 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 379 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 380 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 381 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 382 | } 383 | ) 384 | 385 | def step(self, pred, gt): 386 | self.mae.step(pred, gt) 387 | self.em.step(pred, gt) 388 | self.sm.step(pred, gt) 389 | self.wfm.step(pred, gt) 390 | self.fmv2.step(pred, gt) 391 | 392 | def get_results(self): 393 | results = { 394 | 'MAE': self.mae.get_results()['mae'], 395 | 'Smeasure': self.sm.get_results()['sm'], 396 | 'wFmeasure': self.wfm.get_results()['wfm'], 397 | } 398 | 399 | em = self.em.get_results()['em'] 400 | results.update({ 401 | 'maxEm': 
em['curve'].max(), 402 | 'avgEm': em['curve'].mean(), 403 | 'adpEm': em['adp'], 404 | }) 405 | 406 | fmv2 = self.fmv2.get_results() 407 | for name in ['fm', 'pre', 'rec', 'iou']: 408 | data = fmv2[name] 409 | results[f'max{name}'] = data['dynamic'].max() 410 | results[f'avg{name}'] = data['dynamic'].mean() 411 | results[f'adp{name}'] = data['adaptive'] 412 | 413 | return results 414 | 415 | def evaluate_predictions(pred_dir, gt_dir): 416 | # Evaluate all predictions in a directory 417 | evaluator = SODEvaluator() 418 | 419 | pred_files = sorted(os.listdir(pred_dir)) 420 | gt_files = sorted(os.listdir(gt_dir)) 421 | 422 | assert len(pred_files) == len(gt_files), "Mismatch in number of files" 423 | 424 | for pred_file, gt_file in zip(pred_files, gt_files): 425 | pred = cv2.imread(os.path.join(pred_dir, pred_file), cv2.IMREAD_GRAYSCALE) 426 | gt = cv2.imread(os.path.join(gt_dir, gt_file), cv2.IMREAD_GRAYSCALE) 427 | 428 | if pred.shape != gt.shape: 429 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 430 | 431 | evaluator.step(pred, gt) 432 | 433 | results = evaluator.get_results() 434 | 435 | print("=" * 50) 436 | print("Evaluation Results") 437 | print("=" * 50) 438 | for metric, value in sorted(results.items()): 439 | print(f"{metric:20s}: {value:.4f}") 440 | 441 | return results 442 | 443 | # Run evaluation 444 | if __name__ == "__main__": 445 | pred_directory = "./predictions" 446 | gt_directory = "./ground_truth" 447 | results = evaluate_predictions(pred_directory, gt_directory) 448 | 449 | Best Practices 450 | -------------- 451 | 452 | 1. **Data Format** 453 | 454 | - Predictions and ground truth should be ``uint8`` numpy arrays 455 | - Values should be in range [0, 255] 456 | - Ground truth masks should typically be binary (0 or 255) 457 | - Ensure prediction and ground truth have the same spatial dimensions 458 | 459 | 2. 
**Memory Efficiency** 460 | 461 | - Use the ``step()`` method iteratively for large datasets 462 | - Call ``get_results()`` only once after processing all samples 463 | - Avoid loading all images into memory at once 464 | 465 | 3. **Result Interpretation** 466 | 467 | - ``adaptive``: Threshold-based metric using 2× mean of predictions 468 | - ``dynamic``: Curve across all thresholds (256 points) 469 | - ``binary``: Metric computed on binarized predictions 470 | - ``curve``: Full precision-recall curve or threshold-based curve 471 | 472 | 4. **Choosing Metrics** 473 | 474 | - **For SOD**: MAE, S-measure, E-measure, F-measure, Weighted F-measure 475 | - **For COD**: Add Context-Measure and Camouflage Context-Measure 476 | - **For multi-scale objects**: Use size-invariant (SI) variants 477 | - **For fine structures**: Use Multi-Scale IoU 478 | - **For medical imaging**: Consider Dice coefficient and IoU 479 | 480 | 5. **Performance Tips** 481 | 482 | - Resize predictions to match ground truth size before calling ``step()`` 483 | - Use FmeasureV2 to compute multiple related metrics efficiently 484 | - Specify only the metrics you need to save computation time 485 | 486 | Reference 487 | --------- 488 | 489 | For more examples, see the `examples folder `_ in the GitHub repository: 490 | 491 | - ``metric_recorder.py`` - Production-ready metric recorder implementations 492 | - ``test_metrics.py`` - Comprehensive test cases showing all features 493 | -------------------------------------------------------------------------------- /examples/metric_recorder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | sys.path.append("..") 8 | import py_sod_metrics 9 | 10 | 11 | def ndarray_to_basetype(data): 12 | """ 13 | 将单独的ndarray,或者tuple,list或者dict中的ndarray转化为基本数据类型, 14 | 即列表(.tolist())和python标量 15 | """ 16 | 17 | def _to_list_or_scalar(item): 18 | listed_item = 
item.tolist() 19 | if isinstance(listed_item, list) and len(listed_item) == 1: 20 | listed_item = listed_item[0] 21 | return listed_item 22 | 23 | if isinstance(data, (tuple, list)): 24 | results = [_to_list_or_scalar(item) for item in data] 25 | elif isinstance(data, dict): 26 | results = {k: _to_list_or_scalar(item) for k, item in data.items()} 27 | else: 28 | assert isinstance(data, np.ndarray) 29 | results = _to_list_or_scalar(data) 30 | return results 31 | 32 | 33 | INDIVADUAL_METRIC_MAPPING = { 34 | "mae": py_sod_metrics.MAE, 35 | "fm": py_sod_metrics.Fmeasure, 36 | "em": py_sod_metrics.Emeasure, 37 | "sm": py_sod_metrics.Smeasure, 38 | "wfm": py_sod_metrics.WeightedFmeasure, 39 | "hce": py_sod_metrics.HumanCorrectionEffortMeasure, 40 | } 41 | 42 | 43 | class GrayscaleMetricRecorderV1: 44 | def __init__(self): 45 | """ 46 | 用于统计各种指标的类 47 | https://github.com/lartpang/Py-SOD-VOS-EvalToolkit/blob/81ce89da6813fdd3e22e3f20e3a09fe1e4a1a87c/utils/recorders/metric_recorder.py 48 | 49 | 主要应用于旧版本实现中的五个指标,即mae/fm/sm/em/wfm/hce。推荐使用V2版本。 50 | """ 51 | self.mae = INDIVADUAL_METRIC_MAPPING["mae"]() 52 | self.fm = INDIVADUAL_METRIC_MAPPING["fm"]() 53 | self.sm = INDIVADUAL_METRIC_MAPPING["sm"]() 54 | self.em = INDIVADUAL_METRIC_MAPPING["em"]() 55 | self.wfm = INDIVADUAL_METRIC_MAPPING["wfm"]() 56 | self.hce = INDIVADUAL_METRIC_MAPPING["hce"]() 57 | 58 | def step(self, pre: np.ndarray, gt: np.ndarray): 59 | assert pre.shape == gt.shape 60 | assert pre.dtype == np.uint8 61 | assert gt.dtype == np.uint8 62 | 63 | self.mae.step(pre, gt) 64 | self.sm.step(pre, gt) 65 | self.fm.step(pre, gt) 66 | self.em.step(pre, gt) 67 | self.wfm.step(pre, gt) 68 | self.hce.step(pre, gt) 69 | 70 | def get_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 71 | """ 72 | 返回指标计算结果: 73 | 74 | - 曲线数据(sequential): fm/em/p/r 75 | - 数值指标(numerical): SM/MAE/maxE/avgE/adpE/maxF/avgF/adpF/wFm 76 | """ 77 | fm_info = self.fm.get_results() 78 | fm = fm_info["fm"] 79 | pr = 
fm_info["pr"] 80 | wfm = self.wfm.get_results()["wfm"] 81 | sm = self.sm.get_results()["sm"] 82 | em = self.em.get_results()["em"] 83 | mae = self.mae.get_results()["mae"] 84 | hce = self.hce.get_results()["hce"] 85 | 86 | sequential_results = { 87 | "fm": np.flip(fm["curve"]), 88 | "em": np.flip(em["curve"]), 89 | "p": np.flip(pr["p"]), 90 | "r": np.flip(pr["r"]), 91 | } 92 | numerical_results = { 93 | "SM": sm, 94 | "MAE": mae, 95 | "maxE": em["curve"].max(), 96 | "avgE": em["curve"].mean(), 97 | "adpE": em["adp"], 98 | "maxF": fm["curve"].max(), 99 | "avgF": fm["curve"].mean(), 100 | "adpF": fm["adp"], 101 | "wFm": wfm, 102 | "HCE": hce, 103 | } 104 | if num_bits is not None and isinstance(num_bits, int): 105 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 106 | if not return_ndarray: 107 | sequential_results = ndarray_to_basetype(sequential_results) 108 | numerical_results = ndarray_to_basetype(numerical_results) 109 | return {"sequential": sequential_results, "numerical": numerical_results} 110 | 111 | 112 | sample_gray = dict(with_adaptive=True, with_dynamic=True) 113 | sample_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=True) 114 | overall_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False) 115 | # fmt: off 116 | GRAYSCALE_METRIC_MAPPING = { 117 | # 灰度数据指标 118 | "fm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=0.3)}, 119 | "f1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=1)}, 120 | "pre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": sample_gray}, 121 | "rec": {"handler": py_sod_metrics.RecallHandler, "kwargs": sample_gray}, 122 | "iou": {"handler": py_sod_metrics.IOUHandler, "kwargs": sample_gray}, 123 | "dice": {"handler": py_sod_metrics.DICEHandler, "kwargs": sample_gray}, 124 | "spec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": sample_gray}, 125 | 
"ber": {"handler": py_sod_metrics.BERHandler, "kwargs": sample_gray}, 126 | "oa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": sample_gray}, 127 | "kappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": sample_gray}, 128 | } 129 | BINARY_METRIC_MAPPING = { 130 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 131 | "sample_bifm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_bin, beta=0.3)}, 132 | "sample_bif1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_bin, beta=1)}, 133 | "sample_bipre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": sample_bin}, 134 | "sample_birec": {"handler": py_sod_metrics.RecallHandler, "kwargs": sample_bin}, 135 | "sample_biiou": {"handler": py_sod_metrics.IOUHandler, "kwargs": sample_bin}, 136 | "sample_bidice": {"handler": py_sod_metrics.DICEHandler, "kwargs": sample_bin}, 137 | "sample_bispec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": sample_bin}, 138 | "sample_biber": {"handler": py_sod_metrics.BERHandler, "kwargs": sample_bin}, 139 | "sample_bioa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": sample_bin}, 140 | "sample_bikappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": sample_bin}, 141 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 142 | "overall_bifm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**overall_bin, beta=0.3)}, 143 | "overall_bif1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**overall_bin, beta=1)}, 144 | "overall_bipre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": overall_bin}, 145 | "overall_birec": {"handler": py_sod_metrics.RecallHandler, "kwargs": overall_bin}, 146 | "overall_biiou": {"handler": py_sod_metrics.IOUHandler, "kwargs": overall_bin}, 147 | "overall_bidice": {"handler": py_sod_metrics.DICEHandler, "kwargs": overall_bin}, 148 | "overall_bispec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": overall_bin}, 149 | "overall_biber": {"handler": 
py_sod_metrics.BERHandler, "kwargs": overall_bin}, 150 | "overall_bioa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": overall_bin}, 151 | "overall_bikappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": overall_bin}, 152 | } 153 | SIZEINVARIANCE_METRIC_MAPPING = { 154 | "handler":{ 155 | "si_fm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=0.3)}, 156 | "si_pre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 157 | "si_rec": {"handler": py_sod_metrics.RecallHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 158 | "si_tpr": {"handler": py_sod_metrics.TPRHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 159 | "si_fpr": {"handler": py_sod_metrics.FPRHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 160 | }, 161 | "si_fmeasurev2": py_sod_metrics.SizeInvarianceFmeasureV2, 162 | "si_mae": py_sod_metrics.SizeInvarianceMAE, 163 | } 164 | # fmt: on 165 | 166 | 167 | class GrayscaleMetricRecorderV2: 168 | supported_metrics = ["mae", "em", "sm", "wfm", "hce"] + sorted(GRAYSCALE_METRIC_MAPPING.keys()) 169 | 170 | def __init__(self, metric_names=("sm", "wfm", "mae", "fmeasure", "em")): 171 | """ 172 | 用于统计各种指标的类,支持更多的指标,更好的兼容性。 173 | """ 174 | if not metric_names: 175 | metric_names = self.supported_metrics 176 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 177 | 178 | self.metric_objs = {} 179 | has_existed = False 180 | for metric_name in metric_names: 181 | if metric_name in INDIVADUAL_METRIC_MAPPING: 182 | self.metric_objs[metric_name] = INDIVADUAL_METRIC_MAPPING[metric_name]() 183 | else: # metric_name in GRAYSCALE_METRIC_MAPPING 184 | if not has_existed: # only init once 185 | self.metric_objs["fmeasurev2"] = py_sod_metrics.FmeasureV2() 186 | has_existed = True 187 | metric_handler 
= GRAYSCALE_METRIC_MAPPING[metric_name] 188 | self.metric_objs["fmeasurev2"].add_handler( 189 | handler_name=metric_name, 190 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 191 | ) 192 | 193 | def step(self, pre: np.ndarray, gt: np.ndarray): 194 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 195 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 196 | 197 | for m_obj in self.metric_objs.values(): 198 | m_obj.step(pre, gt) 199 | 200 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 201 | sequential_results = {} 202 | numerical_results = {} 203 | for m_name, m_obj in self.metric_objs.items(): 204 | info = m_obj.get_results() 205 | if m_name == "fmeasurev2": 206 | for _name, results in info.items(): 207 | dynamic_results = results.get("dynamic") 208 | adaptive_results = results.get("adaptive") 209 | if dynamic_results is not None: 210 | sequential_results[_name] = np.flip(dynamic_results) 211 | numerical_results[f"max{_name}"] = dynamic_results.max() 212 | numerical_results[f"avg{_name}"] = dynamic_results.mean() 213 | if adaptive_results is not None: 214 | numerical_results[f"adp{_name}"] = adaptive_results 215 | else: 216 | results = info[m_name] 217 | if m_name in ("wfm", "sm", "mae", "hce"): 218 | numerical_results[m_name] = results 219 | elif m_name in ("fm", "em"): 220 | sequential_results[m_name] = np.flip(results["curve"]) 221 | numerical_results.update( 222 | { 223 | f"max{m_name}": results["curve"].max(), 224 | f"avg{m_name}": results["curve"].mean(), 225 | f"adp{m_name}": results["adp"], 226 | } 227 | ) 228 | else: 229 | raise NotImplementedError(m_name) 230 | 231 | if num_bits is not None and isinstance(num_bits, int): 232 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 233 | if not return_ndarray: 234 | sequential_results = ndarray_to_basetype(sequential_results) 235 | numerical_results = ndarray_to_basetype(numerical_results) 236 | return 
{"sequential": sequential_results, "numerical": numerical_results} 237 | 238 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 239 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 240 | 241 | 242 | class BinaryMetricRecorder: 243 | supported_metrics = ["mae", "sm", "wfm", "hce"] + sorted(BINARY_METRIC_MAPPING.keys()) 244 | 245 | def __init__(self, metric_names=("bif1", "biprecision", "birecall", "biiou")): 246 | """ 247 | 用于统计各种指标的类,主要适用于对单通道灰度图计算二值图像的指标。 248 | """ 249 | if not metric_names: 250 | metric_names = self.supported_metrics 251 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 252 | 253 | self.metric_objs = {} 254 | has_existed = False 255 | for metric_name in metric_names: 256 | if metric_name in INDIVADUAL_METRIC_MAPPING: 257 | self.metric_objs[metric_name] = INDIVADUAL_METRIC_MAPPING[metric_name]() 258 | else: # metric_name in BINARY_METRIC_MAPPING 259 | if not has_existed: # only init once 260 | self.metric_objs["fmeasurev2"] = py_sod_metrics.FmeasureV2() 261 | has_existed = True 262 | metric_handler = BINARY_METRIC_MAPPING[metric_name] 263 | self.metric_objs["fmeasurev2"].add_handler( 264 | handler_name=metric_name, 265 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 266 | ) 267 | 268 | def step(self, pre: np.ndarray, gt: np.ndarray): 269 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 270 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 271 | 272 | for m_obj in self.metric_objs.values(): 273 | m_obj.step(pre, gt) 274 | 275 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 276 | numerical_results = {} 277 | for m_name, m_obj in self.metric_objs.items(): 278 | info = m_obj.get_results() 279 | if m_name == "fmeasurev2": 280 | for _name, results in info.items(): 281 | binary_results = results.get("binary") 282 | if binary_results is not None: 
283 | numerical_results[_name] = binary_results 284 | else: 285 | results = info[m_name] 286 | if m_name in ("mae", "sm", "wfm", "hce"): 287 | numerical_results[m_name] = results 288 | else: 289 | raise NotImplementedError(m_name) 290 | 291 | if num_bits is not None and isinstance(num_bits, int): 292 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 293 | if not return_ndarray: 294 | numerical_results = ndarray_to_basetype(numerical_results) 295 | return {"numerical": numerical_results} 296 | 297 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 298 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 299 | 300 | 301 | class TargetwiseGrayscaleMetricRecorderV2: 302 | supported_metrics = ["si_mae"] + sorted(SIZEINVARIANCE_METRIC_MAPPING["handler"].keys()) 303 | 304 | def __init__(self, metric_names=("si_mae",)): 305 | if not metric_names: 306 | metric_names = self.supported_metrics 307 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 308 | 309 | self.metric_objs = {} 310 | has_existed = False 311 | for metric_name in metric_names: 312 | if metric_name in SIZEINVARIANCE_METRIC_MAPPING["handler"]: 313 | if not has_existed: # only init once 314 | self.metric_objs["si_fmeasurev2"] = SIZEINVARIANCE_METRIC_MAPPING["si_fmeasurev2"]() 315 | has_existed = True 316 | metric_handler = SIZEINVARIANCE_METRIC_MAPPING["handler"][metric_name] 317 | self.metric_objs["si_fmeasurev2"].add_handler( 318 | handler_name=metric_name, 319 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 320 | ) 321 | else: 322 | self.metric_objs[metric_name] = SIZEINVARIANCE_METRIC_MAPPING[metric_name]() 323 | 324 | def step(self, pre: np.ndarray, gt: np.ndarray): 325 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 326 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 327 | 328 | for m_obj in 
self.metric_objs.values(): 329 | m_obj.step(pre, gt) 330 | 331 | def cal_auc(self, y, x): 332 | sorted_idx = np.argsort(x, axis=-1, kind="stable") 333 | x = np.take_along_axis(x, sorted_idx, axis=-1) 334 | y = np.take_along_axis(y, sorted_idx, axis=-1) 335 | return np.trapz(y, x, axis=-1) 336 | 337 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 338 | sequential_results = {} 339 | numerical_results = {} 340 | for m_name, m_obj in self.metric_objs.items(): 341 | info = m_obj.get_results() 342 | 343 | if m_name == "si_fmeasurev2": 344 | # AUC-ROC 345 | if "si_tpr" in info and "si_fpr" in info: 346 | ys = info.pop("si_tpr")["dynamic"] # >=255,>=254,...>=1,>=0 347 | xs = info.pop("si_fpr")["dynamic"] 348 | if isinstance(ys, list) and isinstance(xs, list): # Nx[T'x256] 349 | auc_results = [] 350 | for y, x in zip(ys, xs): 351 | # NOTE: before calculate the auc, we need to flip the y and x to corresponding to ascending thresholds 352 | # because the dynamic results from our metrics is based on the descending order of thresholds, i.e., >=255,>=254,...>=1,>=0 353 | y = np.flip(y, axis=-1) 354 | x = np.flip(x, axis=-1) 355 | auc_results.append(self.cal_auc(y, x).mean()) 356 | numerical_results["si_sample_auc_roc"] = np.asarray(auc_results).mean() 357 | else: # 256 358 | numerical_results["si_overall_auc_roc"] = self.cal_auc(y=ys, x=xs).mean() 359 | 360 | # AUC-PR 361 | if "si_pre" in info and "si_rec" in info: 362 | ys = info.pop("si_pre")["dynamic"] # >=255,>=254,...>=1,>=0 363 | xs = info.pop("si_rec")["dynamic"] 364 | if isinstance(ys, list) and isinstance(xs, list): # Nx[T'x256] 365 | auc_results = [] 366 | for y, x in zip(ys, xs): 367 | y = np.flip(y, axis=-1) 368 | x = np.flip(x, axis=-1) 369 | auc_results.append(self.cal_auc(y, x).mean()) 370 | numerical_results["si_sample_auc_pr"] = np.asarray(auc_results).mean() 371 | else: # 256 372 | numerical_results["si_overall_auc_pr"] = self.cal_auc(y=ys, x=xs).mean() 373 | 374 | for 
_name, results in info.items(): 375 | dynamic_results = results.get("dynamic") 376 | if dynamic_results is not None: 377 | if isinstance(dynamic_results, list): # Nx[T'x256] 378 | max_results = [] 379 | avg_results = [] 380 | seq_results = [] 381 | for s in dynamic_results: 382 | max_results.append(s.max(axis=-1).mean()) # 1 383 | avg_results.append(s.mean(axis=-1).mean()) # 1 384 | seq_results.append(s.mean(axis=0)) # 256 385 | seq_results = np.mean(np.asarray(seq_results), axis=0) 386 | numerical_results[f"si_sample_max{_name}"] = np.asarray(max_results).mean() 387 | numerical_results[f"si_sample_avg{_name}"] = np.asarray(avg_results).mean() 388 | else: # 256 389 | seq_results = dynamic_results 390 | numerical_results[f"si_overall_max{_name}"] = dynamic_results.max() 391 | numerical_results[f"si_overall_avg{_name}"] = dynamic_results.mean() 392 | sequential_results[_name] = np.flip(seq_results) 393 | 394 | adaptive_results = results.get("adaptive") 395 | if adaptive_results is not None: 396 | numerical_results[f"si_sample_adp{_name}"] = adaptive_results 397 | else: 398 | results = info[m_name] 399 | if m_name in ("si_mae",): 400 | numerical_results[m_name] = results 401 | else: 402 | raise NotImplementedError(m_name) 403 | 404 | if num_bits is not None and isinstance(num_bits, int): 405 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 406 | if not return_ndarray: 407 | sequential_results = ndarray_to_basetype(sequential_results) 408 | numerical_results = ndarray_to_basetype(numerical_results) 409 | return {"sequential": sequential_results, "numerical": numerical_results} 410 | 411 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 412 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 413 | 414 | 415 | if __name__ == "__main__": 416 | data_root = "./test_data" 417 | mask_root = os.path.join(data_root, "masks") 418 | pred_root = os.path.join(data_root, "preds") 419 
| masks = [os.path.join(mask_root, f) for f in sorted(os.listdir(mask_root))] 420 | preds = [os.path.join(pred_root, f) for f in sorted(os.listdir(pred_root))] 421 | 422 | metrics_v1 = GrayscaleMetricRecorderV2(metric_names=GrayscaleMetricRecorderV2.supported_metrics) 423 | metrics_v2 = BinaryMetricRecorder(metric_names=BinaryMetricRecorder.supported_metrics) 424 | metrics_v3 = TargetwiseGrayscaleMetricRecorderV2( 425 | metric_names=TargetwiseGrayscaleMetricRecorderV2.supported_metrics 426 | ) 427 | for mask, pred in zip(masks, preds): 428 | mask = cv2.imread(mask, cv2.IMREAD_GRAYSCALE) 429 | pred = cv2.imread(pred, cv2.IMREAD_GRAYSCALE) 430 | if pred.shape != mask.shape: 431 | pred = cv2.resize(pred, dsize=mask.shape[::-1], interpolation=cv2.INTER_LINEAR) 432 | 433 | metrics_v1.step(pred, mask) 434 | metrics_v2.step(pred, mask) 435 | metrics_v3.step(pred, mask) 436 | print(metrics_v1.show()) 437 | print(metrics_v2.show()) 438 | print(metrics_v3.show()) 439 | -------------------------------------------------------------------------------- /examples/test_metrics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import unittest 5 | from pprint import pprint 6 | 7 | import cv2 8 | import numpy as np 9 | from skimage import data 10 | 11 | sys.path.append("..") 12 | import py_sod_metrics 13 | 14 | with open("./version_performance.json", encoding="utf-8", mode="r") as f: 15 | default_results = json.load(f) 16 | 17 | 18 | def cal_auc(y: np.ndarray, x: np.ndarray): 19 | assert y.shape == x.shape, (y.shape, x.shape) 20 | sorted_idx = np.argsort(x, axis=-1, kind="stable") 21 | y = np.take_along_axis(y, sorted_idx, axis=-1) 22 | x = np.take_along_axis(x, sorted_idx, axis=-1) 23 | return np.trapz(y=y, x=x, axis=-1) 24 | 25 | 26 | def reduce_dynamic_results_for_max_avg(dynamic_results: list): # Nx[T'x256] -> Nx[T'] -> N -> 1 27 | max_results = [] 28 | avg_results = [] 29 | for s in 
dynamic_results: 30 | max_results.append(s.max(axis=-1).mean()) 31 | avg_results.append(s.mean(axis=-1).mean()) 32 | return np.asarray(max_results).mean(), np.asarray(avg_results).mean() 33 | 34 | 35 | def reduce_dynamic_results_for_auc(ys: list, xs: list): # Nx[T'x256] -> Nx[T'] -> N -> 1 36 | auc_results = [] 37 | for y, x in zip(ys, xs): 38 | # NOTE: before calculate the auc, we need to flip the y and x to corresponding to ascending thresholds 39 | # because the dynamic results from our metrics is based on the descending order of thresholds, i.e., >=255,>=254,...>=1,>=0 40 | y = np.flip(y, -1) 41 | x = np.flip(x, -1) 42 | auc_results.append(cal_auc(y=y, x=x).mean()) 43 | return np.asarray(auc_results).mean() 44 | 45 | 46 | class CheckMetricTestCase(unittest.TestCase): 47 | @classmethod 48 | def setUpClass(cls): 49 | FM = py_sod_metrics.Fmeasure() 50 | WFM = py_sod_metrics.WeightedFmeasure() 51 | SM = py_sod_metrics.Smeasure() 52 | EM = py_sod_metrics.Emeasure() 53 | MAE = py_sod_metrics.MAE() 54 | HCE = py_sod_metrics.HumanCorrectionEffortMeasure() 55 | CM = py_sod_metrics.ContextMeasure() 56 | CCM = py_sod_metrics.CamouflageContextMeasure() 57 | MSIOU = py_sod_metrics.MSIoU(with_dynamic=True, with_adaptive=True, with_binary=True) 58 | 59 | # fmt: off 60 | sample_gray = dict(with_adaptive=True, with_dynamic=True) 61 | sample_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=True) 62 | overall_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False) 63 | FMv2 = py_sod_metrics.FmeasureV2( 64 | metric_handlers={ 65 | # 灰度数据指标 66 | "fm": py_sod_metrics.FmeasureHandler(**sample_gray, beta=0.3), 67 | "f1": py_sod_metrics.FmeasureHandler(**sample_gray, beta=1), 68 | "pre": py_sod_metrics.PrecisionHandler(**sample_gray), 69 | "rec": py_sod_metrics.RecallHandler(**sample_gray), 70 | "fpr": py_sod_metrics.FPRHandler(**sample_gray), 71 | "iou": py_sod_metrics.IOUHandler(**sample_gray), 72 | "dice": 
py_sod_metrics.DICEHandler(**sample_gray), 73 | "spec": py_sod_metrics.SpecificityHandler(**sample_gray), 74 | "ber": py_sod_metrics.BERHandler(**sample_gray), 75 | "oa": py_sod_metrics.OverallAccuracyHandler(**sample_gray), 76 | "kappa": py_sod_metrics.KappaHandler(**sample_gray), 77 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 78 | "sample_bifm": py_sod_metrics.FmeasureHandler(**sample_bin, beta=0.3), 79 | "sample_bif1": py_sod_metrics.FmeasureHandler(**sample_bin, beta=1), 80 | "sample_bipre": py_sod_metrics.PrecisionHandler(**sample_bin), 81 | "sample_birec": py_sod_metrics.RecallHandler(**sample_bin), 82 | "sample_bifpr": py_sod_metrics.FPRHandler(**sample_bin), 83 | "sample_biiou": py_sod_metrics.IOUHandler(**sample_bin), 84 | "sample_bidice": py_sod_metrics.DICEHandler(**sample_bin), 85 | "sample_bispec": py_sod_metrics.SpecificityHandler(**sample_bin), 86 | "sample_biber": py_sod_metrics.BERHandler(**sample_bin), 87 | "sample_bioa": py_sod_metrics.OverallAccuracyHandler(**sample_bin), 88 | "sample_bikappa": py_sod_metrics.KappaHandler(**sample_bin), 89 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 90 | "overall_bifm": py_sod_metrics.FmeasureHandler(**overall_bin, beta=0.3), 91 | "overall_bif1": py_sod_metrics.FmeasureHandler(**overall_bin, beta=1), 92 | "overall_bipre": py_sod_metrics.PrecisionHandler(**overall_bin), 93 | "overall_birec": py_sod_metrics.RecallHandler(**overall_bin), 94 | "overall_bifpr": py_sod_metrics.FPRHandler(**overall_bin), 95 | "overall_biiou": py_sod_metrics.IOUHandler(**overall_bin), 96 | "overall_bidice": py_sod_metrics.DICEHandler(**overall_bin), 97 | "overall_bispec": py_sod_metrics.SpecificityHandler(**overall_bin), 98 | "overall_biber": py_sod_metrics.BERHandler(**overall_bin), 99 | "overall_bioa": py_sod_metrics.OverallAccuracyHandler(**overall_bin), 100 | "overall_bikappa": py_sod_metrics.KappaHandler(**overall_bin), 101 | } 102 | ) 103 | 104 | # adaptive is not supported for non-sample-based metrics 105 | overall_gray = 
dict(with_adaptive=False, with_dynamic=True, sample_based=False) 106 | SI_MAE = py_sod_metrics.SizeInvarianceMAE() 107 | SI_FMv2 = py_sod_metrics.SizeInvarianceFmeasureV2( 108 | metric_handlers={ 109 | "si_sample_fm": py_sod_metrics.FmeasureHandler(**sample_gray, beta=0.3), 110 | "si_sample_f1": py_sod_metrics.FmeasureHandler(**sample_gray, beta=1), 111 | "si_sample_pre": py_sod_metrics.PrecisionHandler(**sample_gray), 112 | "si_sample_rec": py_sod_metrics.RecallHandler(**sample_gray), 113 | "si_sample_fpr": py_sod_metrics.FPRHandler(**sample_gray), 114 | "si_sample_iou": py_sod_metrics.IOUHandler(**sample_gray), 115 | "si_sample_dice": py_sod_metrics.DICEHandler(**sample_gray), 116 | "si_sample_spec": py_sod_metrics.SpecificityHandler(**sample_gray), 117 | "si_sample_ber": py_sod_metrics.BERHandler(**sample_gray), 118 | "si_sample_oa": py_sod_metrics.OverallAccuracyHandler(**sample_gray), 119 | "si_sample_kappa": py_sod_metrics.KappaHandler(**sample_gray), 120 | # 121 | "si_overall_fm": py_sod_metrics.FmeasureHandler(**overall_gray, beta=0.3), 122 | "si_overall_f1": py_sod_metrics.FmeasureHandler(**overall_gray, beta=1), 123 | "si_overall_pre": py_sod_metrics.PrecisionHandler(**overall_gray), 124 | "si_overall_rec": py_sod_metrics.RecallHandler(**overall_gray), 125 | "si_overall_fpr": py_sod_metrics.FPRHandler(**overall_gray), 126 | "si_overall_iou": py_sod_metrics.IOUHandler(**overall_gray), 127 | "si_overall_dice": py_sod_metrics.DICEHandler(**overall_gray), 128 | "si_overall_spec": py_sod_metrics.SpecificityHandler(**overall_gray), 129 | "si_overall_ber": py_sod_metrics.BERHandler(**overall_gray), 130 | "si_overall_oa": py_sod_metrics.OverallAccuracyHandler(**overall_gray), 131 | "si_overall_kappa": py_sod_metrics.KappaHandler(**overall_gray), 132 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 133 | "si_sample_bifm": py_sod_metrics.FmeasureHandler(**sample_bin, beta=0.3), 134 | "si_sample_bif1": py_sod_metrics.FmeasureHandler(**sample_bin, beta=1), 135 | "si_sample_bipre": 
py_sod_metrics.PrecisionHandler(**sample_bin), 136 | "si_sample_birec": py_sod_metrics.RecallHandler(**sample_bin), 137 | "si_sample_bifpr": py_sod_metrics.FPRHandler(**sample_bin), 138 | "si_sample_biiou": py_sod_metrics.IOUHandler(**sample_bin), 139 | "si_sample_bidice": py_sod_metrics.DICEHandler(**sample_bin), 140 | "si_sample_bispec": py_sod_metrics.SpecificityHandler(**sample_bin), 141 | "si_sample_biber": py_sod_metrics.BERHandler(**sample_bin), 142 | "si_sample_bioa": py_sod_metrics.OverallAccuracyHandler(**sample_bin), 143 | "si_sample_bikappa": py_sod_metrics.KappaHandler(**sample_bin), 144 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 145 | "si_overall_bifm": py_sod_metrics.FmeasureHandler(**overall_bin, beta=0.3), 146 | "si_overall_bif1": py_sod_metrics.FmeasureHandler(**overall_bin, beta=1), 147 | "si_overall_bipre": py_sod_metrics.PrecisionHandler(**overall_bin), 148 | "si_overall_birec": py_sod_metrics.RecallHandler(**overall_bin), 149 | "si_overall_bifpr": py_sod_metrics.FPRHandler(**overall_bin), 150 | "si_overall_biiou": py_sod_metrics.IOUHandler(**overall_bin), 151 | "si_overall_bidice": py_sod_metrics.DICEHandler(**overall_bin), 152 | "si_overall_bispec": py_sod_metrics.SpecificityHandler(**overall_bin), 153 | "si_overall_biber": py_sod_metrics.BERHandler(**overall_bin), 154 | "si_overall_bioa": py_sod_metrics.OverallAccuracyHandler(**overall_bin), 155 | "si_overall_bikappa": py_sod_metrics.KappaHandler(**overall_bin), 156 | } 157 | ) 158 | # fmt: on 159 | 160 | data_root = "./test_data" 161 | mask_root = os.path.join(data_root, "masks") 162 | pred_root = os.path.join(data_root, "preds") 163 | mask_name_list = sorted(os.listdir(mask_root)) 164 | 165 | for i, mask_name in enumerate(mask_name_list): 166 | print(f"[{i}] Processing {mask_name}...") 167 | mask_path = os.path.join(mask_root, mask_name) 168 | pred_path = os.path.join(pred_root, mask_name) 169 | mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) 170 | pred = cv2.imread(pred_path, 
cv2.IMREAD_GRAYSCALE) 171 | 172 | # random select an image from skimage.data 173 | img = data.astronaut() 174 | img = cv2.resize(img, dsize=(mask.shape[1], mask.shape[0]), interpolation=cv2.INTER_LINEAR) 175 | 176 | FM.step(pred=pred, gt=mask) 177 | WFM.step(pred=pred, gt=mask) 178 | SM.step(pred=pred, gt=mask) 179 | EM.step(pred=pred, gt=mask) 180 | MAE.step(pred=pred, gt=mask) 181 | HCE.step(pred=pred, gt=mask) 182 | MSIOU.step(pred=pred, gt=mask) 183 | FMv2.step(pred=pred, gt=mask) 184 | SI_MAE.step(pred=pred, gt=mask) 185 | SI_FMv2.step(pred=pred, gt=mask) 186 | CM.step(pred=pred, gt=mask) 187 | CCM.step(pred=pred, gt=mask, img=img) 188 | 189 | fm = FM.get_results()["fm"] 190 | wfm = WFM.get_results()["wfm"] 191 | sm = SM.get_results()["sm"] 192 | em = EM.get_results()["em"] 193 | mae = MAE.get_results()["mae"] 194 | hce = HCE.get_results()["hce"] 195 | msiou = MSIOU.get_results() 196 | fmv2 = FMv2.get_results() 197 | si_mae = SI_MAE.get_results()["si_mae"] 198 | si_fmv2 = SI_FMv2.get_results() 199 | cm = CM.get_results()["cm"] 200 | ccm = CCM.get_results()["ccm"] 201 | 202 | cls.curr_results = { 203 | "MAE": mae, 204 | "HCE": hce, 205 | "Smeasure": sm, 206 | "wFmeasure": wfm, 207 | # "MSIOU": msiou, 208 | "adpmsiou": msiou["adaptive"], 209 | "meanmsiou": msiou["dynamic"].mean(), 210 | "maxmsiou": msiou["dynamic"].max(), 211 | "sample_bimsiou": msiou["binary"], 212 | # E-measure for sod 213 | "adpEm": em["adp"], 214 | "meanEm": em["curve"].mean(), 215 | "maxEm": em["curve"].max(), 216 | # F-measure for sod 217 | "adpFm": fm["adp"], 218 | "meanFm": fm["curve"].mean(), 219 | "maxFm": fm["curve"].max(), 220 | # size-invariant 221 | "si_mae": si_mae, 222 | # context-measure 223 | "cm": cm, 224 | "ccm": ccm, 225 | } 226 | # fmt: off 227 | base_metrics = ["fm", "f1", "pre", "rec", "fpr", "iou", "dice", "spec", "ber", "oa", "kappa"] 228 | # fmt: on 229 | for m_name in base_metrics: 230 | si_sample_max, si_sample_mean = reduce_dynamic_results_for_max_avg( 231 | 
si_fmv2[f"si_sample_{m_name}"]["dynamic"] 232 | ) 233 | cls.curr_results.update( 234 | { 235 | # general form 236 | f"adp{m_name}": fmv2[m_name]["adaptive"], 237 | f"mean{m_name}": fmv2[m_name]["dynamic"].mean(), 238 | f"max{m_name}": fmv2[m_name]["dynamic"].max(), 239 | f"sample_bi{m_name}": fmv2[f"sample_bi{m_name}"]["binary"], 240 | f"overall_bi{m_name}": fmv2[f"overall_bi{m_name}"]["binary"], 241 | # size-invariant 242 | # calculate the mean/maximum based on the average fm sequence across all targets from all samples. 243 | f"si_overall_mean{m_name}": si_fmv2[f"si_overall_{m_name}"]["dynamic"].mean(), 244 | f"si_overall_max{m_name}": si_fmv2[f"si_overall_{m_name}"]["dynamic"].max(), 245 | f"si_overall_bi{m_name}": si_fmv2[f"si_overall_bi{m_name}"]["binary"], 246 | # calculate the sample-specific mean/maximum based on the sample-based fm sequence with a shape of `(num_targets, 256)`. 247 | f"si_sample_mean{m_name}": si_sample_mean, 248 | f"si_sample_max{m_name}": si_sample_max, 249 | f"si_sample_adp{m_name}": si_fmv2[f"si_sample_{m_name}"]["adaptive"], 250 | f"si_sample_bi{m_name}": si_fmv2[f"si_sample_bi{m_name}"]["binary"], 251 | } 252 | ) 253 | pr_pre = fmv2["pre"]["dynamic"] # 256 254 | pr_rec = fmv2["rec"]["dynamic"] # 256 255 | roc_fpr = fmv2["fpr"]["dynamic"] # tpr is the same as recall 256 | cls.curr_results["auc_pr"] = cal_auc(y=np.flip(pr_pre, -1), x=np.flip(pr_rec, -1)) 257 | cls.curr_results["auc_roc"] = cal_auc(y=np.flip(pr_rec, -1), x=np.flip(roc_fpr, -1)) 258 | 259 | si_overall_pr_pre = si_fmv2["si_overall_pre"]["dynamic"] # 256 260 | si_overall_pr_rec = si_fmv2["si_overall_rec"]["dynamic"] # 256 261 | si_overall_roc_fpr = si_fmv2["si_overall_fpr"]["dynamic"] # 256 262 | cls.curr_results["si_overall_auc_pr"] = cal_auc( 263 | y=np.flip(si_overall_pr_pre, -1), x=np.flip(si_overall_pr_rec, -1) 264 | ) 265 | cls.curr_results["si_overall_auc_roc"] = cal_auc( 266 | y=np.flip(si_overall_pr_rec, -1), x=np.flip(si_overall_roc_fpr, -1) 267 | ) 268 | 269 | 
si_sample_pr_pre = si_fmv2["si_sample_pre"]["dynamic"] # Nx[T'x256] 270 | si_sample_pr_rec = si_fmv2["si_sample_rec"]["dynamic"] # Nx[T'x256] 271 | si_sample_roc_fpr = si_fmv2["si_sample_fpr"]["dynamic"] # Nx[T'x256] 272 | cls.curr_results["si_sample_auc_pr"] = reduce_dynamic_results_for_auc(ys=si_sample_pr_pre, xs=si_sample_pr_rec) 273 | cls.curr_results["si_sample_auc_roc"] = reduce_dynamic_results_for_auc( 274 | ys=si_sample_pr_rec, xs=si_sample_roc_fpr 275 | ) 276 | 277 | print("Current results:") 278 | pprint(cls.curr_results) 279 | cls.default_results = default_results["v1_4_3"] # 68 280 | for append_version in [ 281 | "v1_5_0", # 78+6 Size-Invariant Variants 282 | "v1_5_1", # 1 HCE 283 | "v1_6_0", # Context-Measure Series 284 | ]: 285 | if any([k in cls.default_results for k in default_results[append_version].keys()]): 286 | raise ValueError("Some keys will be overwritten by the SI variant results.") 287 | cls.default_results.update(default_results[append_version]) 288 | 289 | def test_sm(self): 290 | self.assertEqual(self.curr_results["Smeasure"], self.default_results["Smeasure"]) 291 | 292 | def test_wfm(self): 293 | self.assertEqual(self.curr_results["wFmeasure"], self.default_results["wFmeasure"]) 294 | 295 | def test_hce(self): 296 | self.assertEqual(self.curr_results["HCE"], self.default_results["HCE"]) 297 | 298 | def test_mae(self): 299 | self.assertEqual(self.curr_results["MAE"], self.default_results["MAE"]) 300 | 301 | self.assertEqual(self.curr_results["si_mae"], self.default_results["si_mae"]) 302 | 303 | def test_msiou(self): 304 | # self.assertEqual(self.curr_results["MSIOU"], self.default_results["MSIOU"]) 305 | self.assertEqual(self.curr_results["adpmsiou"], self.default_results["adpmsiou"]) 306 | self.assertEqual(self.curr_results["meanmsiou"], self.default_results["meanmsiou"]) 307 | self.assertEqual(self.curr_results["maxmsiou"], self.default_results["maxmsiou"]) 308 | self.assertEqual(self.curr_results["sample_bimsiou"], 
self.default_results["sample_bimsiou"]) 309 | 310 | def test_fm(self): 311 | self.assertEqual(self.curr_results["adpFm"], self.default_results["adpFm"]) 312 | self.assertEqual(self.curr_results["meanFm"], self.default_results["meanFm"]) 313 | self.assertEqual(self.curr_results["maxFm"], self.default_results["maxFm"]) 314 | 315 | self.assertEqual(self.curr_results["adpfm"], self.default_results["adpfm"]) 316 | self.assertEqual(self.curr_results["meanfm"], self.default_results["meanfm"]) 317 | self.assertEqual(self.curr_results["maxfm"], self.default_results["maxfm"]) 318 | 319 | # 对齐v1版本 320 | self.assertEqual(self.curr_results["adpFm"], self.default_results["adpfm"]) 321 | self.assertEqual(self.curr_results["meanFm"], self.default_results["meanfm"]) 322 | self.assertEqual(self.curr_results["maxFm"], self.default_results["maxfm"]) 323 | 324 | self.assertEqual(self.curr_results["sample_bifm"], self.default_results["sample_bifm"]) 325 | self.assertEqual(self.curr_results["overall_bifm"], self.default_results["overall_bifm"]) 326 | 327 | self.assertEqual(self.curr_results["si_sample_adpfm"], self.default_results["si_sample_adpfm"]) 328 | self.assertEqual(self.curr_results["si_sample_meanfm"], self.default_results["si_sample_meanfm"]) 329 | self.assertEqual(self.curr_results["si_sample_maxfm"], self.default_results["si_sample_maxfm"]) 330 | self.assertEqual(self.curr_results["si_sample_bifm"], self.default_results["si_sample_bifm"]) 331 | self.assertEqual(self.curr_results["si_overall_meanfm"], self.default_results["si_overall_meanfm"]) 332 | self.assertEqual(self.curr_results["si_overall_maxfm"], self.default_results["si_overall_maxfm"]) 333 | self.assertEqual(self.curr_results["si_overall_bifm"], self.default_results["si_overall_bifm"]) 334 | 335 | def test_em(self): 336 | self.assertEqual(self.curr_results["adpEm"], self.default_results["adpEm"]) 337 | self.assertEqual(self.curr_results["meanEm"], self.default_results["meanEm"]) 338 | 
self.assertEqual(self.curr_results["maxEm"], self.default_results["maxEm"]) 339 | 340 | def test_f1(self): 341 | self.assertEqual(self.curr_results["adpf1"], self.default_results["adpf1"]) 342 | self.assertEqual(self.curr_results["meanf1"], self.default_results["meanf1"]) 343 | self.assertEqual(self.curr_results["maxf1"], self.default_results["maxf1"]) 344 | self.assertEqual(self.curr_results["sample_bif1"], self.default_results["sample_bif1"]) 345 | self.assertEqual(self.curr_results["overall_bif1"], self.default_results["overall_bif1"]) 346 | 347 | self.assertEqual(self.curr_results["si_sample_adpf1"], self.default_results["si_sample_adpf1"]) 348 | self.assertEqual(self.curr_results["si_sample_meanf1"], self.default_results["si_sample_meanf1"]) 349 | self.assertEqual(self.curr_results["si_sample_maxf1"], self.default_results["si_sample_maxf1"]) 350 | self.assertEqual(self.curr_results["si_sample_bif1"], self.default_results["si_sample_bif1"]) 351 | self.assertEqual(self.curr_results["si_overall_meanf1"], self.default_results["si_overall_meanf1"]) 352 | self.assertEqual(self.curr_results["si_overall_maxf1"], self.default_results["si_overall_maxf1"]) 353 | self.assertEqual(self.curr_results["si_overall_bif1"], self.default_results["si_overall_bif1"]) 354 | 355 | def test_pre(self): 356 | self.assertEqual(self.curr_results["adppre"], self.default_results["adppre"]) 357 | self.assertEqual(self.curr_results["meanpre"], self.default_results["meanpre"]) 358 | self.assertEqual(self.curr_results["maxpre"], self.default_results["maxpre"]) 359 | self.assertEqual(self.curr_results["sample_bipre"], self.default_results["sample_bipre"]) 360 | self.assertEqual(self.curr_results["overall_bipre"], self.default_results["overall_bipre"]) 361 | 362 | self.assertEqual(self.curr_results["si_sample_adppre"], self.default_results["si_sample_adppre"]) 363 | self.assertEqual(self.curr_results["si_sample_meanpre"], self.default_results["si_sample_meanpre"]) 364 | 
self.assertEqual(self.curr_results["si_sample_maxpre"], self.default_results["si_sample_maxpre"]) 365 | self.assertEqual(self.curr_results["si_sample_bipre"], self.default_results["si_sample_bipre"]) 366 | self.assertEqual(self.curr_results["si_overall_meanpre"], self.default_results["si_overall_meanpre"]) 367 | self.assertEqual(self.curr_results["si_overall_maxpre"], self.default_results["si_overall_maxpre"]) 368 | self.assertEqual(self.curr_results["si_overall_bipre"], self.default_results["si_overall_bipre"]) 369 | 370 | def test_rec(self): 371 | self.assertEqual(self.curr_results["adprec"], self.default_results["adprec"]) 372 | self.assertEqual(self.curr_results["meanrec"], self.default_results["meanrec"]) 373 | self.assertEqual(self.curr_results["maxrec"], self.default_results["maxrec"]) 374 | self.assertEqual(self.curr_results["sample_birec"], self.default_results["sample_birec"]) 375 | self.assertEqual(self.curr_results["overall_birec"], self.default_results["overall_birec"]) 376 | 377 | self.assertEqual(self.curr_results["si_sample_adprec"], self.default_results["si_sample_adprec"]) 378 | self.assertEqual(self.curr_results["si_sample_meanrec"], self.default_results["si_sample_meanrec"]) 379 | self.assertEqual(self.curr_results["si_sample_maxrec"], self.default_results["si_sample_maxrec"]) 380 | self.assertEqual(self.curr_results["si_sample_birec"], self.default_results["si_sample_birec"]) 381 | self.assertEqual(self.curr_results["si_overall_meanrec"], self.default_results["si_overall_meanrec"]) 382 | self.assertEqual(self.curr_results["si_overall_maxrec"], self.default_results["si_overall_maxrec"]) 383 | self.assertEqual(self.curr_results["si_overall_birec"], self.default_results["si_overall_birec"]) 384 | 385 | def test_fpr(self): 386 | self.assertEqual(self.curr_results["adpfpr"], self.default_results["adpfpr"]) 387 | self.assertEqual(self.curr_results["meanfpr"], self.default_results["meanfpr"]) 388 | self.assertEqual(self.curr_results["maxfpr"], 
self.default_results["maxfpr"]) 389 | self.assertEqual(self.curr_results["sample_bifpr"], self.default_results["sample_bifpr"]) 390 | self.assertEqual(self.curr_results["overall_bifpr"], self.default_results["overall_bifpr"]) 391 | 392 | self.assertEqual(self.curr_results["si_sample_adpfpr"], self.default_results["si_sample_adpfpr"]) 393 | self.assertEqual(self.curr_results["si_sample_meanfpr"], self.default_results["si_sample_meanfpr"]) 394 | self.assertEqual(self.curr_results["si_sample_maxfpr"], self.default_results["si_sample_maxfpr"]) 395 | self.assertEqual(self.curr_results["si_sample_bifpr"], self.default_results["si_sample_bifpr"]) 396 | self.assertEqual(self.curr_results["si_overall_meanfpr"], self.default_results["si_overall_meanfpr"]) 397 | self.assertEqual(self.curr_results["si_overall_maxfpr"], self.default_results["si_overall_maxfpr"]) 398 | self.assertEqual(self.curr_results["si_overall_bifpr"], self.default_results["si_overall_bifpr"]) 399 | 400 | def test_iou(self): 401 | self.assertEqual(self.curr_results["adpiou"], self.default_results["adpiou"]) 402 | self.assertEqual(self.curr_results["meaniou"], self.default_results["meaniou"]) 403 | self.assertEqual(self.curr_results["maxiou"], self.default_results["maxiou"]) 404 | self.assertEqual(self.curr_results["sample_biiou"], self.default_results["sample_biiou"]) 405 | self.assertEqual(self.curr_results["overall_biiou"], self.default_results["overall_biiou"]) 406 | 407 | self.assertEqual(self.curr_results["si_sample_adpiou"], self.default_results["si_sample_adpiou"]) 408 | self.assertEqual(self.curr_results["si_sample_meaniou"], self.default_results["si_sample_meaniou"]) 409 | self.assertEqual(self.curr_results["si_sample_maxiou"], self.default_results["si_sample_maxiou"]) 410 | self.assertEqual(self.curr_results["si_sample_biiou"], self.default_results["si_sample_biiou"]) 411 | self.assertEqual(self.curr_results["si_overall_meaniou"], self.default_results["si_overall_meaniou"]) 412 | 
self.assertEqual(self.curr_results["si_overall_maxiou"], self.default_results["si_overall_maxiou"]) 413 | self.assertEqual(self.curr_results["si_overall_biiou"], self.default_results["si_overall_biiou"]) 414 | 415 | def test_dice(self): 416 | self.assertEqual(self.curr_results["adpdice"], self.default_results["adpdice"]) 417 | self.assertEqual(self.curr_results["meandice"], self.default_results["meandice"]) 418 | self.assertEqual(self.curr_results["maxdice"], self.default_results["maxdice"]) 419 | self.assertEqual(self.curr_results["sample_bidice"], self.default_results["sample_bidice"]) 420 | self.assertEqual(self.curr_results["overall_bidice"], self.default_results["overall_bidice"]) 421 | 422 | self.assertEqual(self.curr_results["si_sample_adpdice"], self.default_results["si_sample_adpdice"]) 423 | self.assertEqual(self.curr_results["si_sample_meandice"], self.default_results["si_sample_meandice"]) 424 | self.assertEqual(self.curr_results["si_sample_maxdice"], self.default_results["si_sample_maxdice"]) 425 | self.assertEqual(self.curr_results["si_sample_bidice"], self.default_results["si_sample_bidice"]) 426 | self.assertEqual(self.curr_results["si_overall_meandice"], self.default_results["si_overall_meandice"]) 427 | self.assertEqual(self.curr_results["si_overall_maxdice"], self.default_results["si_overall_maxdice"]) 428 | self.assertEqual(self.curr_results["si_overall_bidice"], self.default_results["si_overall_bidice"]) 429 | 430 | def test_spec(self): 431 | self.assertEqual(self.curr_results["adpspec"], self.default_results["adpspec"]) 432 | self.assertEqual(self.curr_results["meanspec"], self.default_results["meanspec"]) 433 | self.assertEqual(self.curr_results["maxspec"], self.default_results["maxspec"]) 434 | self.assertEqual(self.curr_results["sample_bispec"], self.default_results["sample_bispec"]) 435 | self.assertEqual(self.curr_results["overall_bispec"], self.default_results["overall_bispec"]) 436 | 437 | 
self.assertEqual(self.curr_results["si_sample_adpspec"], self.default_results["si_sample_adpspec"]) 438 | self.assertEqual(self.curr_results["si_sample_meanspec"], self.default_results["si_sample_meanspec"]) 439 | self.assertEqual(self.curr_results["si_sample_maxspec"], self.default_results["si_sample_maxspec"]) 440 | self.assertEqual(self.curr_results["si_sample_bispec"], self.default_results["si_sample_bispec"]) 441 | self.assertEqual(self.curr_results["si_overall_meanspec"], self.default_results["si_overall_meanspec"]) 442 | self.assertEqual(self.curr_results["si_overall_maxspec"], self.default_results["si_overall_maxspec"]) 443 | self.assertEqual(self.curr_results["si_overall_bispec"], self.default_results["si_overall_bispec"]) 444 | 445 | def test_ber(self): 446 | self.assertEqual(self.curr_results["adpber"], self.default_results["adpber"]) 447 | self.assertEqual(self.curr_results["meanber"], self.default_results["meanber"]) 448 | self.assertEqual(self.curr_results["maxber"], self.default_results["maxber"]) 449 | self.assertEqual(self.curr_results["sample_biber"], self.default_results["sample_biber"]) 450 | self.assertEqual(self.curr_results["overall_biber"], self.default_results["overall_biber"]) 451 | 452 | self.assertEqual(self.curr_results["si_sample_adpber"], self.default_results["si_sample_adpber"]) 453 | self.assertEqual(self.curr_results["si_sample_meanber"], self.default_results["si_sample_meanber"]) 454 | self.assertEqual(self.curr_results["si_sample_maxber"], self.default_results["si_sample_maxber"]) 455 | self.assertEqual(self.curr_results["si_sample_biber"], self.default_results["si_sample_biber"]) 456 | self.assertEqual(self.curr_results["si_overall_meanber"], self.default_results["si_overall_meanber"]) 457 | self.assertEqual(self.curr_results["si_overall_maxber"], self.default_results["si_overall_maxber"]) 458 | self.assertEqual(self.curr_results["si_overall_biber"], self.default_results["si_overall_biber"]) 459 | 460 | def test_oa(self): 461 | 
self.assertEqual(self.curr_results["adpoa"], self.default_results["adpoa"]) 462 | self.assertEqual(self.curr_results["meanoa"], self.default_results["meanoa"]) 463 | self.assertEqual(self.curr_results["maxoa"], self.default_results["maxoa"]) 464 | self.assertEqual(self.curr_results["sample_bioa"], self.default_results["sample_bioa"]) 465 | self.assertEqual(self.curr_results["overall_bioa"], self.default_results["overall_bioa"]) 466 | 467 | self.assertEqual(self.curr_results["si_sample_adpoa"], self.default_results["si_sample_adpoa"]) 468 | self.assertEqual(self.curr_results["si_sample_meanoa"], self.default_results["si_sample_meanoa"]) 469 | self.assertEqual(self.curr_results["si_sample_maxoa"], self.default_results["si_sample_maxoa"]) 470 | self.assertEqual(self.curr_results["si_sample_bioa"], self.default_results["si_sample_bioa"]) 471 | self.assertEqual(self.curr_results["si_overall_meanoa"], self.default_results["si_overall_meanoa"]) 472 | self.assertEqual(self.curr_results["si_overall_maxoa"], self.default_results["si_overall_maxoa"]) 473 | self.assertEqual(self.curr_results["si_overall_bioa"], self.default_results["si_overall_bioa"]) 474 | 475 | def test_kappa(self): 476 | self.assertEqual(self.curr_results["adpkappa"], self.default_results["adpkappa"]) 477 | self.assertEqual(self.curr_results["meankappa"], self.default_results["meankappa"]) 478 | self.assertEqual(self.curr_results["maxkappa"], self.default_results["maxkappa"]) 479 | self.assertEqual(self.curr_results["sample_bikappa"], self.default_results["sample_bikappa"]) 480 | self.assertEqual(self.curr_results["overall_bikappa"], self.default_results["overall_bikappa"]) 481 | 482 | self.assertEqual(self.curr_results["si_sample_adpkappa"], self.default_results["si_sample_adpkappa"]) 483 | self.assertEqual(self.curr_results["si_sample_meankappa"], self.default_results["si_sample_meankappa"]) 484 | self.assertEqual(self.curr_results["si_sample_maxkappa"], self.default_results["si_sample_maxkappa"]) 485 | 
self.assertEqual(self.curr_results["si_sample_bikappa"], self.default_results["si_sample_bikappa"]) 486 | self.assertEqual(self.curr_results["si_overall_meankappa"], self.default_results["si_overall_meankappa"]) 487 | self.assertEqual(self.curr_results["si_overall_maxkappa"], self.default_results["si_overall_maxkappa"]) 488 | self.assertEqual(self.curr_results["si_overall_bikappa"], self.default_results["si_overall_bikappa"]) 489 | 490 | def test_cm_series(self): 491 | # Context-measure tests - only validate they run without errors 492 | # since we don't have baseline values yet 493 | self.assertEqual(self.curr_results["cm"], self.default_results["cm"]) 494 | self.assertEqual(self.curr_results["ccm"], self.default_results["ccm"]) 495 | 496 | 497 | if __name__ == "__main__": 498 | unittest.main() 499 | --------------------------------------------------------------------------------