├── images └── logo.png ├── examples ├── test_data │ ├── masks │ │ ├── 0001.png │ │ ├── 19.png │ │ └── aerial-1867541__340.png │ ├── preds │ │ ├── 0001.png │ │ ├── 19.png │ │ └── aerial-1867541__340.png │ └── readme.md ├── test_bimetrics.py ├── version_performance.json ├── metric_recorder.py └── test_metrics.py ├── requirements.txt ├── deploy ├── .gitignore ├── README.md ├── api.rst ├── installation.rst ├── conf.py ├── index.rst ├── metrics.rst └── usage.rst ├── CITATION.cff ├── py_sod_metrics ├── __init__.py ├── utils.py ├── multiscale_iou.py ├── size_invariance.py ├── context_measure.py └── fmeasurev2.py ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── .github └── workflows │ ├── docs.yml │ ├── python-publish.yml │ └── README.md ├── .gitignore ├── pyproject.toml ├── readme_zh.md └── readme.md /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/images/logo.png -------------------------------------------------------------------------------- /examples/test_data/masks/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/0001.png -------------------------------------------------------------------------------- /examples/test_data/masks/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/19.png -------------------------------------------------------------------------------- /examples/test_data/preds/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/0001.png -------------------------------------------------------------------------------- /examples/test_data/preds/19.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/19.png -------------------------------------------------------------------------------- /examples/test_data/readme.md: -------------------------------------------------------------------------------- 1 | # 数据来源 2 | 3 | * `aerial-1867541__340` SOC 纯背景mask 4 | * `0001.png` ECSSD 包含目标的mask 5 | * `19.png` Pascal-S 6 | -------------------------------------------------------------------------------- /examples/test_data/masks/aerial-1867541__340.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/masks/aerial-1867541__340.png -------------------------------------------------------------------------------- /examples/test_data/preds/aerial-1867541__340.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lartpang/PySODMetrics/HEAD/examples/test_data/preds/aerial-1867541__340.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18,<2.3.5 2 | scipy>=1.5,<2.0 3 | scikit-image>=0.19,<0.26 4 | scikit-learn>=1.0,<2.0 5 | opencv-python-headless>=4.7.0,<5.0.0 6 | -------------------------------------------------------------------------------- /deploy/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Sphinx build artifacts 2 | _build/ 3 | _static/ 4 | _templates/ 5 | 6 | # Python cache 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # Environment 12 | .doctrees/ 13 | -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it using these metadata." 3 | authors: 4 | - family-names: "Pang" 5 | given-names: "Youwei" 6 | date-released: 2020-11-21 7 | keywords: 8 | - metrics 9 | - metrics-reported 10 | - metrics-evaluation 11 | - metrics-library 12 | - salient-object-detection 13 | - camouflaged-object-detection 14 | - saliency-detection 15 | - saliency-methods 16 | license: MIT License 17 | title: "PySODMetrics" 18 | abstract: "A Simple and Efficient Implementation of Grayscale/Binary Segmentation Metrics" 19 | url: "https://github.com/lartpang/PySODMetrics" 20 | repository-code: "https://github.com/lartpang/PySODMetrics" 21 | version: v1.4.3 22 | -------------------------------------------------------------------------------- /py_sod_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from py_sod_metrics.context_measure import CamouflageContextMeasure, ContextMeasure 2 | from py_sod_metrics.fmeasurev2 import ( 3 | BERHandler, 4 | DICEHandler, 5 | FmeasureHandler, 6 | FmeasureV2, 7 | FPRHandler, 8 | IOUHandler, 9 | KappaHandler, 10 | OverallAccuracyHandler, 11 | PrecisionHandler, 12 | RecallHandler, 13 | SensitivityHandler, 14 | SpecificityHandler, 15 | TNRHandler, 16 | TPRHandler, 17 | ) 18 | from py_sod_metrics.multiscale_iou import MSIoU 19 | from py_sod_metrics.size_invariance import SizeInvarianceFmeasureV2, SizeInvarianceMAE 20 | from py_sod_metrics.sod_metrics import ( 21 | MAE, 22 | Emeasure, 23 | Fmeasure, 24 | HumanCorrectionEffortMeasure, 25 | Smeasure, 26 | WeightedFmeasure, 27 | ) 28 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html 
for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-toml 11 | - id: check-added-large-files 12 | - id: fix-encoding-pragma 13 | - id: mixed-line-ending 14 | 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | # Ruff version. 17 | rev: v0.11.8 18 | hooks: 19 | # Run the linter. 20 | - id: ruff 21 | types_or: [ python, pyi ] 22 | args: [ --fix ] 23 | # Run the formatter. 24 | - id: ruff-format 25 | types_or: [ python, pyi ] 26 | 27 | - repo: https://github.com/pycqa/isort 28 | rev: 5.6.4 29 | hooks: 30 | - id: isort 31 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## [1.4.3] - 2025-5-8 4 | 5 | - Migrate to modern PyPI publishing by configuring `pyproject.toml` and `python-publish.yml`. 6 | - Update the formatter and linter tools to `ruff`. 7 | - Update the documentation information for the functions in `py_sod_metrics/fmeasurev2.py` and `py_sod_metrics/multiscale_iou.py`. 8 | - Optimize the code in `py_sod_metrics/multiscale_iou.py`. 9 | 10 | ## [1.4.3.1] - 2025-5-8 11 | 12 | - [FEATURE] Add `binary`, `dinamic`, and `adaptive` modes for `py_sod_metrics/multiscale_iou.py`. 13 | - [UPDATE] Update `examples/test_metrics.py` to support `binary`, `dinamic`, and `adaptive` modes of `MSIoU`. 14 | - [NOTE] The current implementation of the dynamic mode for `MSIoU` relies on the for loop, so it currently runs less efficiently. 15 | 16 | ## [1.4.4] - 2025-5-9 17 | 18 | - [FEATURE] Add `normalize` parameter to `py_sod_metrics/sod_metrics.py`. 19 | - [UPDATE] Update a unified function `validate_and_normalize_input` to validate and normalize the input data. 
20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 lartpang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /deploy/README.md: -------------------------------------------------------------------------------- 1 | # PySODMetrics Documentation Deployment 2 | 3 | This directory contains the source files for building the PySODMetrics documentation. 4 | 5 | ## Quick Start 6 | 7 | Build the documentation: 8 | 9 | ```bash 10 | cd deploy 11 | sphinx-build -b html . ./_build 12 | ``` 13 | 14 | The built documentation will be placed in the `_build/` directory in the project root. 
15 | 16 | ## Clean Build 17 | 18 | To clean the build directory and rebuild from scratch: 19 | 20 | ```bash 21 | cd deploy 22 | rm -rf ./_build 23 | sphinx-build -b html . ./_build 24 | ``` 25 | 26 | ## Directory Structure 27 | 28 | - `*.rst` - Documentation source files (reStructuredText) 29 | - `conf.py` - Sphinx configuration 30 | - `_static/` - Static files (CSS, images, etc.) 31 | 32 | ## Output 33 | 34 | - Built HTML documentation is output to: `./deploy/_build/` 35 | - This allows the `deploy/_build/` folder to be used directly for GitHub Pages 36 | 37 | ## Requirements 38 | 39 | Install documentation dependencies: 40 | 41 | ```bash 42 | pip install sphinx sphinx-rtd-theme 43 | ``` 44 | 45 | Or use the project's optional dependencies: 46 | 47 | ```bash 48 | pip install -e ".[docs]" 49 | ``` 50 | -------------------------------------------------------------------------------- /deploy/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | This page provides the complete API documentation for PySODMetrics. 5 | 6 | .. contents:: Contents 7 | :local: 8 | :depth: 2 9 | 10 | Core Metrics Module 11 | ------------------- 12 | 13 | .. automodule:: py_sod_metrics.sod_metrics 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | FmeasureV2 Module 19 | ----------------- 20 | 21 | .. automodule:: py_sod_metrics.fmeasurev2 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | Context Measure Module 27 | ---------------------- 28 | 29 | .. automodule:: py_sod_metrics.context_measure 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | Multi-Scale IoU Module 35 | ---------------------- 36 | 37 | .. automodule:: py_sod_metrics.multiscale_iou 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | Size Invariance Module 43 | ----------------------- 44 | 45 | .. 
automodule:: py_sod_metrics.size_invariance 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | Utility Functions 51 | ----------------- 52 | 53 | .. automodule:: py_sod_metrics.utils 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - 'deploy/**' 10 | - 'py_sod_metrics/**' 11 | - '.github/workflows/docs.yml' 12 | workflow_dispatch: # Allow manual trigger 13 | 14 | permissions: 15 | contents: write 16 | pages: write 17 | id-token: write 18 | 19 | jobs: 20 | build-and-deploy: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v4 26 | with: 27 | fetch-depth: 0 # Fetch all history for proper git operations 28 | 29 | - name: Set up Python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: '3.10' 33 | cache: 'pip' 34 | 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install sphinx sphinx-rtd-theme 39 | pip install -e . 40 | 41 | - name: Build documentation 42 | run: | 43 | cd deploy 44 | sphinx-build -b html . 
./_build 45 | 46 | - name: Add .nojekyll file 47 | run: | 48 | touch deploy/_build/.nojekyll 49 | 50 | - name: Deploy to GitHub Pages 51 | uses: peaceiris/actions-gh-pages@v3 52 | with: 53 | github_token: ${{ secrets.GITHUB_TOKEN }} 54 | publish_dir: ./deploy/_build 55 | publish_branch: gh-pages 56 | force_orphan: true 57 | user_name: 'github-actions[bot]' 58 | user_email: 'github-actions[bot]@users.noreply.github.com' 59 | commit_message: 'Deploy documentation from ${{ github.sha }}' 60 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI 2 | 3 | # 触发条件:当手动创建Release时触发(包括草稿发布转正式发布) 4 | on: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | build: 10 | name: Build distribution 📦 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | # 1. 检出仓库代码 15 | - uses: actions/checkout@v4 16 | with: 17 | # 禁用凭据持久化(避免权限残留) 18 | persist-credentials: false 19 | 20 | # 2. 设置Python环境(使用最新的3.x版本) 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.x" 25 | 26 | # 3. 安装Python构建工具 27 | - name: Install pypa/build 28 | run: python3 -m pip install build --user 29 | 30 | # 4. 构建二进制wheel和源代码tarball 31 | - name: Build a binary wheel and a source tarball 32 | run: python3 -m build 33 | 34 | # 5. 保存构建产物(dist目录下的文件) 35 | - name: Store the distribution packages 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: python-package-distributions 39 | path: dist/ 40 | retention-days: 1 # 1 天后删除 artifact 41 | 42 | publish-to-pypi: 43 | name: Publish Python 🐍 distribution 📦 to PyPI 44 | # 条件判断:仅当满足以下所有条件时执行 45 | # 1. 事件类型为Release创建 46 | # 2. 标签以refs/tags/v开头(即vX.Y.Z格式) 47 | # 3. 标签包含点号(确保版本分隔符存在) 48 | # 4. 
排除包含连续点号的异常标签(如v1..2) 49 | if: startsWith(github.ref, 'refs/tags/v') && contains(github.ref, '.') 50 | needs: [build] # 依赖build作业的完成 51 | runs-on: ubuntu-latest 52 | # 环境配置 53 | environment: 54 | name: pypi 55 | url: https://pypi.org/p/pysodmetrics 56 | permissions: 57 | # 必须配置OIDC权限用于可信发布 58 | id-token: write 59 | 60 | steps: 61 | # 6. 下载之前构建阶段保存的产物 62 | - name: Download all the dists 63 | uses: actions/download-artifact@v4 64 | with: 65 | name: python-package-distributions 66 | path: dist/ 67 | # 7. 发布到PyPI 68 | - name: Publish distribution 📦 to PyPI 69 | uses: pypa/gh-action-pypi-publish@release/v1 -------------------------------------------------------------------------------- /deploy/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | PySODMetrics requires Python 3.6 or higher and the following dependencies: 8 | 9 | * numpy >= 1.18, < 2.3.5 10 | * scipy >= 1.5, < 2.0 11 | * scikit-image >= 0.19, < 0.26 12 | * scikit-learn >= 1.0, < 2.0 13 | * opencv-python-headless >= 4.7.0, < 5.0.0 14 | 15 | Install from PyPI 16 | ----------------- 17 | 18 | The easiest way to install PySODMetrics is from PyPI using pip: 19 | 20 | .. code-block:: bash 21 | 22 | pip install pysodmetrics 23 | 24 | This is the **recommended and most stable** installation method. 25 | 26 | Install from Source 27 | ------------------- 28 | 29 | Installing from GitHub (Latest Version) 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | To get the latest development version (which may include new features but could be less stable): 33 | 34 | .. code-block:: bash 35 | 36 | pip install git+https://github.com/lartpang/PySODMetrics.git 37 | 38 | Installing from Cloned Repository 39 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 40 | 41 | If you want to modify the code or contribute to the project: 42 | 43 | 1. Clone the repository: 44 | 45 | .. 
code-block:: bash 46 | 47 | git clone https://github.com/lartpang/PySODMetrics.git 48 | cd PySODMetrics 49 | 50 | 2. Install in development mode: 51 | 52 | .. code-block:: bash 53 | 54 | pip install -e . 55 | 56 | Building Documentation 57 | ---------------------- 58 | 59 | To build the documentation locally, you need to install the documentation dependencies: 60 | 61 | .. code-block:: bash 62 | 63 | pip install sphinx sphinx-rtd-theme 64 | 65 | Then build the documentation: 66 | 67 | .. code-block:: bash 68 | 69 | cd deploy 70 | sphinx-build -b html . ./_build 71 | # (the same command works 72 | # on Linux, Mac, and Windows) 73 | 74 | The built documentation will be available in ``deploy/_build/``. 75 | 76 | Verifying Installation 77 | ---------------------- 78 | 79 | To verify that PySODMetrics is installed correctly, open a Python interpreter and try: 80 | 81 | .. code-block:: python 82 | 83 | import py_sod_metrics 84 | from py_sod_metrics import MAE, Smeasure 85 | 86 | # If no errors occur, the installation was successful! 87 | print("PySODMetrics installed successfully!") 88 | 89 | You can also check the available classes: 90 | 91 | .. 
code-block:: python 92 | 93 | import py_sod_metrics 94 | print(dir(py_sod_metrics)) 95 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea/ 7 | .vscode/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | deploy/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /deploy/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import os 7 | import sys 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. 
12 | sys.path.insert(0, os.path.abspath("..")) 13 | 14 | # -- Project information ----------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 16 | 17 | project = "PySODMetrics" 18 | copyright = "2025, lartpang" 19 | author = "lartpang" 20 | release = "1.6.0" 21 | 22 | # -- General configuration --------------------------------------------------- 23 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 24 | 25 | extensions = [ 26 | "sphinx.ext.autodoc", 27 | "sphinx.ext.napoleon", 28 | "sphinx.ext.viewcode", 29 | "sphinx.ext.intersphinx", 30 | "sphinx.ext.mathjax", 31 | ] 32 | 33 | templates_path = ["_templates"] 34 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 35 | 36 | # -- Options for HTML output ------------------------------------------------- 37 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 38 | 39 | html_theme = "sphinx_rtd_theme" 40 | html_static_path = ["_static"] 41 | 42 | # -- Extension configuration ------------------------------------------------- 43 | 44 | # Napoleon settings 45 | napoleon_google_docstring = True 46 | napoleon_numpy_docstring = True 47 | napoleon_include_init_with_doc = True 48 | napoleon_include_private_with_doc = False 49 | napoleon_include_special_with_doc = True 50 | napoleon_use_admonition_for_examples = False 51 | napoleon_use_admonition_for_notes = False 52 | napoleon_use_admonition_for_references = False 53 | napoleon_use_ivar = False 54 | napoleon_use_param = True 55 | napoleon_use_rtype = True 56 | napoleon_preprocess_types = False 57 | napoleon_type_aliases = None 58 | napoleon_attr_annotations = True 59 | 60 | # Autodoc settings 61 | autodoc_default_options = { 62 | "members": True, 63 | "member-order": "bysource", 64 | "special-members": "__init__", 65 | "undoc-members": True, 66 | "exclude-members": "__weakref__", 67 | } 68 | 69 | # Intersphinx mapping 
70 | intersphinx_mapping = { 71 | "python": ("https://docs.python.org/3", None), 72 | "numpy": ("https://numpy.org/doc/stable/", None), 73 | "scipy": ("https://docs.scipy.org/doc/scipy/", None), 74 | } 75 | -------------------------------------------------------------------------------- /deploy/index.rst: -------------------------------------------------------------------------------- 1 | PySODMetrics Documentation 2 | =========================== 3 | 4 | Welcome to PySODMetrics - A simple and efficient implementation of SOD metrics. 5 | 6 | .. image:: https://img.shields.io/pypi/v/pysodmetrics 7 | :target: https://pypi.org/project/pysodmetrics/ 8 | :alt: PyPI version 9 | 10 | .. image:: https://img.shields.io/pypi/dm/pysodmetrics?label=pypi%20downloads&logo=PyPI&logoColor=white 11 | :target: https://pypi.org/project/pysodmetrics/ 12 | :alt: Downloads 13 | 14 | Overview 15 | -------- 16 | 17 | PySODMetrics is a Python library that provides simple and efficient implementations of 18 | metrics for evaluating salient object detection (SOD), camouflaged object detection (COD), 19 | and medical image segmentation tasks. 20 | 21 | **Key Features:** 22 | 23 | * Based on numpy and scipy for fast computation 24 | * Verified against the original MATLAB implementations 25 | * Simple and extensible code structure 26 | * Lightweight and easy to use 27 | 28 | .. note:: 29 | Our exploration in this field continues with `PyIRSTDMetrics `_, 30 | a project born from the same core motivation. Think of them as twin initiatives: 31 | this project maps the landscape of current evaluation, while its sibling takes the next step 32 | to expand upon and rethink it. 33 | 34 | Contents 35 | -------- 36 | 37 | .. toctree:: 38 | :maxdepth: 2 39 | :caption: User Guide 40 | 41 | installation 42 | usage 43 | metrics 44 | 45 | .. 
toctree:: 46 | :maxdepth: 2 47 | :caption: API Reference 48 | 49 | api 50 | 51 | Supported Metrics 52 | ----------------- 53 | 54 | PySODMetrics supports a comprehensive set of evaluation metrics: 55 | 56 | * **MAE** - Mean Absolute Error 57 | * **S-measure** (:math:`S_m`) - Structure Measure 58 | * **E-measure** (:math:`E_m`) - Enhanced-alignment Measure 59 | * **F-measure** (:math:`F_\beta`) - Precision-Recall F-measure 60 | * **Weighted F-measure** (:math:`F^\omega_\beta`) 61 | * **Context-Measure** (:math:`C_\beta`, :math:`C^\omega_\beta`) 62 | * **Multi-Scale IoU** - Multi-scale Intersection over Union 63 | * **Human Correction Effort Measure** 64 | * And many more classification metrics (BER, Dice, Kappa, Precision, Recall, etc.) 65 | 66 | See :doc:`metrics` for detailed descriptions of all supported metrics. 67 | 68 | Indices and tables 69 | ================== 70 | 71 | * :ref:`genindex` 72 | * :ref:`modindex` 73 | * :ref:`search` 74 | 75 | Related Projects 76 | ================ 77 | 78 | * `PySODEvalToolkit `_ - A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 79 | 80 | Links 81 | ===== 82 | 83 | * **GitHub Repository:** https://github.com/lartpang/PySODMetrics 84 | * **PyPI Package:** https://pypi.org/project/pysodmetrics/ 85 | * **Issue Tracker:** https://github.com/lartpang/PySODMetrics/issues 86 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools] 6 | packages = ["py_sod_metrics"] # 直接指定包名 7 | 8 | [project] 9 | name = "pysodmetrics" 10 | version = "1.6.0" 11 | dependencies = [ 12 | "numpy>=1.18,<2.3.5", 13 | "scipy>=1.5,<2.0", 14 | "scikit-image>=0.19,<0.26", 15 | "scikit-learn>=1.0,<2.0", 16 | "opencv-python-headless>=4.7.0,<5.0.0", 17 | ] 18 | 
requires-python = ">=3.6" 19 | authors = [{ name = "lartpang", email = "lartpang@gmail.com" }] 20 | maintainers = [{ name = "lartpang", email = "lartpang@gmail.com" }] 21 | description = "A simple and efficient metric implementation for grayscale/binary image segmentation like salient object detection, camouflaged object detection, and medical image segmentation." 22 | readme = "readme.md" 23 | license = { file = "LICENSE" } 24 | keywords = [ 25 | "salient object detection", 26 | "camouflaged object detection", 27 | "medical image segmentation", 28 | "dichotomous image segmentation", 29 | "saliency detection", 30 | "metric", 31 | "deep learning", 32 | ] 33 | classifiers = [ 34 | "Development Status :: 5 - Production/Stable", 35 | "Environment :: Console", 36 | "Intended Audience :: Developers", 37 | "Operating System :: OS Independent", 38 | "Programming Language :: Python", 39 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 40 | ] 41 | 42 | [project.urls] 43 | Homepage = "https://github.com/lartpang/PySODMetrics" 44 | Documentation = "https://github.com/lartpang/PySODMetrics" 45 | Repository = "https://github.com/lartpang/PySODMetrics.git" 46 | "Bug Tracker" = "https://github.com/lartpang/PySODMetrics/issues" 47 | Changelog = "https://github.com/lartpang/PySODMetrics/blob/master/CHANGELOG.md" 48 | 49 | [project.optional-dependencies] 50 | docs = [ 51 | "sphinx>=7.0.0", 52 | "sphinx-rtd-theme>=2.0.0", 53 | ] 54 | 55 | [tool.isort] 56 | # https://pycqa.github.io/isort/docs/configuration/options/ 57 | profile = "black" 58 | multi_line_output = 3 59 | filter_files = true 60 | supported_extensions = "py" 61 | 62 | [tool.black] 63 | line-length = 119 64 | include = '\.pyi?$' 65 | exclude = ''' 66 | /( 67 | \.eggs 68 | | \.git 69 | | \.idea 70 | | \.vscode 71 | | \.hg 72 | | \.mypy_cache 73 | | \.tox 74 | | \.venv 75 | | _build 76 | | buck-out 77 | | build 78 | | dist 79 | | output 80 | )/ 81 | ''' 82 | 83 | [tool.ruff] 84 | # Same as Black. 
85 | line-length = 119 86 | indent-width = 4 87 | # Exclude a variety of commonly ignored directories. 88 | exclude = [ 89 | ".bzr", 90 | ".direnv", 91 | ".eggs", 92 | ".git", 93 | ".git-rewrite", 94 | ".hg", 95 | ".ipynb_checkpoints", 96 | ".mypy_cache", 97 | ".nox", 98 | ".pants.d", 99 | ".pyenv", 100 | ".pytest_cache", 101 | ".pytype", 102 | ".ruff_cache", 103 | ".svn", 104 | ".tox", 105 | ".venv", 106 | ".vscode", 107 | "__pypackages__", 108 | "_build", 109 | "buck-out", 110 | "build", 111 | "dist", 112 | "node_modules", 113 | "site-packages", 114 | "venv", 115 | ] 116 | [tool.ruff.format] 117 | # Like Black, use double quotes for strings. 118 | quote-style = "double" 119 | # Like Black, indent with spaces, rather than tabs. 120 | indent-style = "space" 121 | # Like Black, respect magic trailing commas. 122 | skip-magic-trailing-comma = false 123 | # Like Black, automatically detect the appropriate line ending. 124 | line-ending = "auto" 125 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # GitHub Actions 自动构建文档 2 | 3 | 本项目使用 GitHub Actions 自动构建和部署 Sphinx 文档到 GitHub Pages。 4 | 5 | ## 工作流程 6 | 7 | 当代码推送到主分支时,GitHub Actions 会自动: 8 | 9 | 1. 检出代码 10 | 2. 安装 Python 和依赖 11 | 3. 构建 Sphinx 文档 12 | 4. 部署到 `gh-pages` 分支 13 | 5. GitHub Pages 自动发布 14 | 15 | ## 触发条件 16 | 17 | 文档自动构建会在以下情况触发: 18 | 19 | - 推送到 `main` 或 `master` 分支 20 | - 修改了以下文件: 21 | - `deploy/` 目录中的任何文件(文档源) 22 | - `py_sod_metrics/` 目录中的任何文件(API 文档源) 23 | - `.github/workflows/docs.yml` 工作流文件 24 | 25 | ## 手动触发 26 | 27 | 如果需要手动触发文档构建: 28 | 29 | 1. 进入 GitHub 仓库页面 30 | 2. 点击 "Actions" 标签 31 | 3. 选择 "Build and Deploy Documentation" 工作流 32 | 4. 点击 "Run workflow" 按钮 33 | 34 | ## GitHub Pages 设置 35 | 36 | ### 首次设置 37 | 38 | 1. 进入仓库的 Settings → Pages 39 | 2. Source 选择: 40 | - **Branch**: `gh-pages` 41 | - **Folder**: `/ (root)` 42 | 3. 
保存设置 43 | 44 | ### 访问文档 45 | 46 | 文档发布后,可以通过以下地址访问: 47 | 48 | ``` 49 | https://.github.io/PySODMetrics/ 50 | ``` 51 | 52 | ## 工作流文件 53 | 54 | 工作流文件位于:`.github/workflows/docs.yml` 55 | 56 | ### 主要步骤 57 | 58 | 1. **Checkout repository** - 检出代码 59 | 2. **Set up Python** - 安装 Python 3.10 60 | 3. **Install dependencies** - 安装 Sphinx 和主题 61 | 4. **Build documentation** - 构建 HTML 文档 62 | 5. **Deploy to GitHub Pages** - 部署到 gh-pages 分支 63 | 64 | ## 本地构建 vs 自动构建 65 | 66 | ### 本地构建 67 | 68 | 仍然可以在本地构建文档进行预览: 69 | 70 | ```bash 71 | cd deploy 72 | make html # Linux/Mac 73 | make.bat html # Windows 74 | ``` 75 | 76 | 本地构建的结果在 `docs/` 目录,但**不需要**提交这些文件到 Git。 77 | 78 | ### 自动构建 79 | 80 | GitHub Actions 会在云端自动构建,无需本地生成 HTML 文件。 81 | 82 | ## Git 工作流建议 83 | 84 | ### 方案 A:不提交 docs/ 目录(推荐) 85 | 86 | 将 `docs/` 添加到 `.gitignore`: 87 | 88 | ```gitignore 89 | # Build output (generated by GitHub Actions) 90 | docs/ 91 | ``` 92 | 93 | **优点**: 94 | - 仓库更干净 95 | - 避免不必要的文件冲突 96 | - 减小仓库大小 97 | 98 | **缺点**: 99 | - 必须依赖 GitHub Actions 100 | 101 | ### 方案 B:提交 docs/ 目录 102 | 103 | 保留 `docs/` 在 Git 中作为备份。 104 | 105 | **优点**: 106 | - 即使 Actions 失败也有备份 107 | - 可以查看文档历史 108 | 109 | **缺点**: 110 | - 每次推送都会有大量文件变更 111 | - 容易产生合并冲突 112 | 113 | ## 故障排查 114 | 115 | ### Actions 失败 116 | 117 | 如果 GitHub Actions 失败,检查: 118 | 119 | 1. **Permission 错误**:确保仓库设置中启用了 Actions 的写权限 120 | - Settings → Actions → General → Workflow permissions 121 | - 选择 "Read and write permissions" 122 | 123 | 2. **分支保护**:确保 `gh-pages` 分支没有保护规则阻止 Actions 124 | 125 | 3. **依赖安装失败**:检查 `pyproject.toml` 中的依赖是否正确 126 | 127 | ### Pages 未更新 128 | 129 | 如果文档未更新: 130 | 131 | 1. 检查 Actions 是否成功运行 132 | 2. 确认 GitHub Pages 设置正确(来源为 `gh-pages` 分支) 133 | 3. 
import numpy as np

# the different implementation of epsilon (extreme min value) between numpy and matlab
EPS = np.spacing(1)
TYPE = np.float64


def validate_and_normalize_input(pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
    """Validate and optionally normalize prediction and ground truth inputs.

    This function ensures that prediction and ground truth arrays have compatible
    shapes and appropriate data types. When normalization is enabled, it converts
    inputs to the standard format required by the predefined metrics (pred in
    [0, 1] as float, gt as boolean).

    Args:
        pred (np.ndarray): Prediction array. If ``normalize=True``, should be a
            uint8 grayscale image (0-255). If ``normalize=False``, must be
            float32/float64 in range [0, 1].
        gt (np.ndarray): Ground truth array. If ``normalize=True``, should be a
            uint8 grayscale image (0-255). If ``normalize=False``, must be a
            boolean array.
        normalize (bool, optional): Whether to normalize the input data using
            prepare_data(). Defaults to True.

    Returns:
        tuple: A tuple containing:
            - pred (np.ndarray): Prediction as float in range [0, 1].
            - gt (np.ndarray): Ground truth as boolean array.

    Raises:
        ValueError: If prediction and ground truth shapes don't match, if the
            arrays are empty, or if prediction values are outside [0, 1] when
            ``normalize=False``.
        TypeError: If data types are invalid when ``normalize=False`` (pred must
            be float32/float64, gt must be boolean).
    """
    # Validate input shapes
    if pred.shape != gt.shape:
        raise ValueError(f"Shape mismatch between prediction ({pred.shape}) and ground truth ({gt.shape})")
    # Empty arrays would otherwise surface as an opaque numpy reduction error
    # from min()/max() below; fail early with a clear message instead.
    if pred.size == 0:
        raise ValueError("Prediction and ground truth arrays must be non-empty")

    # Handle normalization
    if normalize:
        pred, gt = prepare_data(pred, gt)
    else:
        # Validate prediction data type and range
        if pred.dtype not in (np.float32, np.float64):
            raise TypeError(f"Prediction array must be float32 or float64, got {pred.dtype}")
        if not (0 <= pred.min() and pred.max() <= 1):
            raise ValueError("Prediction values must be in range [0, 1]")
        # Validate ground truth type
        if gt.dtype != bool:
            raise TypeError(f"Ground truth must be boolean, got {gt.dtype}")

    return pred, gt


def prepare_data(pred: np.ndarray, gt: np.ndarray) -> tuple:
    """Convert and normalize prediction and ground truth data.

    - For predictions, mimics MATLAB's ``mapminmax(im2double(...))``: scale by
      255, then min-max rescale to span [0, 1] unless the array is constant.
    - For ground truth, applies strict binary thresholding: ``gt > 128``.

    Args:
        pred (np.ndarray): Prediction grayscale image, uint8 type with values in [0, 255].
        gt (np.ndarray): Ground truth grayscale image, uint8 type with values in [0, 255].

    Returns:
        tuple: A tuple containing:
            - pred (np.ndarray): Normalized prediction as float64 in range [0, 1].
            - gt (np.ndarray): Binary ground truth as boolean array.
    """
    gt = gt > 128
    # im2double, mapminmax
    pred = pred / 255
    if pred.max() != pred.min():
        pred = (pred - pred.min()) / (pred.max() - pred.min())
    return pred, gt


def get_adaptive_threshold(matrix: np.ndarray, max_value: float = 1) -> float:
    """Return an adaptive threshold, which is equal to twice the mean of `matrix`.

    Args:
        matrix (np.ndarray): a data array
        max_value (float, optional): the upper limit of the threshold. Defaults to 1.

    Returns:
        float: `min(2 * matrix.mean(), max_value)`
    """
    return min(2 * matrix.mean(), max_value)
class MSIoU:
    """Multi-Scale Intersection over Union (MSIoU) metric.

    This implements the MSIoU metric which evaluates segmentation quality at
    multiple scales by comparing edge maps. It addresses the limitation of
    traditional IoU which struggles with fine structures in segmentation results.

    ```
    @inproceedings{MSIoU,
        title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures},
        author = {Ahmadzadeh, Azim and Kempton, Dustin J. and Chen, Yang and Angryk, Rafal A.},
        booktitle = ICIP,
        year = {2021},
    }
    ```
    """

    def __init__(self, with_dynamic: bool, with_adaptive: bool, *, with_binary: bool = False, num_levels: int = 10):
        """Initialize the MSIoU evaluator.

        Args:
            with_dynamic (bool): Record dynamic results for max/avg/curve versions.
            with_adaptive (bool): Record adaptive results for adp version.
            with_binary (bool, optional): Record binary results for binary version. Defaults to False.
            num_levels (int, optional): Number of scales (grid cell sizes) used,
                sampled from 2**0 up to 2**9 as in the original implementation.
                Should be at least 2 (the AUC step is 1 / (num_levels - 1)).
                Defaults to 10.
        """
        self.dynamic_results = [] if with_dynamic else None
        self.adaptive_results = [] if with_adaptive else None
        self.binary_results = [] if with_binary else None

        # The values of this collection determines the resolutions based on which MIoU is computed.
        # It is set as the original implementation
        self.cell_sizes = np.power(2, np.linspace(0, 9, num=num_levels, dtype=int))

    def get_edge(self, mask: np.ndarray):
        """Edge detection based on the `scipy.ndimage.sobel` function.

        :param mask: a binary mask of an object whose edges are of interest.
        :return: a binary mask of 1's as edges and 0's as background.
        """
        sx = ndimage.sobel(mask, axis=0, mode="constant")
        sy = ndimage.sobel(mask, axis=1, mode="constant")
        sob = np.hypot(sx, sy)
        # Any non-zero gradient magnitude counts as an edge pixel.
        return (sob > 0).astype(sob.dtype)

    def shrink_by_grid(self, image: np.ndarray, cell_size: int) -> np.ndarray:
        """Shrink the image by summing values within grid cells.

        Performs box-counting after applying zero padding if the image dimensions
        are not perfectly divisible by the cell size.

        :param image: The input binary image (edges).
        :param cell_size: The size of the grid cells.
        :return: A shrunk binary image where each pixel represents a grid cell.
        :raises ValueError: If ``cell_size`` is not a positive integer.
        """
        if cell_size <= 0:
            raise ValueError("Cell size must be a positive integer")

        if cell_size > 1:
            # Calculate padding sizes to make dimensions divisible by cell_size
            h, w = image.shape[:2]
            pad_h = (cell_size - h % cell_size) % cell_size
            pad_w = (cell_size - w % cell_size) % cell_size

            # Apply padding if necessary
            if pad_h > 0 or pad_w > 0:
                # Padding is added to the top and left edges.
                image = np.pad(image, ((pad_h, 0), (pad_w, 0)), mode="constant", constant_values=0)

            # Reshape and sum within each cell
            h, w = image.shape[:2]
            image = image.reshape(h // cell_size, cell_size, w // cell_size, cell_size)
            image = image.sum(axis=(1, 3))
        # A cell is "occupied" if it contains any edge pixel.
        return (image > 0).astype(image.dtype)

    def multi_scale_iou(self, pred_edge: np.ndarray, gt_edge: np.ndarray) -> list:
        """Calculate Multi-Scale IoU.

        Args:
            pred_edge (np.ndarray): edge map of pred
            gt_edge (np.ndarray): edge map of gt

        Returns:
            list: IoU ratios, one per cell size in ``self.cell_sizes``.
        """
        # Calculate IoU ratios at different scales
        ratios = []
        for cell_size in self.cell_sizes:
            # Shrink both prediction and ground truth edges
            shrunk_pred_edge = self.shrink_by_grid(pred_edge, cell_size=cell_size)
            shrunk_gt_edge = self.shrink_by_grid(gt_edge, cell_size=cell_size)

            # Calculate IoU with smoothing to prevent division by zero
            numerator = np.logical_and(shrunk_pred_edge, shrunk_gt_edge).sum() + 1
            # Only consider ground truth for denominator
            denominator = shrunk_gt_edge.sum() + 1
            ratios.append(numerator / denominator)
        return ratios

    def binarizing(self, pred_bin: np.ndarray, gt_edge: np.ndarray) -> float:
        """Calculate the MSIoU score for one binarized prediction.

        Args:
            pred_bin (np.ndarray): binarized pred
            gt_edge (np.ndarray): edge map of the gt binarized by 128

        Returns:
            float: area under the curve of the per-scale IoU ratios.
        """
        pred_edge = self.get_edge(pred_bin)
        ratios = self.multi_scale_iou(pred_edge, gt_edge)  # num_levels values

        # Calculate area under the curve using the trapezoidal rule.
        # np.trapz was removed in NumPy 2.0 and renamed np.trapezoid, so pick
        # whichever this NumPy version provides.
        trapezoid = getattr(np, "trapezoid", None) or getattr(np, "trapz")
        return trapezoid(y=ratios, dx=1 / (len(self.cell_sizes) - 1))

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Record the Multi-Scale IoU for a single prediction-ground truth pair.

        This method first extracts edges from both prediction and ground truth,
        then computes IoU ratios at multiple scales defined by self.cell_sizes,
        and appends the resulting AUC score(s) to the enabled result recorders.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.

        Returns:
            None: results are accumulated internally; see :meth:`get_results`.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)

        # Calculate MSIoU for this pair and store the result
        gt_edge = self.get_edge(gt)

        if self.dynamic_results is not None:
            results = []
            _pred = (pred * 255).astype(np.uint8)
            # Sweep every integer threshold 0..256 (>=256 yields an all-zero mask).
            for threshold in np.linspace(0, 256, 257):
                results.append(self.binarizing(_pred >= threshold, gt_edge))
            # threshold_masks = pred[..., None] >= np.arange(0, 257)[None, None, :]
            self.dynamic_results.append(results)

        if self.adaptive_results is not None:
            adaptive_threshold = get_adaptive_threshold(pred, max_value=1)
            results = self.binarizing(pred >= adaptive_threshold, gt_edge)
            self.adaptive_results.append(results)

        if self.binary_results is not None:
            self.binary_results.append(self.binarizing(pred > 0.5, gt_edge))

    def get_results(self) -> dict:
        """Return the accumulated MSIoU results.

        Averages the per-sample scores stored by previous calls to step().

        :return: Dictionary with a subset of the keys ``"dynamic"`` (mean
            per-threshold curve), ``"adaptive"`` and ``"binary"`` (mean scalar
            scores), depending on which recorders were enabled in __init__.
        """
        results = {}
        if self.dynamic_results is not None:
            results["dynamic"] = np.mean(np.array(self.dynamic_results, dtype=TYPE), axis=0)
        if self.adaptive_results is not None:
            results["adaptive"] = np.mean(np.array(self.adaptive_results, dtype=TYPE))
        if self.binary_results is not None:
            results["binary"] = np.mean(np.array(self.binary_results, dtype=TYPE))
        return results
# Shared handler configuration: accumulate tp/fp/tn/fn across all samples and
# compute each binarized metric once on the aggregated counts.
_OVERALL_BIN = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False)

# Names of all registered handlers; also used to pull scalar results back out.
_METRIC_NAMES = (
    "overall_bifm",
    "overall_bipre",
    "overall_birec",
    "overall_bifpr",
    "overall_bidice",
    "overall_biiou",
    "overall_bif1",
    "overall_bispec",
    "overall_biber",
    "overall_bioa",
    "overall_bikappa",
)


def _build_recorder():
    """Create a fresh FmeasureV2 recorder with every overall binary handler."""
    return py_sod_metrics.FmeasureV2(
        metric_handlers={
            "overall_bifm": py_sod_metrics.FmeasureHandler(**_OVERALL_BIN, beta=0.3),
            "overall_bif1": py_sod_metrics.FmeasureHandler(**_OVERALL_BIN, beta=1),
            "overall_bipre": py_sod_metrics.PrecisionHandler(**_OVERALL_BIN),
            "overall_birec": py_sod_metrics.RecallHandler(**_OVERALL_BIN),
            "overall_bifpr": py_sod_metrics.FPRHandler(**_OVERALL_BIN),
            "overall_biiou": py_sod_metrics.IOUHandler(**_OVERALL_BIN),
            "overall_bidice": py_sod_metrics.DICEHandler(**_OVERALL_BIN),
            "overall_bispec": py_sod_metrics.SpecificityHandler(**_OVERALL_BIN),
            "overall_biber": py_sod_metrics.BERHandler(**_OVERALL_BIN),
            "overall_bioa": py_sod_metrics.OverallAccuracyHandler(**_OVERALL_BIN),
            "overall_bikappa": py_sod_metrics.KappaHandler(**_OVERALL_BIN),
        }
    )


def _load_pairs(pred_files, mask_files, side=256):
    """Yield (pred, mask) uint8 grayscale pairs, each resized to ``side`` x ``side``."""
    for pred_path, mask_path in zip(pred_files, mask_files):
        pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE)
        assert pred is not None, pred_path
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        assert mask is not None, mask_path
        yield (
            cv2.resize(pred, dsize=(side, side), interpolation=cv2.INTER_LINEAR),
            cv2.resize(mask, dsize=(side, side), interpolation=cv2.INTER_LINEAR),
        )


def _binary_results(recorder):
    """Extract the scalar 'binary' result of every registered handler."""
    results = recorder.get_results()
    return {name: results[name]["binary"] for name in _METRIC_NAMES}


def _report(sample_info, whole_info, expect_equal, tag):
    """Log pass/fail for each metric, comparing per-sample vs whole-image values."""
    for name, sample_value in sample_info.items():
        whole_value = whole_info[name]
        if (sample_value == whole_value) == expect_equal:
            logging.info(f"[{tag}] {name} passed!")
        else:
            relation = "equal" if expect_equal else "not equal"
            logging.warning(f"[{tag}] {name} should be {relation}: {sample_value} vs {whole_value}")


def compare_unnormalized(pred_files, mask_files):
    """Feed pre-normalized data (normalize=False); per-sample and whole-image results must agree.

    Because the overall binary handlers only accumulate confusion counts, feeding
    each image separately or as one concatenated canvas yields identical totals.
    """
    sample_recorder = _build_recorder()
    whole_recorder = _build_recorder()

    preds = []
    masks = []
    for pred, mask in _load_pairs(pred_files, mask_files):
        preds.append(pred)
        masks.append(mask)
        # Normalize manually, exactly once per image.
        sample_recorder.step(pred=pred / 255, gt=mask > 127, normalize=False)
    sample_info = _binary_results(sample_recorder)

    preds = np.concatenate(preds, axis=-1)  # H,n*W
    masks = np.concatenate(masks, axis=-1)
    whole_recorder.step(pred=preds / 255, gt=masks > 127, normalize=False)
    whole_info = _binary_results(whole_recorder)

    # The accumulated tp/fp/tn/fn are identical either way, so results must match.
    # (NOTE: the log tag previously said "normalized" here; it was swapped with
    # compare_normalized's tag.)
    _report(sample_info, whole_info, expect_equal=True, tag="unnormalized")


def compare_normalized(pred_files, mask_files):
    """Let the recorders normalize (normalize=True); per-sample and whole-image results should differ.

    The built-in normalization applies per-array min-max scaling, so normalizing
    each image separately vs. one concatenated canvas produces different inputs.
    """
    sample_recorder = _build_recorder()
    whole_recorder = _build_recorder()

    preds = []
    masks = []
    for pred, mask in _load_pairs(pred_files, mask_files):
        preds.append(pred)
        masks.append(mask)
        sample_recorder.step(pred=pred, gt=mask, normalize=True)
    sample_info = _binary_results(sample_recorder)

    preds = np.concatenate(preds, axis=-1)  # H,n*W
    masks = np.concatenate(masks, axis=-1)
    whole_recorder.step(pred=preds, gt=masks, normalize=True)
    whole_info = _binary_results(whole_recorder)

    # Min-max normalization depends on the whole array's content, so expect differences.
    _report(sample_info, whole_info, expect_equal=False, tag="normalized")


def main():
    pred_dir = "test_data/preds"
    mask_dir = "test_data/masks"
    pred_files = sorted(os.path.join(pred_dir, f) for f in os.listdir(pred_dir))
    mask_files = sorted(os.path.join(mask_dir, f) for f in os.listdir(mask_dir))
    compare_normalized(pred_files, mask_files)
    compare_unnormalized(pred_files, mask_files)


if __name__ == "__main__":
    main()
2 | Logo 3 |
4 |

PySODMetrics: 一份简单有效的SOD指标实现

5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | > [!important] 14 | > 15 | > 我们在这一领域的探索仍在继续,这一次是通过 [PyIRSTDMetrics](https://github.com/lartpang/PyIRSTDMetrics) —— 一个源于同样核心动机的项目。 16 | > 你可以把它们看作是一对“孪生”计划:这个项目旨在描绘当前评估的全景,而它的“兄弟”项目则更进一步,在此基础上加以拓展,并重新思考这一体系。 17 | > 我们很希望得到你的 Star! 🌟 18 | 19 | 20 | ## 介绍 21 | 22 | 一份简单有效的 SOD 指标实现。 23 | 24 | - 基于`numpy`和极少量`scipy.ndimage`代码 25 | - 基于 DengPing Fan 进行对比验证 26 | - 结构简单,易于扩展 27 | - 代码轻量且快速 28 | 29 | 欢迎您的改进和建议。 30 | 31 | ### 相关项目 32 | 33 | - [PySODEvalToolkit](https://github.com/lartpang/PySODEvalToolkit): A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 34 | 35 | ### 支持的指标 36 | 37 | | Metric | Sample-based | Whole-based | Related Class | 38 | | --------------------------------------------------- | ------------------------------------------- | ------------------------ | -------------------------------------------- | 39 | | MAE | soft,si-soft | | `MAE` | 40 | | S-measure $S_{m}$ | soft | | `Smeasure` | 41 | | weighted F-measure ($F^{\omega}_{\beta}$) | soft | | `WeightedFmeasure` | 42 | | Human Correction Effort Measure | soft | | `HumanCorrectionEffortMeasure` | 43 | | Context-Measure ($C_{\beta}$, $C^{\omega}_{\beta}$) | soft | | `ContextMeasure`, `CamouflageContextMeasure` | 44 | | Multi-Scale IoU | max,avg,adp,bin | | `MSIoU` | 45 | | E-measure ($E_{m}$) | max,avg,adp | | `Emeasure` | 46 | | F-measure (old) ($F_{\beta}$) | max,avg,adp | | `Fmeasure` (Will be removed!) 
| 47 | | F-measure (new) ($F_{\beta}$, $F_{1}$) | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FmeasureHandler` | 48 | | BER | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`BERHandler` | 49 | | Dice | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`DICEHandler` | 50 | | FPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FPRHandler` | 51 | | IoU | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`IOUHandler` | 52 | | Kappa | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`KappaHandler` | 53 | | Overall Accuracy | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`OverallAccuracyHandler` | 54 | | Precision | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`PrecisionHandler` | 55 | | Recall | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`RecallHandler` | 56 | | Sensitivity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SensitivityHandler` | 57 | | Specificity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SpecificityHandler` | 58 | | TNR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TNRHandler` | 59 | | TPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TPRHandler` | 60 | 61 | **注意**: 62 | - 基于样本(Sample-based)的 `si-` 变体会根据形状为 `(num_targets, 256)` 的样本级 `fm` 序列,计算每个样本的均值/最大值。 63 | - 基于整体(Whole-based)的 `si-` 变体会在跨所有样本的所有目标的平均 `fm` 序列上计算均值/最大值。 64 | - 由于 `*adp` 变体是专门为 `sample-based` 计算设计的,因此不支持基于整体(whole-based)的计算方式。 65 | 66 | ## 使用 67 | 68 | 核心文件在文件夹 `py_sod_metrics` 中。 69 | 70 | - **[新,但可能不稳定]** 从源代码安装:`pip install 
git+https://github.com/lartpang/PySODMetrics.git` 71 | - **[更稳定一些]** 从 PyPI 安装:`pip install pysodmetrics` 72 | 73 | ### 示例 74 | 75 | - [examples/test_metrics.py](./examples/test_metrics.py) 76 | - [examples/metric_recorder.py](./examples/metric_recorder.py) 77 | 78 | ## 参考 79 | 80 | - [Matlab Code](https://github.com/DengPingFan/CODToolbox) by DengPingFan(): 在我们的测试中 (测试代码可见`test`文件夹下内容),结果与 Fan 的代码一致。 81 | - matlab 代码需要将 的`Bi_sal(sal>threshold)=1;`改为` Bi_sal(sal>=threshold)=1;`。细节可见 [相关讨论](https://github.com/DengPingFan/CODToolbox/issues/1)。 82 | - 2021-12-20 (Version `1.3.0`):由于 numpy 和 matlab 的不同,在 `1.2.x` 版本中,matlab 代码的结果与我们的结果在某些指标上存在非常细微的差异。[最近的 PR](https://github.com/lartpang/PySODMetrics/pull/3) 缓解了这个问题。但是,在 E-measure 上仍然存在非常小的差异。大多数论文中的结果都四舍五入到三四位有效数字,因此,新版本与“1.2.x”版本之间没有明显差异。 83 | - 84 | 85 | ```text 86 | @inproceedings{Fmeasure, 87 | title={Frequency-tuned salient region detection}, 88 | author={Achanta, Radhakrishna and Hemami, Sheila and Estrada, Francisco and S{\"u}sstrunk, Sabine}, 89 | booktitle=CVPR, 90 | number={CONF}, 91 | pages={1597--1604}, 92 | year={2009} 93 | } 94 | 95 | @inproceedings{MAE, 96 | title={Saliency filters: Contrast based filtering for salient region detection}, 97 | author={Perazzi, Federico and Kr{\"a}henb{\"u}hl, Philipp and Pritch, Yael and Hornung, Alexander}, 98 | booktitle=CVPR, 99 | pages={733--740}, 100 | year={2012} 101 | } 102 | 103 | @inproceedings{Smeasure, 104 | title={Structure-measure: A new way to evaluate foreground maps}, 105 | author={Fan, Deng-Ping and Cheng, Ming-Ming and Liu, Yun and Li, Tao and Borji, Ali}, 106 | booktitle=ICCV, 107 | pages={4548--4557}, 108 | year={2017} 109 | } 110 | 111 | @inproceedings{Emeasure, 112 | title="Enhanced-alignment Measure for Binary Foreground Map Evaluation", 113 | author="Deng-Ping {Fan} and Cheng {Gong} and Yang {Cao} and Bo {Ren} and Ming-Ming {Cheng} and Ali {Borji}", 114 | booktitle=IJCAI, 115 | pages="698--704", 116 | year={2018} 117 | } 118 | 119 | 
@inproceedings{wFmeasure, 120 | title={How to evaluate foreground maps?}, 121 | author={Margolin, Ran and Zelnik-Manor, Lihi and Tal, Ayellet}, 122 | booktitle=CVPR, 123 | pages={248--255}, 124 | year={2014} 125 | } 126 | 127 | @inproceedings{MSIoU, 128 | title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures}, 129 | author = {Ahmadzadeh, Azim and Kempton, Dustin J. and Chen, Yang and Angryk, Rafal A.}, 130 | booktitle = ICIP, 131 | year = {2021}, 132 | } 133 | 134 | @inproceedings{SizeInvarianceVariants, 135 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 136 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 137 | booktitle = ICML, 138 | year = {2024} 139 | } 140 | 141 | @inproceedings{HumanCorrectionEffortMeasure, 142 | title = {Highly Accurate Dichotomous Image Segmentation}, 143 | author = {Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool}, 144 | booktitle = ECCV, 145 | year = {2022} 146 | } 147 | 148 | @article{ContextMeasure, 149 | title={Context-measure: Contextualizing Metric for Camouflage}, 150 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 151 | journal={arXiv preprint arXiv:2512.07076}, 152 | year={2025} 153 | } 154 | ``` 155 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 |
2 | Logo 3 |
4 |

PySODMetrics: A simple and efficient implementation of SOD metrics

5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | > [!important] 14 | > 15 | > Our exploration in this field continues with [PyIRSTDMetrics](https://github.com/lartpang/PyIRSTDMetrics), a project born from the same core motivation. 16 | > ​​Think of them as twin initiatives: this project maps the landscape of current evaluation, while its sibling takes the next step to expand upon and rethink it. 17 | > We'd love your star! 🌟 18 | 19 | ## Introduction 20 | 21 | A simple and efficient implementation of SOD metrics. 22 | 23 | - Based on `numpy` and `scipy` 24 | - Verification based on Fan's matlab code 25 | - The code structure is simple and easy to extend 26 | - The code is lightweight and fast 27 | 28 | Your improvements and suggestions are welcome. 29 | 30 | ### Related Projects 31 | 32 | - [PySODEvalToolkit](https://github.com/lartpang/PySODEvalToolkit): A Python-based Evaluation Toolbox for Salient Object Detection and Camouflaged Object Detection 33 | 34 | ### Supported Metrics 35 | 36 | | Metric | Sample-based | Whole-based | Related Class | 37 | | --------------------------------------------------- | ------------------------------------------- | ------------------------ | -------------------------------------------- | 38 | | MAE | soft,si-soft | | `MAE` | 39 | | S-measure $S_{m}$ | soft | | `Smeasure` | 40 | | weighted F-measure ($F^{\omega}_{\beta}$) | soft | | `WeightedFmeasure` | 41 | | Human Correction Effort Measure | soft | | `HumanCorrectionEffortMeasure` | 42 | | Context-Measure ($C_{\beta}$, $C^{\omega}_{\beta}$) | soft | | `ContextMeasure`, `CamouflageContextMeasure` | 43 | | Multi-Scale IoU | max,avg,adp,bin | | `MSIoU` | 44 | | E-measure ($E_{m}$) | max,avg,adp | | `Emeasure` | 45 | | F-measure (old) ($F_{\beta}$) | max,avg,adp | | `Fmeasure` (Will be removed!) 
| 46 | | F-measure (new) ($F_{\beta}$, $F_{1}$) | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FmeasureHandler` | 47 | | BER | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`BERHandler` | 48 | | Dice | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`DICEHandler` | 49 | | FPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`FPRHandler` | 50 | | IoU | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`IOUHandler` | 51 | | Kappa | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`KappaHandler` | 52 | | Overall Accuracy | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`OverallAccuracyHandler` | 53 | | Precision | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`PrecisionHandler` | 54 | | Recall | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`RecallHandler` | 55 | | Sensitivity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SensitivityHandler` | 56 | | Specificity | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`SpecificityHandler` | 57 | | TNR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TNRHandler` | 58 | | TPR | max,avg,adp,bin,si-max,si-avg,si-adp,si-bin | bin,si-max,si-avg,si-bin | `FmeasureV2`+`TPRHandler` | 59 | 60 | **NOTE**: 61 | - Sample-based `si-` variants calculate the sample-specific mean/maximum based on the sample-based fm sequence with a shape of `(num_targets, 256)`. 62 | - Whole-based `si-` variants calculate the mean/maximum based on the average fm sequence across all targets from all samples. 
63 | - Because the `*adp` variants are specialized for `sample-based` computation, they do not support whole-based computation. 64 | 65 | ## Usage 66 | 67 | The core files are in the folder `py_sod_metrics`. 68 | 69 | - **[Latest, but may be unstable]** Install from the source code: `pip install git+https://github.com/lartpang/PySODMetrics.git` 70 | - **[More stable]** Install from PyPI: `pip install pysodmetrics` 71 | 72 | ### Examples 73 | 74 | - [examples/test_metrics.py](./examples/test_metrics.py) 75 | - [examples/metric_recorder.py](./examples/metric_recorder.py) 76 | 77 | ## Reference 78 | 79 | - [Matlab Code](https://github.com/DengPingFan/CODToolbox) by DengPingFan(): In our comparison (the test code can be seen under the `test` folder), the result is consistent with the code. 80 | - The matlab code needs to change `Bi_sal(sal>threshold)=1;` to `Bi_sal(sal>=threshold)=1;` in . For related discussion, please see [the issue](https://github.com/DengPingFan/CODToolbox/issues/1). 81 | - 2021-12-20 (version `1.3.0`): Due to the difference between numpy and matlab, in version `1.2.x`, there are very slight differences on some metrics between the results of the matlab code and ours. The [recent PR](https://github.com/lartpang/PySODMetrics/pull/3) alleviated this problem. However, there are still very small differences on E-measure. The results in most papers are rounded off to three or four significant figures, so, there is no obvious difference between the new version and the version `1.2.x` for them. 
82 | - 83 | 84 | ```text 85 | @inproceedings{Fmeasure, 86 | title={Frequency-tuned salient region detection}, 87 | author={Achanta, Radhakrishna and Hemami, Sheila and Estrada, Francisco and S{\"u}sstrunk, Sabine}, 88 | booktitle=CVPR, 89 | number={CONF}, 90 | pages={1597--1604}, 91 | year={2009} 92 | } 93 | 94 | @inproceedings{MAE, 95 | title={Saliency filters: Contrast based filtering for salient region detection}, 96 | author={Perazzi, Federico and Kr{\"a}henb{\"u}hl, Philipp and Pritch, Yael and Hornung, Alexander}, 97 | booktitle=CVPR, 98 | pages={733--740}, 99 | year={2012} 100 | } 101 | 102 | @inproceedings{Smeasure, 103 | title={Structure-measure: A new way to evaluate foreground maps}, 104 | author={Fan, Deng-Ping and Cheng, Ming-Ming and Liu, Yun and Li, Tao and Borji, Ali}, 105 | booktitle=ICCV, 106 | pages={4548--4557}, 107 | year={2017} 108 | } 109 | 110 | @inproceedings{Emeasure, 111 | title="Enhanced-alignment Measure for Binary Foreground Map Evaluation", 112 | author="Deng-Ping {Fan} and Cheng {Gong} and Yang {Cao} and Bo {Ren} and Ming-Ming {Cheng} and Ali {Borji}", 113 | booktitle=IJCAI, 114 | pages="698--704", 115 | year={2018} 116 | } 117 | 118 | @inproceedings{wFmeasure, 119 | title={How to evaluate foreground maps?}, 120 | author={Margolin, Ran and Zelnik-Manor, Lihi and Tal, Ayellet}, 121 | booktitle=CVPR, 122 | pages={248--255}, 123 | year={2014} 124 | } 125 | 126 | @inproceedings{MSIoU, 127 | title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures}, 128 | author = {Ahmadzadeh, Azim and Kempton, Dustin J. 
and Chen, Yang and Angryk, Rafal A.}, 129 | booktitle = ICIP, 130 | year = {2021}, 131 | } 132 | 133 | @inproceedings{SizeInvarianceVariants, 134 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 135 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 136 | booktitle = ICML, 137 | year = {2024} 138 | } 139 | 140 | @inproceedings{HumanCorrectionEffortMeasure, 141 | title = {Highly Accurate Dichotomous Image Segmentation}, 142 | author = {Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool}, 143 | booktitle = ECCV, 144 | year = {2022} 145 | } 146 | 147 | @article{ContextMeasure, 148 | title={Context-measure: Contextualizing Metric for Camouflage}, 149 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 150 | journal={arXiv preprint arXiv:2512.07076}, 151 | year={2025} 152 | } 153 | ``` 154 | -------------------------------------------------------------------------------- /deploy/metrics.rst: -------------------------------------------------------------------------------- 1 | Supported Metrics 2 | ================= 3 | 4 | This page provides detailed information about all the metrics supported by PySODMetrics. 
5 | 6 | Overview 7 | -------- 8 | 9 | PySODMetrics provides two types of metric computation: 10 | 11 | * **Sample-based**: Metrics are computed for each sample individually and then aggregated 12 | * **Whole-based**: Metrics are computed across all samples globally 13 | 14 | Most metrics support different aggregation strategies: 15 | 16 | * ``max``: Maximum value across all thresholds 17 | * ``avg``: Average value across all thresholds 18 | * ``adp``: Adaptive threshold (2 × mean of predictions) 19 | * ``bin``: Binary threshold (typically 0.5 or fixed threshold) 20 | * ``si-*``: Size-invariant variants for handling multi-scale objects 21 | 22 | Basic Metrics 23 | ------------- 24 | 25 | MAE (Mean Absolute Error) 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | Measures the pixel-wise absolute difference between prediction and ground truth. 29 | 30 | .. math:: 31 | 32 | MAE = \frac{1}{W \times H} \sum_{x=1}^{W} \sum_{y=1}^{H} |P(x,y) - G(x,y)| 33 | 34 | where :math:`P` is the prediction, :math:`G` is the ground truth, and :math:`W \times H` is the image size. 35 | 36 | **Reference:** 37 | 38 | Perazzi et al., "Saliency filters: Contrast based filtering for salient region detection", CVPR 2012 39 | 40 | **Usage:** 41 | 42 | .. code-block:: python 43 | 44 | from py_sod_metrics import MAE 45 | 46 | mae = MAE() 47 | mae.step(pred, gt) 48 | results = mae.get_results() 49 | print(f"MAE: {results['mae']:.4f}") 50 | 51 | S-measure (Structure Measure) 52 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | Evaluates structural similarity between prediction and ground truth, considering both region-aware and object-aware components. 55 | 56 | .. math:: 57 | 58 | S_m = \alpha \cdot S_o + (1 - \alpha) \cdot S_r 59 | 60 | where :math:`S_o` is the object-aware structural similarity and :math:`S_r` is the region-aware structural similarity. 61 | 62 | **Reference:** 63 | 64 | Fan et al., "Structure-measure: A new way to evaluate foreground maps", ICCV 2017 65 | 66 | **Usage:** 67 | 68 | .. 
code-block:: python 69 | 70 | from py_sod_metrics import Smeasure 71 | 72 | sm = Smeasure() 73 | sm.step(pred, gt) 74 | results = sm.get_results() 75 | print(f"S-measure: {results['sm']:.4f}") 76 | 77 | E-measure (Enhanced-alignment Measure) 78 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 79 | 80 | Captures both local and global matching information between prediction and ground truth. 81 | 82 | **Reference:** 83 | 84 | Fan et al., "Enhanced-alignment Measure for Binary Foreground Map Evaluation", IJCAI 2018 85 | 86 | **Usage:** 87 | 88 | .. code-block:: python 89 | 90 | from py_sod_metrics import Emeasure 91 | 92 | em = Emeasure() 93 | em.step(pred, gt) 94 | results = em.get_results() 95 | print(f"Max E-measure: {results['em']['adp']:.4f}") 96 | print(f"Avg E-measure: {results['em']['avg']:.4f}") 97 | 98 | F-measure 99 | ~~~~~~~~~ 100 | 101 | Harmonic mean of precision and recall. 102 | 103 | .. math:: 104 | 105 | F_\beta = \frac{(1 + \beta^2) \times Precision \times Recall}{\beta^2 \times Precision + Recall} 106 | 107 | **Reference:** 108 | 109 | Achanta et al., "Frequency-tuned salient region detection", CVPR 2009 110 | 111 | **Usage:** 112 | 113 | .. code-block:: python 114 | 115 | from py_sod_metrics import Fmeasure 116 | 117 | fm = Fmeasure() 118 | fm.step(pred, gt) 119 | results = fm.get_results() 120 | print(f"Max F-measure: {results['fm']['adp']:.4f}") 121 | 122 | Weighted F-measure 123 | ~~~~~~~~~~~~~~~~~~ 124 | 125 | A weighted version of F-measure that assigns different importance to different pixels based on their location. 126 | 127 | **Reference:** 128 | 129 | Margolin et al., "How to evaluate foreground maps?", CVPR 2014 130 | 131 | **Usage:** 132 | 133 | .. 
code-block:: python 134 | 135 | from py_sod_metrics import WeightedFmeasure 136 | 137 | wfm = WeightedFmeasure() 138 | wfm.step(pred, gt) 139 | results = wfm.get_results() 140 | print(f"Weighted F-measure: {results['wfm']:.4f}") 141 | 142 | Advanced Metrics 143 | ---------------- 144 | 145 | FmeasureV2 Framework 146 | ~~~~~~~~~~~~~~~~~~~~ 147 | 148 | A flexible framework for computing multiple binary classification metrics using different handlers. 149 | 150 | **Supported Handlers:** 151 | 152 | * ``FmeasureHandler``: F-measure with configurable β 153 | * ``PrecisionHandler``: Precision (Positive Predictive Value) 154 | * ``RecallHandler``: Recall (Sensitivity, TPR) 155 | * ``IOUHandler``: Intersection over Union 156 | * ``DICEHandler``: Dice coefficient 157 | * ``BERHandler``: Balanced Error Rate 158 | * ``KappaHandler``: Cohen's Kappa 159 | * ``OverallAccuracyHandler``: Overall classification accuracy 160 | * ``SpecificityHandler``: Specificity (TNR) 161 | * ``SensitivityHandler``: Sensitivity (same as Recall) 162 | * ``FPRHandler``: False Positive Rate 163 | * ``TNRHandler``: True Negative Rate 164 | * ``TPRHandler``: True Positive Rate 165 | 166 | **Usage:** 167 | 168 | .. code-block:: python 169 | 170 | from py_sod_metrics import FmeasureV2, FmeasureHandler, IOUHandler 171 | 172 | fm_v2 = FmeasureV2( 173 | handlers={ 174 | "fm": FmeasureHandler(beta=0.3), 175 | "iou": IOUHandler(), 176 | } 177 | ) 178 | 179 | fm_v2.step(pred, gt) 180 | results = fm_v2.get_results() 181 | 182 | Context-Measure 183 | ~~~~~~~~~~~~~~~ 184 | 185 | Designed specifically for camouflaged object detection, considering contextual information. 186 | 187 | **Reference:** 188 | 189 | Wang et al., "Context-measure: Contextualizing Metric for Camouflage", arXiv 2025 190 | 191 | **Variants:** 192 | 193 | * ``ContextMeasure``: Standard context measure :math:`C_\beta` 194 | * ``CamouflageContextMeasure``: Weighted context measure :math:`C^\omega_\beta` 195 | 196 | **Usage:** 197 | 198 | .. 
code-block:: python 199 | 200 | from py_sod_metrics import ContextMeasure, CamouflageContextMeasure 201 | 202 | cm = ContextMeasure() 203 | ccm = CamouflageContextMeasure() 204 | 205 | cm.step(pred, gt) 206 | ccm.step(pred, gt) 207 | 208 | Multi-Scale IoU (MSIoU) 209 | ~~~~~~~~~~~~~~~~~~~~~~~ 210 | 211 | Evaluates segmentation quality across multiple scales, particularly useful for fine structures. 212 | 213 | **Reference:** 214 | 215 | Ahmadzadeh et al., "Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures", ICIP 2021 216 | 217 | **Usage:** 218 | 219 | .. code-block:: python 220 | 221 | from py_sod_metrics import MSIoU 222 | 223 | msiou = MSIoU() 224 | msiou.step(pred, gt) 225 | results = msiou.get_results() 226 | 227 | Human Correction Effort Measure 228 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 229 | 230 | Estimates the effort required for humans to correct prediction errors. 231 | 232 | **Reference:** 233 | 234 | Qin et al., "Highly Accurate Dichotomous Image Segmentation", ECCV 2022 235 | 236 | **Usage:** 237 | 238 | .. code-block:: python 239 | 240 | from py_sod_metrics import HumanCorrectionEffortMeasure 241 | 242 | hcem = HumanCorrectionEffortMeasure() 243 | hcem.step(pred, gt) 244 | results = hcem.get_results() 245 | 246 | Size-Invariant Metrics 247 | ---------------------- 248 | 249 | For datasets with objects at multiple scales, size-invariant variants provide more balanced evaluation. 250 | 251 | Size-Invariant F-measure 252 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 253 | 254 | **Reference:** 255 | 256 | Li et al., "Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection", ICML 2024 257 | 258 | **Usage:** 259 | 260 | .. 
code-block:: python 261 | 262 | from py_sod_metrics import SizeInvarianceFmeasureV2 263 | 264 | si_fm = SizeInvarianceFmeasureV2() 265 | si_fm.step(pred, gt) 266 | results = si_fm.get_results() 267 | print(f"SI F-measure (avg): {results['fm']['si-avg']:.4f}") 268 | 269 | Size-Invariant MAE 270 | ~~~~~~~~~~~~~~~~~~ 271 | 272 | **Usage:** 273 | 274 | .. code-block:: python 275 | 276 | from py_sod_metrics import SizeInvarianceMAE 277 | 278 | si_mae = SizeInvarianceMAE() 279 | si_mae.step(pred, gt) 280 | results = si_mae.get_results() 281 | 282 | Metric Comparison Table 283 | ----------------------- 284 | 285 | +--------------------------------------------------+--------------------+------------------+ 286 | | Metric | Sample-based | Whole-based | 287 | +==================================================+====================+==================+ 288 | | MAE | soft, si-soft | — | 289 | +--------------------------------------------------+--------------------+------------------+ 290 | | S-measure | soft | — | 291 | +--------------------------------------------------+--------------------+------------------+ 292 | | Weighted F-measure | soft | — | 293 | +--------------------------------------------------+--------------------+------------------+ 294 | | Human Correction Effort | soft | — | 295 | +--------------------------------------------------+--------------------+------------------+ 296 | | Context-Measure | soft | — | 297 | +--------------------------------------------------+--------------------+------------------+ 298 | | Multi-Scale IoU | max,avg,adp,bin | — | 299 | +--------------------------------------------------+--------------------+------------------+ 300 | | E-measure | max,avg,adp | — | 301 | +--------------------------------------------------+--------------------+------------------+ 302 | | F-measure (V2) | max,avg,adp,bin,si | bin,si | 303 | +--------------------------------------------------+--------------------+------------------+ 304 | | BER, Dice, IoU, 
Precision, Recall, etc. | max,avg,adp,bin,si | bin,si | 305 | +--------------------------------------------------+--------------------+------------------+ 306 | 307 | Notes 308 | ----- 309 | 310 | * **soft**: Metrics that work directly on continuous prediction values 311 | * **si-**: Size-invariant variants that normalize by object size 312 | * **adp**: Adaptive thresholding based on prediction statistics 313 | * For detailed mathematical formulations, please refer to the original papers cited above 314 | -------------------------------------------------------------------------------- /py_sod_metrics/size_invariance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from skimage import measure 3 | 4 | from .fmeasurev2 import FmeasureV2 5 | from .sod_metrics import MAE 6 | from .utils import TYPE, validate_and_normalize_input 7 | 8 | 9 | def parse_connected_components(mask: np.ndarray, area_threshold: float = 50) -> tuple: 10 | """Find the connected components in a binary mask. 11 | 12 | 1. If there are no connected components, return 0 and an all-zero labeled mask. 13 | 2. If all the connected components are smaller than the area_threshold, we will return the largest one. 14 | 15 | Args: 16 | mask (np.ndarray): binary mask 17 | area_threshold (float): The threshold for the area of the connected components.
18 | 19 | Returns: 20 | tuple: max_valid_tgt_idx, valid_labeled_mask 21 | """ 22 | labeled_tgts = measure.label(mask, connectivity=1, background=0, return_num=False) 23 | tgt_props = measure.regionprops(labeled_tgts) 24 | 25 | # find the valid targets based on the target size 26 | tgts_with_max_size = [] 27 | max_valid_tgt_idx = 0 # 0 is background 28 | valid_labeled_mask = np.zeros_like(mask, dtype=int) 29 | for tgt_prop in tgt_props: 30 | if tgts_with_max_size is not None or tgts_with_max_size[0].area == tgt_prop.area: 31 | tgts_with_max_size.append(tgt_prop) 32 | elif tgts_with_max_size[0].area < tgt_prop.area: 33 | tgts_with_max_size = [tgt_prop] 34 | 35 | if tgt_prop.area >= area_threshold: # valid indices start from 1 36 | max_valid_tgt_idx += 1 37 | valid_labeled_mask[labeled_tgts == tgt_prop.label] = max_valid_tgt_idx 38 | 39 | if max_valid_tgt_idx == 0: # no valid targets 40 | for tgt_prop in tgts_with_max_size: 41 | max_valid_tgt_idx += 1 42 | valid_labeled_mask[labeled_tgts == tgt_prop.label] = max_valid_tgt_idx 43 | return max_valid_tgt_idx, valid_labeled_mask 44 | 45 | 46 | def encode_bboxwise_tgts_bitwise(max_valid_tgt_idx: int, valid_labeled_mask: np.ndarray) -> np.ndarray: 47 | """Encode each target bbox region with a bitwise mask. 48 | 49 | Args: 50 | max_valid_tgt_idx (int): The maximum index of the valid targets. 51 | valid_labeled_mask (np.ndarray): The mask of the valid targets. 0 is background. 52 | 53 | Returns: 54 | np.ndarray: The size weight for the bbox of each target. 55 | """ 56 | binarized_weights = np.zeros_like(valid_labeled_mask, dtype=float) 57 | for label in range(max_valid_tgt_idx + 1): # 0 is background 58 | rows, cols = np.where(valid_labeled_mask == label) 59 | assert len(rows) * len(cols) > 0, ( 60 | f"connected_block_size = 0 when label = {label} for {np.unique(valid_labeled_mask)}!" 
61 | ) 62 | 63 | xmin, xmax = min(cols), max(cols) 64 | ymin, ymax = min(rows), max(rows) 65 | 66 | # This encoding scheme can encode multiple overlapping targets in different bits. 67 | weight = 0 if label == 0 else 1 << (label - 1) # 0,1,2,4,8,... 68 | binarized_weights[ymin : (ymax + 1), xmin : (xmax + 1)] += weight 69 | return binarized_weights 70 | 71 | 72 | def get_kth_bit(n: np.ndarray, k: int) -> np.ndarray: 73 | """Get the value (0 or 1) in the k-th bit of each element in the array. 74 | 75 | Args: 76 | n (np.ndarray): The original data array. 77 | k (int): The index of the bit to extract (1-based). 78 | 79 | Returns: 80 | np.ndarray: The extracted data array. Elements whose k-th bit is set yield 1; all others yield 0. 81 | """ 82 | n = n.astype(int) 83 | k = int(k) 84 | 85 | # Use bitwise AND to check if the k-th bit is set 86 | return (n & (1 << (k - 1))) >> (k - 1) 87 | 88 | 89 | class SizeInvarianceFmeasureV2(FmeasureV2): 90 | """Size invariance version of FmeasureV2. 91 | 92 | This provides size-invariant versions of standard SOD metrics that address the imbalance problem in multi-object salient object detection. Traditional metrics can be biased toward larger objects, while size-invariant metrics ensure fair evaluation across objects of different sizes.
93 | 94 | ``` 95 | @inproceedings{SizeInvarianceVariants, 96 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 97 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 98 | booktitle = ICML, 99 | year = {2024} 100 | } 101 | ``` 102 | """ 103 | 104 | def _update_metrics(self, pred: np.ndarray, gt: np.ndarray): 105 | FG = np.count_nonzero(gt) # ground-truth foreground, FG=(TPs+FNs) 106 | BG = gt.size - FG # ground-truth background, BG=(TNs+FPs) 107 | 108 | dynamical_tpfptnfn = None 109 | adaptive_tpfptnfn = None 110 | binary_tpfptnfn = None 111 | for handler_name, handler in self._metric_handlers.items(): 112 | if handler.dynamic_results is not None: 113 | if dynamical_tpfptnfn is None: 114 | dynamical_tpfptnfn = self.dynamically_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 115 | tgt_result = handler(**dynamical_tpfptnfn) 116 | if handler.sample_based: # is not None 117 | if not handler.dynamic_results or not isinstance( 118 | handler.dynamic_results[-1], list 119 | ): # is not [] or not contain list 120 | handler.dynamic_results.append([]) 121 | handler.dynamic_results[-1].append(tgt_result) 122 | else: 123 | handler.dynamic_results.append(tgt_result) 124 | 125 | if handler.adaptive_results is not None: 126 | if adaptive_tpfptnfn is None: 127 | adaptive_tpfptnfn = self.adaptively_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 128 | tgt_result = handler(**adaptive_tpfptnfn) 129 | if not handler.adaptive_results or not isinstance(handler.adaptive_results[-1], list): 130 | handler.adaptive_results.append([]) 131 | handler.adaptive_results[-1].append(tgt_result) 132 | 133 | if handler.binary_results is not None: 134 | if binary_tpfptnfn is None: 135 | # `pred > 0.5`: Simulating the effect of the `argmax` function.
136 | binary_tpfptnfn = self.get_statistics(binary=pred > 0.5, gt=gt, FG=FG, BG=BG) 137 | 138 | if handler.sample_based: # keep a per-target result list for the current sample 139 | tgt_result = handler(**binary_tpfptnfn) 140 | if not handler.binary_results or not isinstance(handler.binary_results[-1], list): 141 | handler.binary_results.append([]) 142 | handler.binary_results[-1].append(tgt_result) 143 | else: # will average over all targets from all samples 144 | tgt_result = binary_tpfptnfn 145 | handler.binary_results["tp"] += tgt_result["tp"] 146 | handler.binary_results["fp"] += tgt_result["fp"] 147 | handler.binary_results["tn"] += tgt_result["tn"] 148 | handler.binary_results["fn"] += tgt_result["fn"] 149 | 150 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 151 | """Accumulate the metrics for the pair of pred and gt. 152 | 153 | Args: 154 | pred (np.ndarray): Prediction, gray scale image. 155 | gt (np.ndarray): Ground truth, gray scale image. 156 | normalize (bool, optional): Whether to normalize the input data. Defaults to True.
157 | """ 158 | if not self._metric_handlers: # no metric handler has been added 159 | raise ValueError("Please add your metric handler before using `step()`.") 160 | 161 | pred, gt = validate_and_normalize_input(pred, gt, normalize=normalize) 162 | 163 | max_valid_tgt_idx, valid_labeled_mask = parse_connected_components(mask=gt) 164 | tgt_weights = encode_bboxwise_tgts_bitwise(max_valid_tgt_idx, valid_labeled_mask) 165 | 166 | if max_valid_tgt_idx == 0: # no target or no background 167 | self._update_metrics(pred=pred, gt=gt) 168 | else: 169 | for tgt_idx in range(1, max_valid_tgt_idx + 1): 170 | tgt_mask = get_kth_bit(tgt_weights, k=tgt_idx) > 0 171 | 172 | _pred = pred * tgt_mask 173 | _gt = gt & tgt_mask 174 | self._update_metrics(pred=_pred, gt=_gt) 175 | 176 | # average over all targets in each sample 177 | for handler_name, handler in self._metric_handlers.items(): 178 | if handler.dynamic_results is not None and handler.sample_based: 179 | tgt_results = handler.dynamic_results.pop() # Tx256 180 | handler.dynamic_results.append(np.array(tgt_results, dtype=TYPE)) # Tx256 181 | 182 | if handler.adaptive_results is not None: 183 | tgt_results = handler.adaptive_results.pop() # Tx1 184 | handler.adaptive_results.append(np.mean(np.array(tgt_results, dtype=TYPE))) # 1 185 | 186 | if handler.binary_results is not None and handler.sample_based: 187 | tgt_results = handler.binary_results.pop() # Tx1 188 | handler.binary_results.append(np.mean(np.array(tgt_results, dtype=TYPE))) # 1 189 | 190 | def get_results(self) -> dict: 191 | """Return the results of the specific metric names. 192 | 193 | Returns: 194 | dict: All results corresponding to different metrics.
195 | """ 196 | results = {} 197 | for handler_name, handler in self._metric_handlers.items(): 198 | res = {} 199 | if handler.dynamic_results is not None: 200 | dynamic_results = handler.dynamic_results 201 | if handler.sample_based: # N arrays, each of shape T'x256 202 | res["dynamic"] = dynamic_results 203 | else: # N'x256 -> 256 204 | res["dynamic"] = np.mean(np.array(dynamic_results, dtype=TYPE), axis=0) 205 | 206 | if handler.adaptive_results is not None: 207 | res["adaptive"] = np.mean(np.array(handler.adaptive_results, dtype=TYPE)) # 1 208 | 209 | if handler.binary_results is not None: 210 | binary_results = handler.binary_results 211 | if handler.sample_based: 212 | res["binary"] = np.mean(np.array(binary_results, dtype=TYPE)) # 1 213 | else: 214 | # NOTE: use `np.mean` to simplify output format (`array(123)` -> `123`) 215 | res["binary"] = np.mean(handler(**binary_results)) 216 | results[handler_name] = res 217 | return results 218 | 219 | 220 | class SizeInvarianceMAE(MAE): 221 | """Size invariance version of MAE. 222 | 223 | ``` 224 | @inproceedings{SizeInvarianceVariants, 225 | title = {Size-invariance Matters: Rethinking Metrics and Losses for Imbalanced Multi-object Salient Object Detection}, 226 | author = {Feiran Li and Qianqian Xu and Shilong Bao and Zhiyong Yang and Runmin Cong and Xiaochun Cao and Qingming Huang}, 227 | booktitle = ICML, 228 | year = {2024} 229 | } 230 | ``` 231 | """ 232 | 233 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 234 | """Accumulate the metric for the pair of pred and gt. 235 | 236 | Args: 237 | pred (np.ndarray): Prediction, gray scale image. 238 | gt (np.ndarray): Ground truth, gray scale image. 239 | normalize (bool, optional): Whether to normalize the input data. Defaults to True.
240 | """ 241 | pred, gt = validate_and_normalize_input(pred, gt, normalize=normalize) 242 | max_valid_tgt_idx, valid_labeled_mask = parse_connected_components(mask=gt) 243 | tgt_weights = encode_bboxwise_tgts_bitwise(max_valid_tgt_idx, valid_labeled_mask) 244 | 245 | if max_valid_tgt_idx == 0: # no targets or no background 246 | mae = np.abs(pred - gt).mean() 247 | else: # there are multiple targets 248 | # background component 249 | bg_mask = tgt_weights == 0 250 | bg_area = np.count_nonzero(bg_mask) 251 | 252 | _pred = pred * bg_mask 253 | _gt = gt & bg_mask 254 | bg_fg_area_ratio = bg_area / (gt.size - bg_area) 255 | factor = 1 / (max_valid_tgt_idx + bg_fg_area_ratio) # shared normalizing weight over all targets plus the background term 256 | mae = bg_fg_area_ratio * np.abs(_pred - _gt).sum() / bg_area * factor 257 | 258 | # foreground components 259 | for tgt_idx in range(1, max_valid_tgt_idx + 1): 260 | tgt_mask = get_kth_bit(tgt_weights, k=tgt_idx) > 0 261 | tgt_area = np.count_nonzero(tgt_mask) 262 | 263 | _pred = pred * tgt_mask 264 | _gt = gt & tgt_mask 265 | mae += np.abs(_pred - _gt).sum() / tgt_area * factor 266 | self.maes.append(mae) 267 | 268 | def get_results(self) -> dict: 269 | """Return the results about MAE.
270 | 271 | Returns: 272 | dict(mae=mae) 273 | """ 274 | mae = np.mean(np.array(self.maes, TYPE)) 275 | return dict(si_mae=mae) 276 | -------------------------------------------------------------------------------- /examples/version_performance.json: -------------------------------------------------------------------------------- 1 | { 2 | "v1_2_3": { 3 | "Smeasure": 0.9029763868504661, 4 | "wFmeasure": 0.5579812753638986, 5 | "MAE": 0.03705558476661653, 6 | "adpEm": 0.9408760066970631, 7 | "meanEm": 0.9566258293508715, 8 | "maxEm": 0.966954482892271, 9 | "adpFm": 0.5816750824038355, 10 | "meanFm": 0.577051059518767, 11 | "maxFm": 0.5886784581120638 12 | }, 13 | "v1_3_0": { 14 | "Smeasure": 0.9029761578759272, 15 | "wFmeasure": 0.5579812753638986, 16 | "MAE": 0.03705558476661653, 17 | "adpEm": 0.9408760066970617, 18 | "meanEm": 0.9566258293508704, 19 | "maxEm": 0.9669544828922699, 20 | "adpFm": 0.5816750824038355, 21 | "meanFm": 0.577051059518767, 22 | "maxFm": 0.5886784581120638 23 | }, 24 | "v1_4_0": { 25 | "MAE": 0.03705558476661653, 26 | "Smeasure": 0.9029761578759272, 27 | "adpEm": 0.9408760066970617, 28 | "adpFm": 0.5816750824038355, 29 | "adpber": 0.2354784689008184, 30 | "adpdice": 0.5801020564379223, 31 | "adpf1": 0.5801020564379223, 32 | "adpfm": 0.5816750824038355, 33 | "adpiou": 0.5141023436626048, 34 | "adpkappa": 0.6568702977598276, 35 | "adpoa": 0.9391947016812359, 36 | "adppre": 0.583200007681871, 37 | "adprec": 0.5777548546727481, 38 | "adpspec": 0.9512882075256152, 39 | "maxEm": 0.9669544828922699, 40 | "maxFm": 0.5886784581120638, 41 | "maxber": 0.6666666666666666, 42 | "maxdice": 0.5830613926289557, 43 | "maxf1": 0.5830613926289557, 44 | "maxfm": 0.5886784581120638, 45 | "maxiou": 0.5201569938888494, 46 | "maxkappa": 0.6759493461328753, 47 | "maxoa": 0.9654783867686053, 48 | "maxpre": 0.6396783912301717, 49 | "maxrec": 0.6666666666666666, 50 | "maxspec": 0.9965927890353435, 51 | "meanEm": 0.9566258293508704, 52 | "meanFm": 0.577051059518767, 53 
| "meanber": 0.23290802950995626, 54 | "meandice": 0.5689913551800527, 55 | "meanf1": 0.568991355180053, 56 | "meanfm": 0.577051059518767, 57 | "meaniou": 0.49816648786971, 58 | "meankappa": 0.6443053495487194, 59 | "meanoa": 0.9596413706286032, 60 | "meanpre": 0.5857695537152126, 61 | "meanrec": 0.5599653001125341, 62 | "meanspec": 0.9742186408675534, 63 | "overall_biber": 0.08527759498137788, 64 | "overall_bidice": 0.8510675335753018, 65 | "overall_bif1": 0.8510675335753017, 66 | "overall_bifm": 0.8525259082995088, 67 | "overall_biiou": 0.740746352327995, 68 | "overall_bikappa": 0.7400114676102276, 69 | "overall_bioa": 0.965778, 70 | "overall_bipre": 0.8537799277020065, 71 | "overall_birec": 0.8483723190115916, 72 | "overall_bispec": 0.9810724910256526, 73 | "sample_biber": 0.23037858807333392, 74 | "sample_bidice": 0.5738376903441331, 75 | "sample_bif1": 0.5738376903441331, 76 | "sample_bifm": 0.5829998670906196, 77 | "sample_biiou": 0.5039622042094377, 78 | "sample_bikappa": 0.6510635726572914, 79 | "sample_bioa": 0.964811758770181, 80 | "sample_bipre": 0.5916996553523113, 81 | "sample_birec": 0.5592859147614985, 82 | "sample_bispec": 0.9799569090918337, 83 | "wFmeasure": 0.5579812753638986 84 | }, 85 | "v1_4_1": { 86 | "MAE": 0.03705558476661653, 87 | "MSIOU": 0.8228002109838289, 88 | "Smeasure": 0.9029761578759272, 89 | "adpEm": 0.9408760066970617, 90 | "adpFm": 0.5816750824038355, 91 | "adpber": 0.2354784689008184, 92 | "adpdice": 0.5801020564379223, 93 | "adpf1": 0.5801020564379223, 94 | "adpfm": 0.5816750824038355, 95 | "adpiou": 0.5141023436626048, 96 | "adpkappa": 0.6568702977598276, 97 | "adpoa": 0.9391947016812359, 98 | "adppre": 0.583200007681871, 99 | "adprec": 0.5777548546727481, 100 | "adpfpr": 0.04871179247438492, 101 | "adpspec": 0.9512882075256152, 102 | "maxEm": 0.9669544828922699, 103 | "maxFm": 0.5886784581120638, 104 | "maxber": 0.6666666666666666, 105 | "maxdice": 0.5830613926289557, 106 | "maxf1": 0.5830613926289557, 107 | "maxfm": 
0.5886784581120638, 108 | "maxiou": 0.5201569938888494, 109 | "maxkappa": 0.6759493461328753, 110 | "maxoa": 0.9654783867686053, 111 | "maxpre": 0.6396783912301717, 112 | "maxrec": 0.6666666666666666, 113 | "maxfpr": 1.0, 114 | "maxspec": 0.9965927890353435, 115 | "meanEm": 0.9566258293508704, 116 | "meanFm": 0.577051059518767, 117 | "meanber": 0.23290802950995626, 118 | "meandice": 0.5689913551800527, 119 | "meanf1": 0.568991355180053, 120 | "meanfm": 0.577051059518767, 121 | "meaniou": 0.49816648786971, 122 | "meankappa": 0.6443053495487194, 123 | "meanoa": 0.9596413706286032, 124 | "meanpre": 0.5857695537152126, 125 | "meanrec": 0.5599653001125341, 126 | "meanfpr": 0.02578135913244661, 127 | "meanspec": 0.9742186408675534, 128 | "overall_biber": 0.08527759498137788, 129 | "overall_bidice": 0.8510675335753018, 130 | "overall_bif1": 0.8510675335753017, 131 | "overall_bifm": 0.8525259082995088, 132 | "overall_biiou": 0.740746352327995, 133 | "overall_bikappa": 0.7400114676102276, 134 | "overall_bioa": 0.965778, 135 | "overall_bipre": 0.8537799277020065, 136 | "overall_birec": 0.8483723190115916, 137 | "overall_bifpr": 0.018927508974347383, 138 | "overall_bispec": 0.9810724910256526, 139 | "sample_biber": 0.23037858807333392, 140 | "sample_bidice": 0.5738376903441331, 141 | "sample_bif1": 0.5738376903441331, 142 | "sample_bifm": 0.5829998670906196, 143 | "sample_biiou": 0.5039622042094377, 144 | "sample_bikappa": 0.6510635726572914, 145 | "sample_bioa": 0.964811758770181, 146 | "sample_bipre": 0.5916996553523113, 147 | "sample_birec": 0.5592859147614985, 148 | "sample_bifpr": 0.02004309090816628, 149 | "sample_bispec": 0.9799569090918337, 150 | "wFmeasure": 0.5579812753638986 151 | }, 152 | "v1_4_3": { 153 | "MAE": 0.03705558476661653, 154 | "Smeasure": 0.9029761578759272, 155 | "adpEm": 0.9408760066970617, 156 | "adpFm": 0.5816750824038355, 157 | "adpber": 0.2354784689008184, 158 | "adpdice": 0.5801020564379223, 159 | "adpf1": 0.5801020564379223, 160 | "adpfm": 
0.5816750824038355, 161 | "adpiou": 0.5141023436626048, 162 | "adpkappa": 0.6568702977598276, 163 | "adpmsiou": 0.8309076073697286, 164 | "adpoa": 0.9391947016812359, 165 | "adppre": 0.583200007681871, 166 | "adprec": 0.5777548546727481, 167 | "adpfpr": 0.04871179247438492, 168 | "adpspec": 0.9512882075256152, 169 | "maxEm": 0.9669544828922699, 170 | "maxFm": 0.5886784581120638, 171 | "maxber": 0.6666666666666666, 172 | "maxdice": 0.5830613926289557, 173 | "maxf1": 0.5830613926289557, 174 | "maxfm": 0.5886784581120638, 175 | "maxiou": 0.5201569938888494, 176 | "maxkappa": 0.6759493461328753, 177 | "maxmsiou": 0.8362740728548873, 178 | "maxoa": 0.9654783867686053, 179 | "maxpre": 0.6396783912301717, 180 | "maxrec": 0.6666666666666666, 181 | "maxfpr": 1.0, 182 | "maxspec": 0.9965927890353435, 183 | "meanEm": 0.9566258293508704, 184 | "meanFm": 0.577051059518767, 185 | "meanber": 0.23290802950995626, 186 | "meandice": 0.5689913551800527, 187 | "meanf1": 0.568991355180053, 188 | "meanfm": 0.577051059518767, 189 | "meaniou": 0.49816648786971, 190 | "meankappa": 0.6443053495487194, 191 | "meanmsiou": 0.817192961609182, 192 | "meanoa": 0.9596413706286032, 193 | "meanpre": 0.5857695537152126, 194 | "meanrec": 0.5599653001125341, 195 | "meanfpr": 0.02578135913244661, 196 | "meanspec": 0.9742186408675534, 197 | "overall_biber": 0.08527759498137788, 198 | "overall_bidice": 0.8510675335753018, 199 | "overall_bif1": 0.8510675335753017, 200 | "overall_bifm": 0.8525259082995088, 201 | "overall_biiou": 0.740746352327995, 202 | "overall_bikappa": 0.7400114676102276, 203 | "overall_bioa": 0.965778, 204 | "overall_bipre": 0.8537799277020065, 205 | "overall_birec": 0.8483723190115916, 206 | "overall_bifpr": 0.018927508974347383, 207 | "overall_bispec": 0.9810724910256526, 208 | "sample_biber": 0.23037858807333392, 209 | "sample_bidice": 0.5738376903441331, 210 | "sample_bif1": 0.5738376903441331, 211 | "sample_bifm": 0.5829998670906196, 212 | "sample_biiou": 0.5039622042094377, 213 | 
"sample_bikappa": 0.6510635726572914, 214 | "sample_bimsiou": 0.8227620408962383, 215 | "sample_bioa": 0.964811758770181, 216 | "sample_bipre": 0.5916996553523113, 217 | "sample_birec": 0.5592859147614985, 218 | "sample_bifpr": 0.02004309090816628, 219 | "sample_bispec": 0.9799569090918337, 220 | "wFmeasure": 0.5579812753638986 221 | }, 222 | "v1_5_0": { 223 | "si_mae": 0.062219430633157186, 224 | "si_overall_biber": 0.08057909438705857, 225 | "si_overall_bidice": 0.8689933434647754, 226 | "si_overall_bif1": 0.8689933434647755, 227 | "si_overall_bifm": 0.8805176956944588, 228 | "si_overall_bifpr": 0.009530507785708842, 229 | "si_overall_biiou": 0.768336188335874, 230 | "si_overall_bikappa": 0.7691515398583714, 231 | "si_overall_bioa": 0.9785585454545455, 232 | "si_overall_bipre": 0.8906417940356693, 233 | "si_overall_birec": 0.8483723190115916, 234 | "si_overall_bispec": 0.9904694922142911, 235 | "si_overall_maxber": 0.625, 236 | "si_overall_maxdice": 0.5954092597492913, 237 | "si_overall_maxf1": 0.5954092597492912, 238 | "si_overall_maxfm": 0.6049251905352891, 239 | "si_overall_maxfpr": 1.0, 240 | "si_overall_maxiou": 0.5170820894969207, 241 | "si_overall_maxkappa": 0.6552745344614292, 242 | "si_overall_maxoa": 0.977842037152757, 243 | "si_overall_maxpre": 0.6954311451835118, 244 | "si_overall_maxrec": 0.75, 245 | "si_overall_maxspec": 0.9975473999188901, 246 | "si_overall_meanber": 0.2465345494932514, 247 | "si_overall_meandice": 0.5557713842285521, 248 | "si_overall_meanf1": 0.5557713842285521, 249 | "si_overall_meanfm": 0.5914066516990303, 250 | "si_overall_meanfpr": 0.015208429493882425, 251 | "si_overall_meaniou": 0.47471774602791184, 252 | "si_overall_meankappa": 0.5964751103648793, 253 | "si_overall_meanoa": 0.972345297663634, 254 | "si_overall_meanpre": 0.6497316834023819, 255 | "si_overall_meanrec": 0.5221393305073796, 256 | "si_overall_meanspec": 0.9847915705061175, 257 | "si_sample_adpber": 0.25512709310479464, 258 | "si_sample_adpdice": 
0.5444846257190149, 259 | "si_sample_adpf1": 0.5444846257190148, 260 | "si_sample_adpfm": 0.5567302948345944, 261 | "si_sample_adpfpr": 0.037480829497605665, 262 | "si_sample_adpiou": 0.4763592468740943, 263 | "si_sample_adpkappa": 0.6400423451759603, 264 | "si_sample_adpoa": 0.9534735481231834, 265 | "si_sample_adppre": 0.5694959973412623, 266 | "si_sample_adprec": 0.5272266432880164, 267 | "si_sample_adpspec": 0.9625191705023943, 268 | "si_sample_biber": 0.26130632103242585, 269 | "si_sample_bidice": 0.5233655878893085, 270 | "si_sample_bif1": 0.5233655878893085, 271 | "si_sample_bifm": 0.5556766467815453, 272 | "si_sample_bifpr": 0.00830070487137397, 273 | "si_sample_biiou": 0.4524818546833645, 274 | "si_sample_bikappa": 0.6129034268568041, 275 | "si_sample_bioa": 0.9789323779961485, 276 | "si_sample_bipre": 0.5987496260981167, 277 | "si_sample_birec": 0.48568806280652216, 278 | "si_sample_bispec": 0.991699295128626, 279 | "si_sample_maxber": 0.6666666666666666, 280 | "si_sample_maxdice": 0.5549190570745837, 281 | "si_sample_maxf1": 0.5549190570745836, 282 | "si_sample_maxfm": 0.5609494528499654, 283 | "si_sample_maxfpr": 1.0, 284 | "si_sample_maxiou": 0.4939998363355485, 285 | "si_sample_maxkappa": 0.6699265264324135, 286 | "si_sample_maxoa": 0.9831346991175955, 287 | "si_sample_maxpre": 0.6355082212290294, 288 | "si_sample_maxrec": 0.6666666666666666, 289 | "si_sample_maxspec": 0.9983641230378081, 290 | "si_sample_meanber": 0.26322176259489033, 291 | "si_sample_meandice": 0.5171260001047577, 292 | "si_sample_meanf1": 0.5171260001047577, 293 | "si_sample_meanfm": 0.5462422547476231, 294 | "si_sample_meanfpr": 0.013213645733269934, 295 | "si_sample_meaniou": 0.4477218744296734, 296 | "si_sample_meankappa": 0.6075645114556255, 297 | "si_sample_meanoa": 0.9746901954959566, 298 | "si_sample_meanpre": 0.5903439300259663, 299 | "si_sample_meanrec": 0.4867701205434893, 300 | "si_sample_meanspec": 0.9867863542667301, 301 | "auc_pr": 0.19452884849631813, 302 | 
"auc_roc": 0.6468975503667292, 303 | "si_overall_auc_pr": 0.3036500410380263, 304 | "si_overall_auc_roc": 0.6192831970413093, 305 | "si_sample_auc_pr": 0.3036500410380263, 306 | "si_sample_auc_roc": 0.6192831970413093 307 | }, 308 | "v1_5_1": { 309 | "HCE": 73.66666666666667 310 | }, 311 | "v1_6_0": { 312 | "ccm": 0.5549345672746412, 313 | "cm": 0.554784060409666 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /py_sod_metrics/context_measure.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import cv2 4 | import numpy as np 5 | from skimage.color import deltaE_ciede2000, rgb2lab 6 | from sklearn.neighbors import NearestNeighbors 7 | from sklearn.preprocessing import StandardScaler 8 | 9 | from .utils import EPS, TYPE, validate_and_normalize_input 10 | 11 | 12 | class ContextMeasure: 13 | """Context-measure for evaluating foreground segmentation quality. 14 | 15 | This metric evaluates predictions by considering both forward inference (how well predictions align with ground truth) and reverse deduction (how completely ground truth is covered by predictions), using context-aware Gaussian kernels. 16 | 17 | ``` 18 | @article{ContextMeasure, 19 | title={Context-measure: Contextualizing Metric for Camouflage}, 20 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 21 | journal={arXiv preprint arXiv:2512.07076}, 22 | year={2025} 23 | } 24 | ``` 25 | """ 26 | 27 | def __init__(self, beta2: float = 1.0, alpha: float = 6.0): 28 | """Initialize the Context Measure evaluator. 29 | 30 | Args: 31 | beta2 (float): Balancing factor between forward inference and reverse deduction. Higher values give more weight to forward inference. Defaults to 1.0. 32 | alpha (float): Scaling factor for Gaussian kernel covariance, controls the spatial context range. Defaults to 6.0. 
33 | """ 34 | self.beta2 = beta2 35 | self.alpha = alpha 36 | self._exp_factor = math.e / (math.e - 1) 37 | self.scores = [] 38 | 39 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 40 | """Statistics the metric for the pair of pred and gt. 41 | 42 | Args: 43 | pred (np.ndarray): Prediction, gray scale image. 44 | gt (np.ndarray): Ground truth, gray scale image. 45 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 46 | """ 47 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 48 | 49 | # align with the original implementation 50 | pred = pred.astype(TYPE) 51 | gt = gt.astype(TYPE) 52 | 53 | score = self.compute(pred, gt, cd=np.zeros_like(gt)) 54 | self.scores.append(score) 55 | 56 | def compute(self, pred: np.ndarray, gt: np.ndarray, cd: np.ndarray) -> float: 57 | """Compute the context measure between prediction and ground truth. 58 | 59 | Args: 60 | pred (np.ndarray): Prediction map (values between 0 and 1). 61 | gt (np.ndarray): Ground truth map (boolean or 0/1 values). 62 | cd (np.ndarray): Camouflage degree map (values between 0 and 1). 63 | 64 | Returns: 65 | float: Context measure value. 
66 | """ 67 | cov_matrix, x_dis, y_dis = self._compute_y_params(gt) 68 | K = self._gaussian_kernel(x_dis, y_dis, cov_matrix) 69 | 70 | # Forward inference: measure prediction relevance 71 | forward = self._forward_inference(pred, gt, K) 72 | mforward = np.sum(forward * pred) / (np.sum(pred) + EPS) 73 | 74 | # Reverse deduction: measure ground truth completeness 75 | reverse = self._reverse_deduction(pred, gt, K) 76 | 77 | wreverse = np.sum(reverse * (gt + cd)) / (np.sum(gt) + np.sum(cd) + EPS) 78 | 79 | # F-measure style combination 80 | return (1 + self.beta2) * mforward * wreverse / (self.beta2 * mforward + wreverse + EPS) 81 | 82 | def _forward_inference(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray: 83 | """Calculate forward inference: how well predictions align with ground truth context.""" 84 | x_binary = (X > 0).astype(int) 85 | # note: using EPS=1e-8 and this statement, the test result is the same as the original implementation 86 | # global_relevance_matrix = cv2.filter2D(Y, cv2.CV_32F, kernel) 87 | # note: this is a hack to make sure that the type of Y is compatible with more diverse data 88 | global_relevance_matrix = cv2.filter2D(Y.astype(np.float32), cv2.CV_32F, kernel) 89 | return x_binary * global_relevance_matrix 90 | 91 | def _reverse_deduction(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray: 92 | """Calculate reverse deduction: how completely ground truth is covered by predictions.""" 93 | X = X.astype(float) 94 | non_global_completeness_matrix = np.exp(-1 * cv2.filter2D(X, -1, kernel)) 95 | global_completeness_matrix = 1 - non_global_completeness_matrix 96 | reverse = self._exp_factor * Y * global_completeness_matrix 97 | return reverse 98 | 99 | def _gaussian_kernel(self, x_dis: int, y_dis: int, cov_matrix: np.ndarray) -> np.ndarray: 100 | """Generate a 2D Gaussian kernel based on covariance matrix.""" 101 | det_sigma = np.linalg.det(cov_matrix) 102 | inv_sigma = np.linalg.inv(cov_matrix) 103 | 104 | 
x, y = np.meshgrid(np.arange(-x_dis, x_dis + 1), np.arange(-y_dis, y_dis + 1), indexing="ij") 105 | Z = np.stack([x, y], axis=-1) 106 | exp_term = np.einsum("...i,ij,...j->...", Z, inv_sigma, Z) 107 | 108 | kernel = np.exp(-0.5 * exp_term) / (2 * np.pi * np.sqrt(det_sigma)) 109 | return kernel / np.sum(kernel) 110 | 111 | def _compute_y_params(self, Y: np.ndarray) -> tuple: 112 | """Compute Gaussian kernel parameters based on ground truth distribution.""" 113 | points = np.argwhere(Y > 0) 114 | if len(points) <= 1: 115 | return np.diag([0.25, 0.25]), 1, 1 116 | 117 | cov_matrix = np.cov(points, rowvar=False) 118 | sigma_x = np.sqrt(cov_matrix[0, 0]) 119 | sigma_y = np.sqrt(cov_matrix[1, 1]) 120 | total_sigma = np.sqrt(cov_matrix[0, 0] + cov_matrix[1, 1]) 121 | 122 | std_cov_matrix = self.alpha**2 * cov_matrix / (total_sigma**2) 123 | std_sigma_x = self.alpha * sigma_x / total_sigma 124 | std_sigma_y = self.alpha * sigma_y / total_sigma 125 | x_dis = round(3 * std_sigma_x) 126 | y_dis = round(3 * std_sigma_y) 127 | 128 | return std_cov_matrix, x_dis, y_dis 129 | 130 | def get_results(self) -> dict: 131 | """Return the results about context measure. 132 | 133 | Returns: 134 | dict(cm=context_measure) 135 | """ 136 | cm = np.mean(np.array(self.scores, dtype=TYPE)) 137 | return dict(cm=cm) 138 | 139 | 140 | class CamouflageContextMeasure(ContextMeasure): 141 | """Camouflage Context-measure for evaluating camouflaged object detection quality. 142 | 143 | This metric extends the base ContextMeasure by incorporating camouflage degree, which measures how well the foreground blends with its surrounding background. It uses patch-based nearest neighbor matching in Lab color space with spatial constraints to estimate camouflage difficulty. 
144 | 145 | ``` 146 | @article{ContextMeasure, 147 | title={Context-measure: Contextualizing Metric for Camouflage}, 148 | author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping}, 149 | journal={arXiv preprint arXiv:2512.07076}, 150 | year={2025} 151 | } 152 | ``` 153 | """ 154 | 155 | def __init__(self, beta2: float = 1.2, alpha: float = 6.0, gamma: int = 8, lambda_spatial: float = 20): 156 | """Initialize the Camouflage Context Measure evaluator. 157 | 158 | Args: 159 | beta2 (float): Balancing factor for forward and reverse. Defaults to 1.2 for camouflage. 160 | alpha (float): Gaussian kernel scaling factor. Defaults to 6.0. 161 | gamma (int): Exponential scaling factor for camouflage degree. Defaults to 8. 162 | lambda_spatial (float): Weight for spatial distance in ANN search. Defaults to 20. 163 | """ 164 | super().__init__(beta2=beta2, alpha=alpha) 165 | self.gamma = gamma 166 | self.lambda_spatial = lambda_spatial 167 | 168 | def step(self, pred: np.ndarray, gt: np.ndarray, img: np.ndarray, normalize: bool = True): 169 | """Statistics the metric for the pair of pred, gt, and img. 170 | 171 | Args: 172 | pred (np.ndarray): Prediction, gray scale image. 173 | gt (np.ndarray): Ground truth, gray scale image. 174 | img (np.ndarray): Original RGB image (required for camouflage degree calculation). 175 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 176 | """ 177 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 178 | 179 | pred = pred.astype(TYPE) 180 | gt = gt.astype(TYPE) 181 | 182 | _, cd = self._calculate_camouflage_degree(img, gt) 183 | score = self.compute(pred, gt, cd=cd) 184 | self.scores.append(score) 185 | 186 | def _calculate_camouflage_degree(self, img: np.ndarray, mask: np.ndarray, w: int = 7) -> tuple: 187 | """Compute the camouflage degree matrix using Lab+spatial ANN and RGB reconstruction. 188 | 189 | Args: 190 | img (np.ndarray): RGB image (H x W x 3). 
191 | mask (np.ndarray): Binary mask (H x W). 192 | w (int): Patch size. Defaults to 7. 193 | 194 | Returns: 195 | tuple: (reconstructed_image, camouflage_degree_matrix) 196 | """ 197 | mask_binary = (mask > 0).astype(np.uint8) 198 | fg_mask = mask_binary 199 | bg_mask = self._extract_surrounding_background(fg_mask, kernel_size=20) 200 | im_fg = fg_mask[:, :, np.newaxis] * img 201 | im_bg = bg_mask[:, :, np.newaxis] * img 202 | im_lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB) 203 | 204 | # Step 1: Extract patches in Lab space 205 | im_fg_lab = im_lab * fg_mask[:, :, np.newaxis] 206 | im_bg_lab = im_lab * bg_mask[:, :, np.newaxis] 207 | 208 | fg_indices, fg_feat_lab = self._extract_patches(im_fg_lab, fg_mask, w, d=w // 2) 209 | bg_indices, bg_feat_lab = self._extract_patches(im_bg_lab, bg_mask, w, d=w // 2) 210 | 211 | # Check if we have enough patches to compute camouflage degree 212 | if len(fg_indices) == 0 or len(bg_indices) == 0: 213 | # Return zero camouflage degree when insufficient data 214 | img_recon = np.zeros_like(img) 215 | cd = np.zeros_like(mask, dtype=TYPE) 216 | return img_recon, cd 217 | 218 | # Step 2: Lab+spatial ANN query 219 | fg_nn = self._ann_with_spatial_faiss(bg_feat_lab, fg_feat_lab, bg_indices, fg_indices) 220 | 221 | # Step 3: Reconstruct foreground in RGB space 222 | img_recon = self._reconstruct_image(img, fg_indices, bg_indices, fg_nn, im_bg, w) 223 | 224 | # Step 4: Compute similarity in Lab space 225 | similarity_matrix = self._compute_delta_e2000_matrix(img_recon, im_fg.astype(np.uint8)).astype(TYPE) 226 | 227 | # Step 5: Compute camouflage degree 228 | cd = ((np.exp(self.gamma * similarity_matrix * mask_binary) - 1) / (np.exp(self.gamma) - 1)).astype(TYPE) 229 | 230 | return img_recon, cd 231 | 232 | def _ann_with_spatial_faiss(self, x, q, x_coords, q_coords, m=16): 233 | """Approximate Nearest Neighbor search with spatial constraints using sklearn. 
234 | 235 | Note: Method name retained for compatibility, but now uses sklearn.neighbors.NearestNeighbors instead of FAISS for a more lightweight dependency. 236 | """ 237 | all_coords = np.vstack([x_coords, q_coords]) 238 | scaled_coords = StandardScaler().fit_transform(all_coords) 239 | x_coords_scaled = scaled_coords[: len(x_coords)] 240 | q_coords_scaled = scaled_coords[len(x_coords) :] 241 | 242 | x_aug = np.hstack([x, self.lambda_spatial * x_coords_scaled]).astype(np.float32) 243 | q_aug = np.hstack([q, self.lambda_spatial * q_coords_scaled]).astype(np.float32) 244 | 245 | # Use sklearn NearestNeighbors instead of FAISS for lightweight alternative 246 | nbrs = NearestNeighbors(n_neighbors=1, algorithm="auto", metric="euclidean") 247 | nbrs.fit(x_aug) 248 | 249 | _, indices = nbrs.kneighbors(q_aug) # top-1 250 | return indices 251 | 252 | def _extract_surrounding_background(self, mask: np.ndarray, kernel_size: int = 20) -> np.ndarray: 253 | """Extract the surrounding background region around the foreground.""" 254 | kernel = np.ones((kernel_size, kernel_size), np.uint8) 255 | dilated_mask = cv2.dilate(mask, kernel, iterations=1) 256 | surrounding_bg_mask = dilated_mask - mask 257 | return surrounding_bg_mask 258 | 259 | def _extract_patches(self, img: np.ndarray, mask: np.ndarray, w: int, d: int) -> tuple: 260 | """Extract valid patches from the image based on mask.""" 261 | h, w_, c = img.shape 262 | pad_h = (d - (h - w) % d) % d 263 | pad_w = (d - (w_ - w) % d) % d 264 | img_padded = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode="reflect") 265 | mask_padded = np.pad(mask, ((0, pad_h), (0, pad_w)), mode="constant") 266 | 267 | new_h, new_w = img_padded.shape[:2] 268 | 269 | img_patches = np.lib.stride_tricks.sliding_window_view(img_padded, (w, w, img.shape[2]))[::d, ::d, 0, :, :, :] 270 | mask_patches = np.lib.stride_tricks.sliding_window_view(mask_padded, (w, w))[::d, ::d, :, :] 271 | 272 | img_patches = img_patches.reshape(-1, w * w * c) 273 | 
mask_patches = mask_patches.reshape(-1, w, w) 274 | 275 | grid_x, grid_y = np.meshgrid(np.arange(0, new_h - w + 1, d), np.arange(0, new_w - w + 1, d), indexing="ij") 276 | all_indices = np.column_stack((grid_x.ravel(), grid_y.ravel())) 277 | valid_idx = np.all(mask_patches > 0, axis=(1, 2)) 278 | valid_indices = all_indices[valid_idx] 279 | valid_patches = img_patches[valid_idx] 280 | 281 | return valid_indices, valid_patches 282 | 283 | def _reconstruct_image( 284 | self, 285 | img: np.ndarray, 286 | fg_indices: np.ndarray, 287 | bg_indices: np.ndarray, 288 | fg_nn: np.ndarray, 289 | im_bg: np.ndarray, 290 | w: int, 291 | ) -> np.ndarray: 292 | """Reconstruct foreground using nearest neighbor background patches.""" 293 | img_recon = np.zeros_like(img, dtype=np.int64) 294 | counts = np.zeros(img.shape[:2]) + EPS 295 | 296 | fg_x, fg_y = fg_indices[:, 0], fg_indices[:, 1] 297 | nn_i_j = fg_nn[:, 0] 298 | cii, cjj = bg_indices[nn_i_j, 0], bg_indices[nn_i_j, 1] 299 | 300 | fg_x = np.clip(fg_x, 0, img.shape[0] - w) 301 | fg_y = np.clip(fg_y, 0, img.shape[1] - w) 302 | cii = np.clip(cii, 0, img.shape[0] - w) 303 | cjj = np.clip(cjj, 0, img.shape[1] - w) 304 | 305 | for i in range(fg_indices.shape[0]): 306 | img_recon[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w, :] += im_bg[ 307 | cii[i] : cii[i] + w, cjj[i] : cjj[i] + w, : 308 | ] 309 | counts[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w] += 1 310 | 311 | counts = np.expand_dims(counts, axis=-1) 312 | img_recon = np.round(img_recon / counts).astype(np.uint8) 313 | 314 | return img_recon 315 | 316 | def _compute_delta_e2000_matrix(self, img1_rgb: np.ndarray, img2_rgb: np.ndarray) -> np.ndarray: 317 | """Compute the perceptual color difference (ΔE 2000) between two images. 318 | 319 | Args: 320 | img1_rgb (np.ndarray): First input image (H x W x 3) in RGB format. 321 | img2_rgb (np.ndarray): Second input image (H x W x 3) in RGB format. 
322 | 323 | Returns: 324 | np.ndarray: Similarity matrix with values in [0,1] (higher = more similar). 325 | """ 326 | # Convert RGB to Lab color space 327 | lab1 = rgb2lab(img1_rgb) 328 | lab2 = rgb2lab(img2_rgb) 329 | 330 | # Compute ΔE 2000 color difference 331 | delta_e_matrix = deltaE_ciede2000(lab1, lab2) 332 | 333 | # Normalize ΔE 2000 values to [0,1] 334 | similarity_matrix = 1 - np.clip(delta_e_matrix / 100, 0, 1) 335 | 336 | return similarity_matrix 337 | 338 | def get_results(self) -> dict: 339 | """Return the results about camouflage context measure. 340 | 341 | Returns: 342 | dict(ccm=camouflage_context_measure) 343 | """ 344 | ccm = np.mean(np.array(self.scores, dtype=TYPE)) 345 | return dict(ccm=ccm) 346 | return dict(ccm=ccm) 347 | return dict(ccm=ccm) 348 | -------------------------------------------------------------------------------- /py_sod_metrics/fmeasurev2.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | from .utils import TYPE, get_adaptive_threshold, validate_and_normalize_input 6 | 7 | 8 | class _BaseHandler: 9 | """Base class for all metric handlers. 10 | 11 | Provides common functionality for calculating various segmentation metrics. 12 | """ 13 | 14 | def __init__( 15 | self, 16 | with_dynamic: bool, 17 | with_adaptive: bool, 18 | *, 19 | with_binary: bool = False, 20 | sample_based: bool = True, 21 | ): 22 | """Initialize the base handler. 23 | 24 | Args: 25 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 26 | with_adaptive (bool, optional): Record adaptive results for adp version. 27 | with_binary (bool, optional): Record binary results for binary version. 28 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 29 | the metric of the dataset. Defaults to True. 
30 | """ 31 | self.dynamic_results = [] if with_dynamic else None 32 | self.adaptive_results = [] if with_adaptive else None 33 | self.sample_based = sample_based 34 | if with_binary: 35 | if self.sample_based: 36 | self.binary_results = [] 37 | else: 38 | self.binary_results = {"tp": 0, "fp": 0, "tn": 0, "fn": 0} 39 | else: 40 | self.binary_results = None 41 | 42 | @abc.abstractmethod 43 | def __call__(self, tp, fp, tn, fn): 44 | """Calculate the metric value. 45 | 46 | Args: 47 | tp: True positive count(s) 48 | fp: False positive count(s) 49 | tn: True negative count(s) 50 | fn: False negative count(s) 51 | 52 | Returns: 53 | Calculated metric value(s) 54 | """ 55 | pass 56 | 57 | @staticmethod 58 | def divide(numerator, denominator): 59 | """Safe division with numpy arrays handling zero denominators. 60 | 61 | Args: 62 | numerator: Numerator value(s) 63 | denominator: Denominator value(s) 64 | 65 | Returns: 66 | Result of division with zero handling 67 | """ 68 | denominator = np.array(denominator, dtype=TYPE) 69 | np.divide(numerator, denominator, out=denominator, where=denominator != 0) 70 | return denominator 71 | 72 | 73 | class IOUHandler(_BaseHandler): 74 | """Intersection over Union. 75 | 76 | iou = tp / (tp + fp + fn) 77 | """ 78 | 79 | def __call__(self, tp, fp, tn, fn): 80 | """Calculate IoU from confusion matrix components.""" 81 | # ious = np.where(Ps + FNs == 0, 0, TPs / (Ps + FNs)) 82 | return self.divide(tp, tp + fp + fn) 83 | 84 | 85 | class SpecificityHandler(_BaseHandler): 86 | """Specificity. 87 | 88 | True negative rate (TNR)/specificity (SPC)/selectivity 89 | 90 | specificity = tn / (tn + fp) 91 | """ 92 | 93 | def __call__(self, tp, fp, tn, fn): 94 | """Calculate specificity from confusion matrix components.""" 95 | # specificities = np.where(TNs + FPs == 0, 0, TNs / (TNs + FPs)) 96 | return self.divide(tn, tn + fp) 97 | 98 | 99 | TNRHandler = SpecificityHandler 100 | 101 | 102 | class DICEHandler(_BaseHandler): 103 | """DICE. 
104 | 105 | dice = 2 * tp / (tp + fn + tp + fp) 106 | """ 107 | 108 | def __call__(self, tp, fp, tn, fn): 109 | """Calculate DICE coefficient from confusion matrix components.""" 110 | # dices = np.where(TPs + FPs == 0, 0, 2 * TPs / (T + Ps)) 111 | return self.divide(2 * tp, tp + fn + tp + fp) 112 | 113 | 114 | class OverallAccuracyHandler(_BaseHandler): 115 | """Overall Accuracy. 116 | 117 | oa = overall_accuracy = (tp + tn) / (tp + fp + tn + fn) 118 | """ 119 | 120 | def __call__(self, tp, fp, tn, fn): 121 | """Calculate overall accuracy from confusion matrix components.""" 122 | # dices = np.where(TPs + FPs == 0, 0, 2 * TPs / (T + Ps)) 123 | return self.divide(tp + tn, tp + fp + tn + fn) 124 | 125 | 126 | class KappaHandler(_BaseHandler): 127 | """Kappa Accuracy. 128 | 129 | kappa = kappa = (oa - p_) / (1 - p_) 130 | p_ = [(tp + fp)(tp + fn) + (tn + fn)(tn + tp)] / (tp + fp + tn + fn)^2 131 | """ 132 | 133 | def __init__( 134 | self, 135 | with_dynamic: bool, 136 | with_adaptive: bool, 137 | *, 138 | with_binary: bool = False, 139 | sample_based: bool = True, 140 | ): 141 | """Initialize the Kappa handler. 142 | 143 | Args: 144 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 145 | with_adaptive (bool, optional): Record adaptive results for adp version. 146 | with_binary (bool, optional): Record binary results for binary version. 147 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 148 | the metric of the dataset. Defaults to True. 
149 | """ 150 | super().__init__( 151 | with_dynamic=with_dynamic, 152 | with_adaptive=with_adaptive, 153 | with_binary=with_binary, 154 | sample_based=sample_based, 155 | ) 156 | 157 | self.oa = OverallAccuracyHandler(False, False) 158 | 159 | def __call__(self, tp, fp, tn, fn): 160 | """Calculate Kappa coefficient from confusion matrix components.""" 161 | oa = self.oa(tp, fp, tn, fn) 162 | hpy_p = self.divide( 163 | (tp + fp) * (tp + fn) + (tn + fn) * (tn + tp), 164 | (tp + fp + tn + fn) ** 2, 165 | ) 166 | return self.divide(oa - hpy_p, 1 - hpy_p) 167 | 168 | 169 | class PrecisionHandler(_BaseHandler): 170 | """Precision. 171 | 172 | precision = tp / (tp + fp) 173 | """ 174 | 175 | def __call__(self, tp, fp, tn, fn): 176 | """Calculate precision from confusion matrix components.""" 177 | # precisions = np.where(Ps == 0, 0, TPs / Ps) 178 | return self.divide(tp, tp + fp) 179 | 180 | 181 | class RecallHandler(_BaseHandler): 182 | """Recall. 183 | 184 | True positive rate (TPR)/recall/sensitivity (SEN)/probability of detection/hit rate/power 185 | 186 | recall = tp / (tp + fn) 187 | """ 188 | 189 | def __call__(self, tp, fp, tn, fn): 190 | """Calculate recall from confusion matrix components.""" 191 | # recalls = np.where(TPs == 0, 0, TPs / T) 192 | return self.divide(tp, tp + fn) 193 | 194 | 195 | TPRHandler = RecallHandler 196 | SensitivityHandler = RecallHandler 197 | 198 | 199 | class FPRHandler(_BaseHandler): 200 | """False Positive Rate. 201 | 202 | False positive rate (FPR)/probability of false alarm/fall-out 203 | 204 | fpr = fp / (tn + fp) 205 | """ 206 | 207 | def __call__(self, tp, fp, tn, fn): 208 | """Calculate false positive rate from confusion matrix components.""" 209 | return self.divide(fp, tn + fp) 210 | 211 | 212 | class BERHandler(_BaseHandler): 213 | """Balance Error Rate. 
214 | 215 | ber = 1 - 0.5 * (tp / (tp + fn) + tn / (tn + fp)) 216 | """ 217 | 218 | def __call__(self, tp, fp, tn, fn): 219 | """Calculate balanced error rate from confusion matrix components.""" 220 | fg = np.asarray(tp + fn, dtype=TYPE) 221 | bg = np.asarray(tn + fp, dtype=TYPE) 222 | np.divide(tp, fg, out=fg, where=fg != 0) 223 | np.divide(tn, bg, out=bg, where=bg != 0) 224 | return 1 - 0.5 * (fg + bg) 225 | 226 | 227 | class FmeasureHandler(_BaseHandler): 228 | """F-measure. 229 | 230 | fmeasure = (beta + 1) * precision * recall / (beta * precision + recall) 231 | """ 232 | 233 | def __init__( 234 | self, 235 | with_dynamic: bool, 236 | with_adaptive: bool, 237 | *, 238 | with_binary: bool = False, 239 | sample_based: bool = True, 240 | beta: float = 0.3, 241 | ): 242 | """Initialize the F-measure handler. 243 | 244 | Args: 245 | with_dynamic (bool, optional): Record dynamic results for max/avg/curve versions. 246 | with_adaptive (bool, optional): Record adaptive results for adp version. 247 | with_binary (bool, optional): Record binary results for binary version. 248 | sample_based (bool, optional): Whether to average the metric of each sample or calculate 249 | the metric of the dataset. Defaults to True. 250 | beta (bool, optional): β^2 in F-measure. Defaults to 0.3. 251 | """ 252 | super().__init__( 253 | with_dynamic=with_dynamic, 254 | with_adaptive=with_adaptive, 255 | with_binary=with_binary, 256 | sample_based=sample_based, 257 | ) 258 | 259 | self.beta = beta 260 | self.precision = PrecisionHandler(False, False) 261 | self.recall = RecallHandler(False, False) 262 | 263 | def __call__(self, tp, fp, tn, fn): 264 | """Calculate F-measure from confusion matrix components. 265 | 266 | Note: 267 | Uses separate precision and recall calculations to maintain consistency with original implementation rather than combined formula. 
268 | """ 269 | p = self.precision(tp, fp, tn, fn) 270 | r = self.recall(tp, fp, tn, fn) 271 | return self.divide((self.beta + 1) * p * r, self.beta * p + r) 272 | 273 | 274 | class FmeasureV2: 275 | """Enhanced F-measure evaluator with support for multiple evaluation metrics. 276 | 277 | This class provides a flexible framework for computing various binary classification metrics including precision, recall, specificity, dice, IoU, and F-measure. It supports dynamic thresholding, adaptive thresholding, and binary evaluation modes. 278 | """ 279 | 280 | def __init__(self, metric_handlers: dict = None): 281 | """Enhanced Fmeasure class with more relevant metrics, e.g. precision, recall, specificity, dice, iou, fmeasure and so on. 282 | 283 | Args: 284 | metric_handlers (dict, optional): Handlers of different metrics. Defaults to None. 285 | """ 286 | self._metric_handlers = metric_handlers if metric_handlers else {} 287 | 288 | def add_handler(self, handler_name, metric_handler): 289 | """Add a metric handler to the evaluator. 290 | 291 | Args: 292 | handler_name (str): Name identifier for the metric handler. 293 | metric_handler: Handler instance that computes the specific metric. 294 | """ 295 | self._metric_handlers[handler_name] = metric_handler 296 | 297 | @staticmethod 298 | def get_statistics(binary: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 299 | """Calculate the TP, FP, TN and FN based a adaptive threshold. 
300 | 301 | Args: 302 | binary (np.ndarray): binarized `pred` containing [0, 1] 303 | gt (np.ndarray): gt binarized by 128 304 | FG (int): the number of foreground pixels in gt 305 | BG (int): the number of background pixels in gt 306 | 307 | Returns: 308 | dict: TP, FP, TN, FN 309 | """ 310 | TP = np.count_nonzero(binary[gt]) 311 | FP = np.count_nonzero(binary[~gt]) 312 | FN = FG - TP 313 | TN = BG - FP 314 | return {"tp": TP, "fp": FP, "tn": TN, "fn": FN} 315 | 316 | def adaptively_binarizing(self, pred: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 317 | """Calculate the TP, FP, TN and FN based a adaptive threshold. 318 | 319 | Args: 320 | pred (np.ndarray): prediction normalized in [0, 1] 321 | gt (np.ndarray): gt binarized by 128 322 | FG (int): the number of foreground pixels in gt 323 | BG (int): the number of background pixels in gt 324 | 325 | Returns: 326 | dict: TP, FP, TN, FN 327 | """ 328 | adaptive_threshold = get_adaptive_threshold(pred, max_value=1) 329 | binary = pred >= adaptive_threshold 330 | return self.get_statistics(binary, gt, FG, BG) 331 | 332 | def dynamically_binarizing(self, pred: np.ndarray, gt: np.ndarray, FG: int, BG: int) -> dict: 333 | """Calculate the corresponding TP, FP, TN and FNs when the threshold changes from 0 to 255. 334 | 335 | Args: 336 | pred (np.ndarray): prediction normalized in [0, 1] 337 | gt (np.ndarray): gt binarized by 128 338 | FG (int): the number of foreground pixels in gt 339 | BG (int): the number of background pixels in gt 340 | 341 | Returns: 342 | dict: TPs, FPs, TNs, FNs 343 | """ 344 | # 1. 获取预测结果在真值前背景区域中的直方图 345 | pred: np.ndarray = (pred * 255).astype(np.uint8) 346 | bins: np.ndarray = np.linspace(0, 256, 257) 347 | tp_hist, _ = np.histogram(pred[gt], bins=bins) # 最后一个bin为[255, 256] 348 | fp_hist, _ = np.histogram(pred[~gt], bins=bins) 349 | 350 | # 2. 
使用累积直方图(Cumulative Histogram)获得对应真值前背景中大于不同阈值的像素数量 351 | # 这里使用累加(cumsum)就是为了一次性得出 >=不同阈值 的像素数量, 这里仅计算了前景区域 352 | tp_w_thrs = np.cumsum(np.flip(tp_hist)) # >= 255, >= 254, ... >= 1, >= 0 353 | fp_w_thrs = np.cumsum(np.flip(fp_hist)) 354 | 355 | # 3. 计算对应的TP,FP,TN,FN 356 | TPs = tp_w_thrs # 前景 预测为 前景 357 | FPs = fp_w_thrs # 背景 预测为 前景 358 | FNs = FG - TPs # 前景 预测为 背景 359 | TNs = BG - FPs # 背景 预测为 背景 360 | return {"tp": TPs, "fp": FPs, "tn": TNs, "fn": FNs} 361 | 362 | def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True): 363 | """Statistics the metrics for the pair of pred and gt. 364 | 365 | Args: 366 | pred (np.ndarray): Prediction, gray scale image. 367 | gt (np.ndarray): Ground truth, gray scale image. 368 | normalize (bool, optional): Whether to normalize the input data. Defaults to True. 369 | """ 370 | if not self._metric_handlers: # 没有添加metric_handler 371 | raise ValueError("Please add your metric handler before using `step()`.") 372 | 373 | pred, gt = validate_and_normalize_input(pred, gt, normalize) 374 | 375 | FG = np.count_nonzero(gt) # 真实前景, FG=(TPs+FNs) 376 | BG = gt.size - FG # 真实背景, BG=(TNs+FPs) 377 | 378 | dynamical_tpfptnfn = None 379 | adaptive_tpfptnfn = None 380 | binary_tpfptnfn = None 381 | for handler_name, handler in self._metric_handlers.items(): 382 | if handler.dynamic_results is not None: 383 | if dynamical_tpfptnfn is None: 384 | dynamical_tpfptnfn = self.dynamically_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 385 | handler.dynamic_results.append(handler(**dynamical_tpfptnfn)) 386 | 387 | if handler.adaptive_results is not None: 388 | if adaptive_tpfptnfn is None: 389 | adaptive_tpfptnfn = self.adaptively_binarizing(pred=pred, gt=gt, FG=FG, BG=BG) 390 | handler.adaptive_results.append(handler(**adaptive_tpfptnfn)) 391 | 392 | if handler.binary_results is not None: 393 | if binary_tpfptnfn is None: 394 | # `pred > 0.5`: Simulating the effect of the `argmax` function. 
395 | binary_tpfptnfn = self.get_statistics(binary=pred > 0.5, gt=gt, FG=FG, BG=BG) 396 | if handler.sample_based: 397 | handler.binary_results.append(handler(**binary_tpfptnfn)) 398 | else: 399 | handler.binary_results["tp"] += binary_tpfptnfn["tp"] 400 | handler.binary_results["fp"] += binary_tpfptnfn["fp"] 401 | handler.binary_results["tn"] += binary_tpfptnfn["tn"] 402 | handler.binary_results["fn"] += binary_tpfptnfn["fn"] 403 | 404 | def get_results(self) -> dict: 405 | """Return the results of the specific metric names. 406 | 407 | Returns: 408 | dict: All results corresponding to different metrics. 409 | """ 410 | results = {} 411 | for handler_name, handler in self._metric_handlers.items(): 412 | res = {} 413 | if handler.dynamic_results is not None: 414 | res["dynamic"] = np.mean(np.array(handler.dynamic_results, dtype=TYPE), axis=0) 415 | if handler.adaptive_results is not None: 416 | res["adaptive"] = np.mean(np.array(handler.adaptive_results, dtype=TYPE)) 417 | if handler.binary_results is not None: 418 | if handler.sample_based: 419 | res["binary"] = np.mean(np.array(handler.binary_results, dtype=TYPE)) 420 | else: 421 | # NOTE: use `np.mean` to simplify output format (`array(123)` -> `123`) 422 | res["binary"] = np.mean(handler(**handler.binary_results)) 423 | results[handler_name] = res 424 | return results 425 | -------------------------------------------------------------------------------- /deploy/usage.rst: -------------------------------------------------------------------------------- 1 | Usage Guide 2 | =========== 3 | 4 | This guide provides practical examples of how to use PySODMetrics for evaluating your image segmentation results. 5 | 6 | Quick Start 7 | ----------- 8 | 9 | Basic Example with Individual Metrics 10 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Here's a simple example using individual metrics: 13 | 14 | .. 
code-block:: python 15 | 16 | import cv2 17 | import numpy as np 18 | from py_sod_metrics import MAE, Emeasure, Smeasure, Fmeasure, WeightedFmeasure 19 | 20 | # Initialize metrics 21 | mae = MAE() 22 | em = Emeasure() 23 | sm = Smeasure() 24 | fm = Fmeasure() 25 | wfm = WeightedFmeasure() 26 | 27 | # Process your dataset 28 | # Note: pred and gt should be uint8 numpy arrays with values in [0, 255] 29 | for pred_path, gt_path in zip(pred_paths, gt_paths): 30 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 31 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 32 | 33 | # Resize prediction to match ground truth size if needed 34 | if pred.shape != gt.shape: 35 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 36 | 37 | # Feed predictions to metrics 38 | mae.step(pred, gt) 39 | em.step(pred, gt) 40 | sm.step(pred, gt) 41 | fm.step(pred, gt) 42 | wfm.step(pred, gt) 43 | 44 | # Get results 45 | mae_score = mae.get_results()["mae"] 46 | em_results = em.get_results()["em"] 47 | sm_score = sm.get_results()["sm"] 48 | fm_results = fm.get_results()["fm"] 49 | wfm_score = wfm.get_results()["wfm"] 50 | 51 | print(f"MAE: {mae_score:.4f}") 52 | print(f"S-measure: {sm_score:.4f}") 53 | print(f"Weighted F-measure: {wfm_score:.4f}") 54 | print(f"Max E-measure: {em_results['curve'].max():.4f}") 55 | print(f"Adaptive F-measure: {fm_results['adp']:.4f}") 56 | 57 | Using FmeasureV2 Framework (Recommended) 58 | ----------------------------------------- 59 | 60 | The ``FmeasureV2`` framework provides a unified interface for computing multiple metrics efficiently. 61 | 62 | Basic FmeasureV2 Usage 63 | ~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | .. 
code-block:: python 66 | 67 | import cv2 68 | from py_sod_metrics import FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, IOUHandler 69 | 70 | # Configure metric handlers 71 | fmv2 = FmeasureV2( 72 | metric_handlers={ 73 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 74 | "f1": FmeasureHandler(beta=1, with_adaptive=True, with_dynamic=True), 75 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 76 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 77 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 78 | } 79 | ) 80 | 81 | # Process dataset 82 | for pred_path, gt_path in zip(pred_paths, gt_paths): 83 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 84 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 85 | 86 | if pred.shape != gt.shape: 87 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 88 | 89 | fmv2.step(pred, gt) 90 | 91 | # Get results 92 | results = fmv2.get_results() 93 | 94 | # Access different aggregation strategies 95 | print(f"Adaptive F-measure: {results['fm']['adaptive']:.4f}") 96 | print(f"Mean F-measure: {results['fm']['dynamic'].mean():.4f}") 97 | print(f"Max F-measure: {results['fm']['dynamic'].max():.4f}") 98 | print(f"Adaptive Precision: {results['pre']['adaptive']:.4f}") 99 | print(f"Adaptive IoU: {results['iou']['adaptive']:.4f}") 100 | 101 | **Available Handlers:** 102 | 103 | - ``FmeasureHandler`` - F-measure with configurable β 104 | - ``PrecisionHandler`` - Precision (Positive Predictive Value) 105 | - ``RecallHandler`` - Recall (Sensitivity, TPR) 106 | - ``IOUHandler`` - Intersection over Union 107 | - ``DICEHandler`` - Dice coefficient 108 | - ``BERHandler`` - Balanced Error Rate 109 | - ``KappaHandler`` - Cohen's Kappa 110 | - ``OverallAccuracyHandler`` - Overall classification accuracy 111 | - ``SpecificityHandler`` - Specificity (TNR) 112 | - ``SensitivityHandler`` - Sensitivity (same as Recall) 113 | - ``FPRHandler`` - False 
Positive Rate 114 | - ``TNRHandler`` - True Negative Rate 115 | - ``TPRHandler`` - True Positive Rate 116 | 117 | Creating a Custom Metric Recorder 118 | ---------------------------------- 119 | 120 | For managing multiple metrics conveniently, you can create a custom recorder class. 121 | 122 | Simple Metric Recorder 123 | ~~~~~~~~~~~~~~~~~~~~~~ 124 | 125 | .. code-block:: python 126 | 127 | import numpy as np 128 | from py_sod_metrics import MAE, Emeasure, Smeasure, Fmeasure, WeightedFmeasure, HumanCorrectionEffortMeasure 129 | 130 | class SimpleMetricRecorder: 131 | # A simple recorder for basic SOD metrics 132 | 133 | def __init__(self): 134 | self.mae = MAE() 135 | self.em = Emeasure() 136 | self.sm = Smeasure() 137 | self.fm = Fmeasure() 138 | self.wfm = WeightedFmeasure() 139 | self.hce = HumanCorrectionEffortMeasure() 140 | 141 | def step(self, pred, gt): 142 | # Update all metrics with a prediction-ground truth pair 143 | assert pred.shape == gt.shape 144 | assert pred.dtype == np.uint8 and gt.dtype == np.uint8 145 | 146 | self.mae.step(pred, gt) 147 | self.em.step(pred, gt) 148 | self.sm.step(pred, gt) 149 | self.fm.step(pred, gt) 150 | self.wfm.step(pred, gt) 151 | self.hce.step(pred, gt) 152 | 153 | def show(self, num_bits=3): 154 | # Get all metric results as a dictionary 155 | results = {} 156 | 157 | results['MAE'] = round(self.mae.get_results()['mae'], num_bits) 158 | results['Smeasure'] = round(self.sm.get_results()['sm'], num_bits) 159 | results['wFmeasure'] = round(self.wfm.get_results()['wfm'], num_bits) 160 | results['HCE'] = round(self.hce.get_results()['hce'], num_bits) 161 | 162 | em_results = self.em.get_results()['em'] 163 | results['maxEm'] = round(em_results['curve'].max(), num_bits) 164 | results['avgEm'] = round(em_results['curve'].mean(), num_bits) 165 | results['adpEm'] = round(em_results['adp'], num_bits) 166 | 167 | fm_results = self.fm.get_results()['fm'] 168 | results['maxFm'] = round(fm_results['curve'].max(), num_bits) 169 | 
results['avgFm'] = round(fm_results['curve'].mean(), num_bits) 170 | results['adpFm'] = round(fm_results['adp'], num_bits) 171 | 172 | return results 173 | 174 | # Usage example 175 | recorder = SimpleMetricRecorder() 176 | 177 | for pred, gt in dataset: 178 | recorder.step(pred, gt) 179 | 180 | results = recorder.show() 181 | print(results) 182 | 183 | Advanced Metric Recorder with FmeasureV2 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185 | 186 | For more comprehensive evaluation: 187 | 188 | .. code-block:: python 189 | 190 | import numpy as np 191 | from py_sod_metrics import ( 192 | MAE, Emeasure, Smeasure, WeightedFmeasure, HumanCorrectionEffortMeasure, 193 | FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, 194 | IOUHandler, DICEHandler, BERHandler, KappaHandler 195 | ) 196 | 197 | class AdvancedMetricRecorder: 198 | # Advanced recorder supporting many metrics via FmeasureV2 199 | 200 | def __init__(self): 201 | # Individual metrics that don't use FmeasureV2 202 | self.mae = MAE() 203 | self.em = Emeasure() 204 | self.sm = Smeasure() 205 | self.wfm = WeightedFmeasure() 206 | self.hce = HumanCorrectionEffortMeasure() 207 | 208 | # FmeasureV2 with multiple handlers 209 | self.fmv2 = FmeasureV2( 210 | metric_handlers={ 211 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 212 | "f1": FmeasureHandler(beta=1, with_adaptive=True, with_dynamic=True), 213 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 214 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 215 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 216 | "dice": DICEHandler(with_adaptive=True, with_dynamic=True), 217 | "ber": BERHandler(with_adaptive=True, with_dynamic=True), 218 | "kappa": KappaHandler(with_adaptive=True, with_dynamic=True), 219 | } 220 | ) 221 | 222 | def step(self, pred, gt): 223 | # Update all metrics 224 | assert pred.shape == gt.shape 225 | assert pred.dtype == np.uint8 and gt.dtype == np.uint8 226 | 227 
| self.mae.step(pred, gt) 228 | self.em.step(pred, gt) 229 | self.sm.step(pred, gt) 230 | self.wfm.step(pred, gt) 231 | self.hce.step(pred, gt) 232 | self.fmv2.step(pred, gt) 233 | 234 | def show(self, num_bits=3): 235 | # Get all results 236 | results = {} 237 | 238 | # Individual metrics 239 | results['MAE'] = round(self.mae.get_results()['mae'], num_bits) 240 | results['Smeasure'] = round(self.sm.get_results()['sm'], num_bits) 241 | results['wFmeasure'] = round(self.wfm.get_results()['wfm'], num_bits) 242 | results['HCE'] = round(self.hce.get_results()['hce'], num_bits) 243 | 244 | # E-measure 245 | em_data = self.em.get_results()['em'] 246 | results['maxEm'] = round(em_data['curve'].max(), num_bits) 247 | results['avgEm'] = round(em_data['curve'].mean(), num_bits) 248 | results['adpEm'] = round(em_data['adp'], num_bits) 249 | 250 | # FmeasureV2 metrics 251 | fmv2_results = self.fmv2.get_results() 252 | for metric_name in ['fm', 'f1', 'pre', 'rec', 'iou', 'dice', 'ber', 'kappa']: 253 | metric_data = fmv2_results[metric_name] 254 | if 'dynamic' in metric_data: 255 | results[f'max{metric_name}'] = round(metric_data['dynamic'].max(), num_bits) 256 | results[f'avg{metric_name}'] = round(metric_data['dynamic'].mean(), num_bits) 257 | if 'adaptive' in metric_data: 258 | results[f'adp{metric_name}'] = round(metric_data['adaptive'], num_bits) 259 | 260 | return results 261 | 262 | # Usage example 263 | recorder = AdvancedMetricRecorder() 264 | 265 | for pred, gt in dataset: 266 | recorder.step(pred, gt) 267 | 268 | results = recorder.show() 269 | for name, value in results.items(): 270 | print(f"{name}: {value}") 271 | 272 | Specialized Use Cases 273 | --------------------- 274 | 275 | Context-Measure for Camouflaged Object Detection 276 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 277 | 278 | .. 
code-block:: python 279 | 280 | from py_sod_metrics import ContextMeasure, CamouflageContextMeasure 281 | 282 | # Standard Context Measure 283 | cm = ContextMeasure() 284 | 285 | # Camouflage Context Measure (weighted version, requires image) 286 | ccm = CamouflageContextMeasure() 287 | 288 | for pred_path, gt_path, img_path in cod_dataset: 289 | pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE) 290 | gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) 291 | img = cv2.imread(img_path) # RGB image 292 | 293 | cm.step(pred, gt) 294 | ccm.step(pred, gt, img) # Note: CCM requires the original image 295 | 296 | cm_score = cm.get_results()['cm'] 297 | ccm_score = ccm.get_results()['ccm'] 298 | 299 | print(f"Context Measure: {cm_score:.4f}") 300 | print(f"Camouflage Context Measure: {ccm_score:.4f}") 301 | 302 | Size-Invariant Metrics 303 | ~~~~~~~~~~~~~~~~~~~~~~ 304 | 305 | .. code-block:: python 306 | 307 | from py_sod_metrics import SizeInvarianceFmeasureV2, SizeInvarianceMAE, FmeasureHandler, PrecisionHandler, RecallHandler 308 | 309 | # Size-invariant MAE 310 | si_mae = SizeInvarianceMAE() 311 | 312 | # Size-invariant FmeasureV2 313 | si_fmv2 = SizeInvarianceFmeasureV2( 314 | metric_handlers={ 315 | "si_fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 316 | "si_pre": PrecisionHandler(with_adaptive=False, with_dynamic=True, sample_based=True), 317 | "si_rec": RecallHandler(with_adaptive=False, with_dynamic=True, sample_based=True), 318 | } 319 | ) 320 | 321 | # Process dataset 322 | for pred, gt in dataset: 323 | si_mae.step(pred, gt) 324 | si_fmv2.step(pred, gt) 325 | 326 | # Get results 327 | mae_score = si_mae.get_results()['si_mae'] 328 | fmv2_results = si_fmv2.get_results() 329 | 330 | print(f"SI-MAE: {mae_score:.4f}") 331 | 332 | Multi-Scale IoU 333 | ~~~~~~~~~~~~~~~ 334 | 335 | .. 
code-block:: python 336 | 337 | from py_sod_metrics import MSIoU 338 | 339 | # Initialize with different strategies 340 | msiou = MSIoU(with_dynamic=True, with_adaptive=True, with_binary=True) 341 | 342 | for pred, gt in dataset: 343 | msiou.step(pred, gt) 344 | 345 | results = msiou.get_results() 346 | 347 | print(f"MS-IoU (adaptive): {results['adaptive']:.4f}") 348 | print(f"MS-IoU (max): {results['dynamic'].max():.4f}") 349 | print(f"MS-IoU (mean): {results['dynamic'].mean():.4f}") 350 | print(f"MS-IoU (binary): {results['binary']:.4f}") 351 | 352 | Complete Evaluation Example 353 | ---------------------------- 354 | 355 | Here's a complete, production-ready example: 356 | 357 | .. code-block:: python 358 | 359 | import os 360 | import cv2 361 | import numpy as np 362 | from py_sod_metrics import ( 363 | MAE, Emeasure, Smeasure, WeightedFmeasure, 364 | FmeasureV2, FmeasureHandler, PrecisionHandler, RecallHandler, IOUHandler 365 | ) 366 | 367 | class SODEvaluator: 368 | # Complete SOD evaluation class 369 | 370 | def __init__(self): 371 | self.mae = MAE() 372 | self.em = Emeasure() 373 | self.sm = Smeasure() 374 | self.wfm = WeightedFmeasure() 375 | 376 | self.fmv2 = FmeasureV2( 377 | metric_handlers={ 378 | "fm": FmeasureHandler(beta=0.3, with_adaptive=True, with_dynamic=True), 379 | "pre": PrecisionHandler(with_adaptive=True, with_dynamic=True), 380 | "rec": RecallHandler(with_adaptive=True, with_dynamic=True), 381 | "iou": IOUHandler(with_adaptive=True, with_dynamic=True), 382 | } 383 | ) 384 | 385 | def step(self, pred, gt): 386 | self.mae.step(pred, gt) 387 | self.em.step(pred, gt) 388 | self.sm.step(pred, gt) 389 | self.wfm.step(pred, gt) 390 | self.fmv2.step(pred, gt) 391 | 392 | def get_results(self): 393 | results = { 394 | 'MAE': self.mae.get_results()['mae'], 395 | 'Smeasure': self.sm.get_results()['sm'], 396 | 'wFmeasure': self.wfm.get_results()['wfm'], 397 | } 398 | 399 | em = self.em.get_results()['em'] 400 | results.update({ 401 | 'maxEm': 
em['curve'].max(), 402 | 'avgEm': em['curve'].mean(), 403 | 'adpEm': em['adp'], 404 | }) 405 | 406 | fmv2 = self.fmv2.get_results() 407 | for name in ['fm', 'pre', 'rec', 'iou']: 408 | data = fmv2[name] 409 | results[f'max{name}'] = data['dynamic'].max() 410 | results[f'avg{name}'] = data['dynamic'].mean() 411 | results[f'adp{name}'] = data['adaptive'] 412 | 413 | return results 414 | 415 | def evaluate_predictions(pred_dir, gt_dir): 416 | # Evaluate all predictions in a directory 417 | evaluator = SODEvaluator() 418 | 419 | pred_files = sorted(os.listdir(pred_dir)) 420 | gt_files = sorted(os.listdir(gt_dir)) 421 | 422 | assert len(pred_files) == len(gt_files), "Mismatch in number of files" 423 | 424 | for pred_file, gt_file in zip(pred_files, gt_files): 425 | pred = cv2.imread(os.path.join(pred_dir, pred_file), cv2.IMREAD_GRAYSCALE) 426 | gt = cv2.imread(os.path.join(gt_dir, gt_file), cv2.IMREAD_GRAYSCALE) 427 | 428 | if pred.shape != gt.shape: 429 | pred = cv2.resize(pred, dsize=gt.shape[::-1], interpolation=cv2.INTER_LINEAR) 430 | 431 | evaluator.step(pred, gt) 432 | 433 | results = evaluator.get_results() 434 | 435 | print("=" * 50) 436 | print("Evaluation Results") 437 | print("=" * 50) 438 | for metric, value in sorted(results.items()): 439 | print(f"{metric:20s}: {value:.4f}") 440 | 441 | return results 442 | 443 | # Run evaluation 444 | if __name__ == "__main__": 445 | pred_directory = "./predictions" 446 | gt_directory = "./ground_truth" 447 | results = evaluate_predictions(pred_directory, gt_directory) 448 | 449 | Best Practices 450 | -------------- 451 | 452 | 1. **Data Format** 453 | 454 | - Predictions and ground truth should be ``uint8`` numpy arrays 455 | - Values should be in range [0, 255] 456 | - Ground truth masks should typically be binary (0 or 255) 457 | - Ensure prediction and ground truth have the same spatial dimensions 458 | 459 | 2. 
**Memory Efficiency** 460 | 461 | - Use the ``step()`` method iteratively for large datasets 462 | - Call ``get_results()`` only once after processing all samples 463 | - Avoid loading all images into memory at once 464 | 465 | 3. **Result Interpretation** 466 | 467 | - ``adaptive``: Threshold-based metric using 2× mean of predictions 468 | - ``dynamic``: Curve across all thresholds (256 points) 469 | - ``binary``: Metric computed on binarized predictions 470 | - ``curve``: Full precision-recall curve or threshold-based curve 471 | 472 | 4. **Choosing Metrics** 473 | 474 | - **For SOD**: MAE, S-measure, E-measure, F-measure, Weighted F-measure 475 | - **For COD**: Add Context-Measure and Camouflage Context-Measure 476 | - **For multi-scale objects**: Use size-invariant (SI) variants 477 | - **For fine structures**: Use Multi-Scale IoU 478 | - **For medical imaging**: Consider Dice coefficient and IoU 479 | 480 | 5. **Performance Tips** 481 | 482 | - Resize predictions to match ground truth size before calling ``step()`` 483 | - Use FmeasureV2 to compute multiple related metrics efficiently 484 | - Specify only the metrics you need to save computation time 485 | 486 | Reference 487 | --------- 488 | 489 | For more examples, see the `examples folder `_ in the GitHub repository: 490 | 491 | - ``metric_recorder.py`` - Production-ready metric recorder implementations 492 | - ``test_metrics.py`` - Comprehensive test cases showing all features 493 | -------------------------------------------------------------------------------- /examples/metric_recorder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | sys.path.append("..") 8 | import py_sod_metrics 9 | 10 | 11 | def ndarray_to_basetype(data): 12 | """ 13 | 将单独的ndarray,或者tuple,list或者dict中的ndarray转化为基本数据类型, 14 | 即列表(.tolist())和python标量 15 | """ 16 | 17 | def _to_list_or_scalar(item): 18 | listed_item = 
item.tolist() 19 | if isinstance(listed_item, list) and len(listed_item) == 1: 20 | listed_item = listed_item[0] 21 | return listed_item 22 | 23 | if isinstance(data, (tuple, list)): 24 | results = [_to_list_or_scalar(item) for item in data] 25 | elif isinstance(data, dict): 26 | results = {k: _to_list_or_scalar(item) for k, item in data.items()} 27 | else: 28 | assert isinstance(data, np.ndarray) 29 | results = _to_list_or_scalar(data) 30 | return results 31 | 32 | 33 | INDIVADUAL_METRIC_MAPPING = { 34 | "mae": py_sod_metrics.MAE, 35 | "fm": py_sod_metrics.Fmeasure, 36 | "em": py_sod_metrics.Emeasure, 37 | "sm": py_sod_metrics.Smeasure, 38 | "wfm": py_sod_metrics.WeightedFmeasure, 39 | "hce": py_sod_metrics.HumanCorrectionEffortMeasure, 40 | } 41 | 42 | 43 | class GrayscaleMetricRecorderV1: 44 | def __init__(self): 45 | """ 46 | 用于统计各种指标的类 47 | https://github.com/lartpang/Py-SOD-VOS-EvalToolkit/blob/81ce89da6813fdd3e22e3f20e3a09fe1e4a1a87c/utils/recorders/metric_recorder.py 48 | 49 | 主要应用于旧版本实现中的五个指标,即mae/fm/sm/em/wfm/hce。推荐使用V2版本。 50 | """ 51 | self.mae = INDIVADUAL_METRIC_MAPPING["mae"]() 52 | self.fm = INDIVADUAL_METRIC_MAPPING["fm"]() 53 | self.sm = INDIVADUAL_METRIC_MAPPING["sm"]() 54 | self.em = INDIVADUAL_METRIC_MAPPING["em"]() 55 | self.wfm = INDIVADUAL_METRIC_MAPPING["wfm"]() 56 | self.hce = INDIVADUAL_METRIC_MAPPING["hce"]() 57 | 58 | def step(self, pre: np.ndarray, gt: np.ndarray): 59 | assert pre.shape == gt.shape 60 | assert pre.dtype == np.uint8 61 | assert gt.dtype == np.uint8 62 | 63 | self.mae.step(pre, gt) 64 | self.sm.step(pre, gt) 65 | self.fm.step(pre, gt) 66 | self.em.step(pre, gt) 67 | self.wfm.step(pre, gt) 68 | self.hce.step(pre, gt) 69 | 70 | def get_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 71 | """ 72 | 返回指标计算结果: 73 | 74 | - 曲线数据(sequential): fm/em/p/r 75 | - 数值指标(numerical): SM/MAE/maxE/avgE/adpE/maxF/avgF/adpF/wFm 76 | """ 77 | fm_info = self.fm.get_results() 78 | fm = fm_info["fm"] 79 | pr = 
fm_info["pr"] 80 | wfm = self.wfm.get_results()["wfm"] 81 | sm = self.sm.get_results()["sm"] 82 | em = self.em.get_results()["em"] 83 | mae = self.mae.get_results()["mae"] 84 | hce = self.hce.get_results()["hce"] 85 | 86 | sequential_results = { 87 | "fm": np.flip(fm["curve"]), 88 | "em": np.flip(em["curve"]), 89 | "p": np.flip(pr["p"]), 90 | "r": np.flip(pr["r"]), 91 | } 92 | numerical_results = { 93 | "SM": sm, 94 | "MAE": mae, 95 | "maxE": em["curve"].max(), 96 | "avgE": em["curve"].mean(), 97 | "adpE": em["adp"], 98 | "maxF": fm["curve"].max(), 99 | "avgF": fm["curve"].mean(), 100 | "adpF": fm["adp"], 101 | "wFm": wfm, 102 | "HCE": hce, 103 | } 104 | if num_bits is not None and isinstance(num_bits, int): 105 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 106 | if not return_ndarray: 107 | sequential_results = ndarray_to_basetype(sequential_results) 108 | numerical_results = ndarray_to_basetype(numerical_results) 109 | return {"sequential": sequential_results, "numerical": numerical_results} 110 | 111 | 112 | sample_gray = dict(with_adaptive=True, with_dynamic=True) 113 | sample_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=True) 114 | overall_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False) 115 | # fmt: off 116 | GRAYSCALE_METRIC_MAPPING = { 117 | # 灰度数据指标 118 | "fm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=0.3)}, 119 | "f1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=1)}, 120 | "pre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": sample_gray}, 121 | "rec": {"handler": py_sod_metrics.RecallHandler, "kwargs": sample_gray}, 122 | "iou": {"handler": py_sod_metrics.IOUHandler, "kwargs": sample_gray}, 123 | "dice": {"handler": py_sod_metrics.DICEHandler, "kwargs": sample_gray}, 124 | "spec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": sample_gray}, 125 | 
"ber": {"handler": py_sod_metrics.BERHandler, "kwargs": sample_gray}, 126 | "oa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": sample_gray}, 127 | "kappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": sample_gray}, 128 | } 129 | BINARY_METRIC_MAPPING = { 130 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 131 | "sample_bifm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_bin, beta=0.3)}, 132 | "sample_bif1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_bin, beta=1)}, 133 | "sample_bipre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": sample_bin}, 134 | "sample_birec": {"handler": py_sod_metrics.RecallHandler, "kwargs": sample_bin}, 135 | "sample_biiou": {"handler": py_sod_metrics.IOUHandler, "kwargs": sample_bin}, 136 | "sample_bidice": {"handler": py_sod_metrics.DICEHandler, "kwargs": sample_bin}, 137 | "sample_bispec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": sample_bin}, 138 | "sample_biber": {"handler": py_sod_metrics.BERHandler, "kwargs": sample_bin}, 139 | "sample_bioa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": sample_bin}, 140 | "sample_bikappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": sample_bin}, 141 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 142 | "overall_bifm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**overall_bin, beta=0.3)}, 143 | "overall_bif1": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**overall_bin, beta=1)}, 144 | "overall_bipre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": overall_bin}, 145 | "overall_birec": {"handler": py_sod_metrics.RecallHandler, "kwargs": overall_bin}, 146 | "overall_biiou": {"handler": py_sod_metrics.IOUHandler, "kwargs": overall_bin}, 147 | "overall_bidice": {"handler": py_sod_metrics.DICEHandler, "kwargs": overall_bin}, 148 | "overall_bispec": {"handler": py_sod_metrics.SpecificityHandler, "kwargs": overall_bin}, 149 | "overall_biber": {"handler": 
py_sod_metrics.BERHandler, "kwargs": overall_bin}, 150 | "overall_bioa": {"handler": py_sod_metrics.OverallAccuracyHandler, "kwargs": overall_bin}, 151 | "overall_bikappa": {"handler": py_sod_metrics.KappaHandler, "kwargs": overall_bin}, 152 | } 153 | SIZEINVARIANCE_METRIC_MAPPING = { 154 | "handler":{ 155 | "si_fm": {"handler": py_sod_metrics.FmeasureHandler, "kwargs": dict(**sample_gray, beta=0.3)}, 156 | "si_pre": {"handler": py_sod_metrics.PrecisionHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 157 | "si_rec": {"handler": py_sod_metrics.RecallHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 158 | "si_tpr": {"handler": py_sod_metrics.TPRHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 159 | "si_fpr": {"handler": py_sod_metrics.FPRHandler, "kwargs": dict(with_adaptive=False, with_dynamic=True, sample_based=True)}, 160 | }, 161 | "si_fmeasurev2": py_sod_metrics.SizeInvarianceFmeasureV2, 162 | "si_mae": py_sod_metrics.SizeInvarianceMAE, 163 | } 164 | # fmt: on 165 | 166 | 167 | class GrayscaleMetricRecorderV2: 168 | supported_metrics = ["mae", "em", "sm", "wfm", "hce"] + sorted(GRAYSCALE_METRIC_MAPPING.keys()) 169 | 170 | def __init__(self, metric_names=("sm", "wfm", "mae", "fmeasure", "em")): 171 | """ 172 | 用于统计各种指标的类,支持更多的指标,更好的兼容性。 173 | """ 174 | if not metric_names: 175 | metric_names = self.supported_metrics 176 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 177 | 178 | self.metric_objs = {} 179 | has_existed = False 180 | for metric_name in metric_names: 181 | if metric_name in INDIVADUAL_METRIC_MAPPING: 182 | self.metric_objs[metric_name] = INDIVADUAL_METRIC_MAPPING[metric_name]() 183 | else: # metric_name in GRAYSCALE_METRIC_MAPPING 184 | if not has_existed: # only init once 185 | self.metric_objs["fmeasurev2"] = py_sod_metrics.FmeasureV2() 186 | has_existed = True 187 | metric_handler 
= GRAYSCALE_METRIC_MAPPING[metric_name] 188 | self.metric_objs["fmeasurev2"].add_handler( 189 | handler_name=metric_name, 190 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 191 | ) 192 | 193 | def step(self, pre: np.ndarray, gt: np.ndarray): 194 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 195 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 196 | 197 | for m_obj in self.metric_objs.values(): 198 | m_obj.step(pre, gt) 199 | 200 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 201 | sequential_results = {} 202 | numerical_results = {} 203 | for m_name, m_obj in self.metric_objs.items(): 204 | info = m_obj.get_results() 205 | if m_name == "fmeasurev2": 206 | for _name, results in info.items(): 207 | dynamic_results = results.get("dynamic") 208 | adaptive_results = results.get("adaptive") 209 | if dynamic_results is not None: 210 | sequential_results[_name] = np.flip(dynamic_results) 211 | numerical_results[f"max{_name}"] = dynamic_results.max() 212 | numerical_results[f"avg{_name}"] = dynamic_results.mean() 213 | if adaptive_results is not None: 214 | numerical_results[f"adp{_name}"] = adaptive_results 215 | else: 216 | results = info[m_name] 217 | if m_name in ("wfm", "sm", "mae", "hce"): 218 | numerical_results[m_name] = results 219 | elif m_name in ("fm", "em"): 220 | sequential_results[m_name] = np.flip(results["curve"]) 221 | numerical_results.update( 222 | { 223 | f"max{m_name}": results["curve"].max(), 224 | f"avg{m_name}": results["curve"].mean(), 225 | f"adp{m_name}": results["adp"], 226 | } 227 | ) 228 | else: 229 | raise NotImplementedError(m_name) 230 | 231 | if num_bits is not None and isinstance(num_bits, int): 232 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 233 | if not return_ndarray: 234 | sequential_results = ndarray_to_basetype(sequential_results) 235 | numerical_results = ndarray_to_basetype(numerical_results) 236 | return 
{"sequential": sequential_results, "numerical": numerical_results} 237 | 238 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 239 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 240 | 241 | 242 | class BinaryMetricRecorder: 243 | supported_metrics = ["mae", "sm", "wfm", "hce"] + sorted(BINARY_METRIC_MAPPING.keys()) 244 | 245 | def __init__(self, metric_names=("bif1", "biprecision", "birecall", "biiou")): 246 | """ 247 | 用于统计各种指标的类,主要适用于对单通道灰度图计算二值图像的指标。 248 | """ 249 | if not metric_names: 250 | metric_names = self.supported_metrics 251 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 252 | 253 | self.metric_objs = {} 254 | has_existed = False 255 | for metric_name in metric_names: 256 | if metric_name in INDIVADUAL_METRIC_MAPPING: 257 | self.metric_objs[metric_name] = INDIVADUAL_METRIC_MAPPING[metric_name]() 258 | else: # metric_name in BINARY_METRIC_MAPPING 259 | if not has_existed: # only init once 260 | self.metric_objs["fmeasurev2"] = py_sod_metrics.FmeasureV2() 261 | has_existed = True 262 | metric_handler = BINARY_METRIC_MAPPING[metric_name] 263 | self.metric_objs["fmeasurev2"].add_handler( 264 | handler_name=metric_name, 265 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 266 | ) 267 | 268 | def step(self, pre: np.ndarray, gt: np.ndarray): 269 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 270 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 271 | 272 | for m_obj in self.metric_objs.values(): 273 | m_obj.step(pre, gt) 274 | 275 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 276 | numerical_results = {} 277 | for m_name, m_obj in self.metric_objs.items(): 278 | info = m_obj.get_results() 279 | if m_name == "fmeasurev2": 280 | for _name, results in info.items(): 281 | binary_results = results.get("binary") 282 | if binary_results is not None: 
283 | numerical_results[_name] = binary_results 284 | else: 285 | results = info[m_name] 286 | if m_name in ("mae", "sm", "wfm", "hce"): 287 | numerical_results[m_name] = results 288 | else: 289 | raise NotImplementedError(m_name) 290 | 291 | if num_bits is not None and isinstance(num_bits, int): 292 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 293 | if not return_ndarray: 294 | numerical_results = ndarray_to_basetype(numerical_results) 295 | return {"numerical": numerical_results} 296 | 297 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 298 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 299 | 300 | 301 | class TargetwiseGrayscaleMetricRecorderV2: 302 | supported_metrics = ["si_mae"] + sorted(SIZEINVARIANCE_METRIC_MAPPING["handler"].keys()) 303 | 304 | def __init__(self, metric_names=("si_mae",)): 305 | if not metric_names: 306 | metric_names = self.supported_metrics 307 | assert all([m in self.supported_metrics for m in metric_names]), f"Only support: {self.supported_metrics}" 308 | 309 | self.metric_objs = {} 310 | has_existed = False 311 | for metric_name in metric_names: 312 | if metric_name in SIZEINVARIANCE_METRIC_MAPPING["handler"]: 313 | if not has_existed: # only init once 314 | self.metric_objs["si_fmeasurev2"] = SIZEINVARIANCE_METRIC_MAPPING["si_fmeasurev2"]() 315 | has_existed = True 316 | metric_handler = SIZEINVARIANCE_METRIC_MAPPING["handler"][metric_name] 317 | self.metric_objs["si_fmeasurev2"].add_handler( 318 | handler_name=metric_name, 319 | metric_handler=metric_handler["handler"](**metric_handler["kwargs"]), 320 | ) 321 | else: 322 | self.metric_objs[metric_name] = SIZEINVARIANCE_METRIC_MAPPING[metric_name]() 323 | 324 | def step(self, pre: np.ndarray, gt: np.ndarray): 325 | assert pre.shape == gt.shape, (pre.shape, gt.shape) 326 | assert pre.dtype == gt.dtype == np.uint8, (pre.dtype, gt.dtype) 327 | 328 | for m_obj in 
self.metric_objs.values(): 329 | m_obj.step(pre, gt) 330 | 331 | def cal_auc(self, y, x): 332 | sorted_idx = np.argsort(x, axis=-1, kind="stable") 333 | x = np.take_along_axis(x, sorted_idx, axis=-1) 334 | y = np.take_along_axis(y, sorted_idx, axis=-1) 335 | return np.trapz(y, x, axis=-1) 336 | 337 | def get_all_results(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 338 | sequential_results = {} 339 | numerical_results = {} 340 | for m_name, m_obj in self.metric_objs.items(): 341 | info = m_obj.get_results() 342 | 343 | if m_name == "si_fmeasurev2": 344 | # AUC-ROC 345 | if "si_tpr" in info and "si_fpr" in info: 346 | ys = info.pop("si_tpr")["dynamic"] # >=255,>=254,...>=1,>=0 347 | xs = info.pop("si_fpr")["dynamic"] 348 | if isinstance(ys, list) and isinstance(xs, list): # Nx[T'x256] 349 | auc_results = [] 350 | for y, x in zip(ys, xs): 351 | # NOTE: before calculate the auc, we need to flip the y and x to corresponding to ascending thresholds 352 | # because the dynamic results from our metrics is based on the descending order of thresholds, i.e., >=255,>=254,...>=1,>=0 353 | y = np.flip(y, axis=-1) 354 | x = np.flip(x, axis=-1) 355 | auc_results.append(self.cal_auc(y, x).mean()) 356 | numerical_results["si_sample_auc_roc"] = np.asarray(auc_results).mean() 357 | else: # 256 358 | numerical_results["si_overall_auc_roc"] = self.cal_auc(y=ys, x=xs).mean() 359 | 360 | # AUC-PR 361 | if "si_pre" in info and "si_rec" in info: 362 | ys = info.pop("si_pre")["dynamic"] # >=255,>=254,...>=1,>=0 363 | xs = info.pop("si_rec")["dynamic"] 364 | if isinstance(ys, list) and isinstance(xs, list): # Nx[T'x256] 365 | auc_results = [] 366 | for y, x in zip(ys, xs): 367 | y = np.flip(y, axis=-1) 368 | x = np.flip(x, axis=-1) 369 | auc_results.append(self.cal_auc(y, x).mean()) 370 | numerical_results["si_sample_auc_pr"] = np.asarray(auc_results).mean() 371 | else: # 256 372 | numerical_results["si_overall_auc_pr"] = self.cal_auc(y=ys, x=xs).mean() 373 | 374 | for 
_name, results in info.items(): 375 | dynamic_results = results.get("dynamic") 376 | if dynamic_results is not None: 377 | if isinstance(dynamic_results, list): # Nx[T'x256] 378 | max_results = [] 379 | avg_results = [] 380 | seq_results = [] 381 | for s in dynamic_results: 382 | max_results.append(s.max(axis=-1).mean()) # 1 383 | avg_results.append(s.mean(axis=-1).mean()) # 1 384 | seq_results.append(s.mean(axis=0)) # 256 385 | seq_results = np.mean(np.asarray(seq_results), axis=0) 386 | numerical_results[f"si_sample_max{_name}"] = np.asarray(max_results).mean() 387 | numerical_results[f"si_sample_avg{_name}"] = np.asarray(avg_results).mean() 388 | else: # 256 389 | seq_results = dynamic_results 390 | numerical_results[f"si_overall_max{_name}"] = dynamic_results.max() 391 | numerical_results[f"si_overall_avg{_name}"] = dynamic_results.mean() 392 | sequential_results[_name] = np.flip(seq_results) 393 | 394 | adaptive_results = results.get("adaptive") 395 | if adaptive_results is not None: 396 | numerical_results[f"si_sample_adp{_name}"] = adaptive_results 397 | else: 398 | results = info[m_name] 399 | if m_name in ("si_mae",): 400 | numerical_results[m_name] = results 401 | else: 402 | raise NotImplementedError(m_name) 403 | 404 | if num_bits is not None and isinstance(num_bits, int): 405 | numerical_results = {k: v.round(num_bits) for k, v in numerical_results.items()} 406 | if not return_ndarray: 407 | sequential_results = ndarray_to_basetype(sequential_results) 408 | numerical_results = ndarray_to_basetype(numerical_results) 409 | return {"sequential": sequential_results, "numerical": numerical_results} 410 | 411 | def show(self, num_bits: int = 3, return_ndarray: bool = False) -> dict: 412 | return self.get_all_results(num_bits=num_bits, return_ndarray=return_ndarray)["numerical"] 413 | 414 | 415 | if __name__ == "__main__": 416 | data_root = "./test_data" 417 | mask_root = os.path.join(data_root, "masks") 418 | pred_root = os.path.join(data_root, "preds") 419 
| masks = [os.path.join(mask_root, f) for f in sorted(os.listdir(mask_root))] 420 | preds = [os.path.join(pred_root, f) for f in sorted(os.listdir(pred_root))] 421 | 422 | metrics_v1 = GrayscaleMetricRecorderV2(metric_names=GrayscaleMetricRecorderV2.supported_metrics) 423 | metrics_v2 = BinaryMetricRecorder(metric_names=BinaryMetricRecorder.supported_metrics) 424 | metrics_v3 = TargetwiseGrayscaleMetricRecorderV2( 425 | metric_names=TargetwiseGrayscaleMetricRecorderV2.supported_metrics 426 | ) 427 | for mask, pred in zip(masks, preds): 428 | mask = cv2.imread(mask, cv2.IMREAD_GRAYSCALE) 429 | pred = cv2.imread(pred, cv2.IMREAD_GRAYSCALE) 430 | if pred.shape != mask.shape: 431 | pred = cv2.resize(pred, dsize=mask.shape[::-1], interpolation=cv2.INTER_LINEAR) 432 | 433 | metrics_v1.step(pred, mask) 434 | metrics_v2.step(pred, mask) 435 | metrics_v3.step(pred, mask) 436 | print(metrics_v1.show()) 437 | print(metrics_v2.show()) 438 | print(metrics_v3.show()) 439 | -------------------------------------------------------------------------------- /examples/test_metrics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import unittest 5 | from pprint import pprint 6 | 7 | import cv2 8 | import numpy as np 9 | from skimage import data 10 | 11 | sys.path.append("..") 12 | import py_sod_metrics 13 | 14 | with open("./version_performance.json", encoding="utf-8", mode="r") as f: 15 | default_results = json.load(f) 16 | 17 | 18 | def cal_auc(y: np.ndarray, x: np.ndarray): 19 | assert y.shape == x.shape, (y.shape, x.shape) 20 | sorted_idx = np.argsort(x, axis=-1, kind="stable") 21 | y = np.take_along_axis(y, sorted_idx, axis=-1) 22 | x = np.take_along_axis(x, sorted_idx, axis=-1) 23 | return np.trapz(y=y, x=x, axis=-1) 24 | 25 | 26 | def reduce_dynamic_results_for_max_avg(dynamic_results: list): # Nx[T'x256] -> Nx[T'] -> N -> 1 27 | max_results = [] 28 | avg_results = [] 29 | for s in 
dynamic_results: 30 | max_results.append(s.max(axis=-1).mean()) 31 | avg_results.append(s.mean(axis=-1).mean()) 32 | return np.asarray(max_results).mean(), np.asarray(avg_results).mean() 33 | 34 | 35 | def reduce_dynamic_results_for_auc(ys: list, xs: list): # Nx[T'x256] -> Nx[T'] -> N -> 1 36 | auc_results = [] 37 | for y, x in zip(ys, xs): 38 | # NOTE: before calculate the auc, we need to flip the y and x to corresponding to ascending thresholds 39 | # because the dynamic results from our metrics is based on the descending order of thresholds, i.e., >=255,>=254,...>=1,>=0 40 | y = np.flip(y, -1) 41 | x = np.flip(x, -1) 42 | auc_results.append(cal_auc(y=y, x=x).mean()) 43 | return np.asarray(auc_results).mean() 44 | 45 | 46 | class CheckMetricTestCase(unittest.TestCase): 47 | @classmethod 48 | def setUpClass(cls): 49 | FM = py_sod_metrics.Fmeasure() 50 | WFM = py_sod_metrics.WeightedFmeasure() 51 | SM = py_sod_metrics.Smeasure() 52 | EM = py_sod_metrics.Emeasure() 53 | MAE = py_sod_metrics.MAE() 54 | HCE = py_sod_metrics.HumanCorrectionEffortMeasure() 55 | CM = py_sod_metrics.ContextMeasure() 56 | CCM = py_sod_metrics.CamouflageContextMeasure() 57 | MSIOU = py_sod_metrics.MSIoU(with_dynamic=True, with_adaptive=True, with_binary=True) 58 | 59 | # fmt: off 60 | sample_gray = dict(with_adaptive=True, with_dynamic=True) 61 | sample_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=True) 62 | overall_bin = dict(with_adaptive=False, with_dynamic=False, with_binary=True, sample_based=False) 63 | FMv2 = py_sod_metrics.FmeasureV2( 64 | metric_handlers={ 65 | # 灰度数据指标 66 | "fm": py_sod_metrics.FmeasureHandler(**sample_gray, beta=0.3), 67 | "f1": py_sod_metrics.FmeasureHandler(**sample_gray, beta=1), 68 | "pre": py_sod_metrics.PrecisionHandler(**sample_gray), 69 | "rec": py_sod_metrics.RecallHandler(**sample_gray), 70 | "fpr": py_sod_metrics.FPRHandler(**sample_gray), 71 | "iou": py_sod_metrics.IOUHandler(**sample_gray), 72 | "dice": 
py_sod_metrics.DICEHandler(**sample_gray), 73 | "spec": py_sod_metrics.SpecificityHandler(**sample_gray), 74 | "ber": py_sod_metrics.BERHandler(**sample_gray), 75 | "oa": py_sod_metrics.OverallAccuracyHandler(**sample_gray), 76 | "kappa": py_sod_metrics.KappaHandler(**sample_gray), 77 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 78 | "sample_bifm": py_sod_metrics.FmeasureHandler(**sample_bin, beta=0.3), 79 | "sample_bif1": py_sod_metrics.FmeasureHandler(**sample_bin, beta=1), 80 | "sample_bipre": py_sod_metrics.PrecisionHandler(**sample_bin), 81 | "sample_birec": py_sod_metrics.RecallHandler(**sample_bin), 82 | "sample_bifpr": py_sod_metrics.FPRHandler(**sample_bin), 83 | "sample_biiou": py_sod_metrics.IOUHandler(**sample_bin), 84 | "sample_bidice": py_sod_metrics.DICEHandler(**sample_bin), 85 | "sample_bispec": py_sod_metrics.SpecificityHandler(**sample_bin), 86 | "sample_biber": py_sod_metrics.BERHandler(**sample_bin), 87 | "sample_bioa": py_sod_metrics.OverallAccuracyHandler(**sample_bin), 88 | "sample_bikappa": py_sod_metrics.KappaHandler(**sample_bin), 89 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 90 | "overall_bifm": py_sod_metrics.FmeasureHandler(**overall_bin, beta=0.3), 91 | "overall_bif1": py_sod_metrics.FmeasureHandler(**overall_bin, beta=1), 92 | "overall_bipre": py_sod_metrics.PrecisionHandler(**overall_bin), 93 | "overall_birec": py_sod_metrics.RecallHandler(**overall_bin), 94 | "overall_bifpr": py_sod_metrics.FPRHandler(**overall_bin), 95 | "overall_biiou": py_sod_metrics.IOUHandler(**overall_bin), 96 | "overall_bidice": py_sod_metrics.DICEHandler(**overall_bin), 97 | "overall_bispec": py_sod_metrics.SpecificityHandler(**overall_bin), 98 | "overall_biber": py_sod_metrics.BERHandler(**overall_bin), 99 | "overall_bioa": py_sod_metrics.OverallAccuracyHandler(**overall_bin), 100 | "overall_bikappa": py_sod_metrics.KappaHandler(**overall_bin), 101 | } 102 | ) 103 | 104 | # adaptive is not supported for non-sample-based metrics 105 | overall_gray = 
dict(with_adaptive=False, with_dynamic=True, sample_based=False) 106 | SI_MAE = py_sod_metrics.SizeInvarianceMAE() 107 | SI_FMv2 = py_sod_metrics.SizeInvarianceFmeasureV2( 108 | metric_handlers={ 109 | "si_sample_fm": py_sod_metrics.FmeasureHandler(**sample_gray, beta=0.3), 110 | "si_sample_f1": py_sod_metrics.FmeasureHandler(**sample_gray, beta=1), 111 | "si_sample_pre": py_sod_metrics.PrecisionHandler(**sample_gray), 112 | "si_sample_rec": py_sod_metrics.RecallHandler(**sample_gray), 113 | "si_sample_fpr": py_sod_metrics.FPRHandler(**sample_gray), 114 | "si_sample_iou": py_sod_metrics.IOUHandler(**sample_gray), 115 | "si_sample_dice": py_sod_metrics.DICEHandler(**sample_gray), 116 | "si_sample_spec": py_sod_metrics.SpecificityHandler(**sample_gray), 117 | "si_sample_ber": py_sod_metrics.BERHandler(**sample_gray), 118 | "si_sample_oa": py_sod_metrics.OverallAccuracyHandler(**sample_gray), 119 | "si_sample_kappa": py_sod_metrics.KappaHandler(**sample_gray), 120 | # 121 | "si_overall_fm": py_sod_metrics.FmeasureHandler(**overall_gray, beta=0.3), 122 | "si_overall_f1": py_sod_metrics.FmeasureHandler(**overall_gray, beta=1), 123 | "si_overall_pre": py_sod_metrics.PrecisionHandler(**overall_gray), 124 | "si_overall_rec": py_sod_metrics.RecallHandler(**overall_gray), 125 | "si_overall_fpr": py_sod_metrics.FPRHandler(**overall_gray), 126 | "si_overall_iou": py_sod_metrics.IOUHandler(**overall_gray), 127 | "si_overall_dice": py_sod_metrics.DICEHandler(**overall_gray), 128 | "si_overall_spec": py_sod_metrics.SpecificityHandler(**overall_gray), 129 | "si_overall_ber": py_sod_metrics.BERHandler(**overall_gray), 130 | "si_overall_oa": py_sod_metrics.OverallAccuracyHandler(**overall_gray), 131 | "si_overall_kappa": py_sod_metrics.KappaHandler(**overall_gray), 132 | # 二值化数据指标的特殊情况一:各个样本独立计算指标后取平均 133 | "si_sample_bifm": py_sod_metrics.FmeasureHandler(**sample_bin, beta=0.3), 134 | "si_sample_bif1": py_sod_metrics.FmeasureHandler(**sample_bin, beta=1), 135 | "si_sample_bipre": 
py_sod_metrics.PrecisionHandler(**sample_bin), 136 | "si_sample_birec": py_sod_metrics.RecallHandler(**sample_bin), 137 | "si_sample_bifpr": py_sod_metrics.FPRHandler(**sample_bin), 138 | "si_sample_biiou": py_sod_metrics.IOUHandler(**sample_bin), 139 | "si_sample_bidice": py_sod_metrics.DICEHandler(**sample_bin), 140 | "si_sample_bispec": py_sod_metrics.SpecificityHandler(**sample_bin), 141 | "si_sample_biber": py_sod_metrics.BERHandler(**sample_bin), 142 | "si_sample_bioa": py_sod_metrics.OverallAccuracyHandler(**sample_bin), 143 | "si_sample_bikappa": py_sod_metrics.KappaHandler(**sample_bin), 144 | # 二值化数据指标的特殊情况二:汇总所有样本的tp、fp、tn、fn后整体计算指标 145 | "si_overall_bifm": py_sod_metrics.FmeasureHandler(**overall_bin, beta=0.3), 146 | "si_overall_bif1": py_sod_metrics.FmeasureHandler(**overall_bin, beta=1), 147 | "si_overall_bipre": py_sod_metrics.PrecisionHandler(**overall_bin), 148 | "si_overall_birec": py_sod_metrics.RecallHandler(**overall_bin), 149 | "si_overall_bifpr": py_sod_metrics.FPRHandler(**overall_bin), 150 | "si_overall_biiou": py_sod_metrics.IOUHandler(**overall_bin), 151 | "si_overall_bidice": py_sod_metrics.DICEHandler(**overall_bin), 152 | "si_overall_bispec": py_sod_metrics.SpecificityHandler(**overall_bin), 153 | "si_overall_biber": py_sod_metrics.BERHandler(**overall_bin), 154 | "si_overall_bioa": py_sod_metrics.OverallAccuracyHandler(**overall_bin), 155 | "si_overall_bikappa": py_sod_metrics.KappaHandler(**overall_bin), 156 | } 157 | ) 158 | # fmt: on 159 | 160 | data_root = "./test_data" 161 | mask_root = os.path.join(data_root, "masks") 162 | pred_root = os.path.join(data_root, "preds") 163 | mask_name_list = sorted(os.listdir(mask_root)) 164 | 165 | for i, mask_name in enumerate(mask_name_list): 166 | print(f"[{i}] Processing {mask_name}...") 167 | mask_path = os.path.join(mask_root, mask_name) 168 | pred_path = os.path.join(pred_root, mask_name) 169 | mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) 170 | pred = cv2.imread(pred_path, 
cv2.IMREAD_GRAYSCALE) 171 | 172 | # random select an image from skimage.data 173 | img = data.astronaut() 174 | img = cv2.resize(img, dsize=(mask.shape[1], mask.shape[0]), interpolation=cv2.INTER_LINEAR) 175 | 176 | FM.step(pred=pred, gt=mask) 177 | WFM.step(pred=pred, gt=mask) 178 | SM.step(pred=pred, gt=mask) 179 | EM.step(pred=pred, gt=mask) 180 | MAE.step(pred=pred, gt=mask) 181 | HCE.step(pred=pred, gt=mask) 182 | MSIOU.step(pred=pred, gt=mask) 183 | FMv2.step(pred=pred, gt=mask) 184 | SI_MAE.step(pred=pred, gt=mask) 185 | SI_FMv2.step(pred=pred, gt=mask) 186 | CM.step(pred=pred, gt=mask) 187 | CCM.step(pred=pred, gt=mask, img=img) 188 | 189 | fm = FM.get_results()["fm"] 190 | wfm = WFM.get_results()["wfm"] 191 | sm = SM.get_results()["sm"] 192 | em = EM.get_results()["em"] 193 | mae = MAE.get_results()["mae"] 194 | hce = HCE.get_results()["hce"] 195 | msiou = MSIOU.get_results() 196 | fmv2 = FMv2.get_results() 197 | si_mae = SI_MAE.get_results()["si_mae"] 198 | si_fmv2 = SI_FMv2.get_results() 199 | cm = CM.get_results()["cm"] 200 | ccm = CCM.get_results()["ccm"] 201 | 202 | cls.curr_results = { 203 | "MAE": mae, 204 | "HCE": hce, 205 | "Smeasure": sm, 206 | "wFmeasure": wfm, 207 | # "MSIOU": msiou, 208 | "adpmsiou": msiou["adaptive"], 209 | "meanmsiou": msiou["dynamic"].mean(), 210 | "maxmsiou": msiou["dynamic"].max(), 211 | "sample_bimsiou": msiou["binary"], 212 | # E-measure for sod 213 | "adpEm": em["adp"], 214 | "meanEm": em["curve"].mean(), 215 | "maxEm": em["curve"].max(), 216 | # F-measure for sod 217 | "adpFm": fm["adp"], 218 | "meanFm": fm["curve"].mean(), 219 | "maxFm": fm["curve"].max(), 220 | # size-invariant 221 | "si_mae": si_mae, 222 | # context-measure 223 | "cm": cm, 224 | "ccm": ccm, 225 | } 226 | # fmt: off 227 | base_metrics = ["fm", "f1", "pre", "rec", "fpr", "iou", "dice", "spec", "ber", "oa", "kappa"] 228 | # fmt: on 229 | for m_name in base_metrics: 230 | si_sample_max, si_sample_mean = reduce_dynamic_results_for_max_avg( 231 | 
si_fmv2[f"si_sample_{m_name}"]["dynamic"] 232 | ) 233 | cls.curr_results.update( 234 | { 235 | # general form 236 | f"adp{m_name}": fmv2[m_name]["adaptive"], 237 | f"mean{m_name}": fmv2[m_name]["dynamic"].mean(), 238 | f"max{m_name}": fmv2[m_name]["dynamic"].max(), 239 | f"sample_bi{m_name}": fmv2[f"sample_bi{m_name}"]["binary"], 240 | f"overall_bi{m_name}": fmv2[f"overall_bi{m_name}"]["binary"], 241 | # size-invariant 242 | # calculate the mean/maximum based on the average fm sequence across all targets from all samples. 243 | f"si_overall_mean{m_name}": si_fmv2[f"si_overall_{m_name}"]["dynamic"].mean(), 244 | f"si_overall_max{m_name}": si_fmv2[f"si_overall_{m_name}"]["dynamic"].max(), 245 | f"si_overall_bi{m_name}": si_fmv2[f"si_overall_bi{m_name}"]["binary"], 246 | # calculate the sample-specific mean/maximum based on the sample-based fm sequence with a shape of `(num_targets, 256)`. 247 | f"si_sample_mean{m_name}": si_sample_mean, 248 | f"si_sample_max{m_name}": si_sample_max, 249 | f"si_sample_adp{m_name}": si_fmv2[f"si_sample_{m_name}"]["adaptive"], 250 | f"si_sample_bi{m_name}": si_fmv2[f"si_sample_bi{m_name}"]["binary"], 251 | } 252 | ) 253 | pr_pre = fmv2["pre"]["dynamic"] # 256 254 | pr_rec = fmv2["rec"]["dynamic"] # 256 255 | roc_fpr = fmv2["fpr"]["dynamic"] # tpr is the same as recall 256 | cls.curr_results["auc_pr"] = cal_auc(y=np.flip(pr_pre, -1), x=np.flip(pr_rec, -1)) 257 | cls.curr_results["auc_roc"] = cal_auc(y=np.flip(pr_rec, -1), x=np.flip(roc_fpr, -1)) 258 | 259 | si_overall_pr_pre = si_fmv2["si_overall_pre"]["dynamic"] # 256 260 | si_overall_pr_rec = si_fmv2["si_overall_rec"]["dynamic"] # 256 261 | si_overall_roc_fpr = si_fmv2["si_overall_fpr"]["dynamic"] # 256 262 | cls.curr_results["si_overall_auc_pr"] = cal_auc( 263 | y=np.flip(si_overall_pr_pre, -1), x=np.flip(si_overall_pr_rec, -1) 264 | ) 265 | cls.curr_results["si_overall_auc_roc"] = cal_auc( 266 | y=np.flip(si_overall_pr_rec, -1), x=np.flip(si_overall_roc_fpr, -1) 267 | ) 268 | 269 | 
si_sample_pr_pre = si_fmv2["si_sample_pre"]["dynamic"] # Nx[T'x256] 270 | si_sample_pr_rec = si_fmv2["si_sample_rec"]["dynamic"] # Nx[T'x256] 271 | si_sample_roc_fpr = si_fmv2["si_sample_fpr"]["dynamic"] # Nx[T'x256] 272 | cls.curr_results["si_sample_auc_pr"] = reduce_dynamic_results_for_auc(ys=si_sample_pr_pre, xs=si_sample_pr_rec) 273 | cls.curr_results["si_sample_auc_roc"] = reduce_dynamic_results_for_auc( 274 | ys=si_sample_pr_rec, xs=si_sample_roc_fpr 275 | ) 276 | 277 | print("Current results:") 278 | pprint(cls.curr_results) 279 | cls.default_results = default_results["v1_4_3"] # 68 280 | for append_version in [ 281 | "v1_5_0", # 78+6 Size-Invariant Variants 282 | "v1_5_1", # 1 HCE 283 | "v1_6_0", # Context-Measure Series 284 | ]: 285 | if any([k in cls.default_results for k in default_results[append_version].keys()]): 286 | raise ValueError("Some keys will be overwritten by the SI variant results.") 287 | cls.default_results.update(default_results[append_version]) 288 | 289 | def test_sm(self): 290 | self.assertEqual(self.curr_results["Smeasure"], self.default_results["Smeasure"]) 291 | 292 | def test_wfm(self): 293 | self.assertEqual(self.curr_results["wFmeasure"], self.default_results["wFmeasure"]) 294 | 295 | def test_hce(self): 296 | self.assertEqual(self.curr_results["HCE"], self.default_results["HCE"]) 297 | 298 | def test_mae(self): 299 | self.assertEqual(self.curr_results["MAE"], self.default_results["MAE"]) 300 | 301 | self.assertEqual(self.curr_results["si_mae"], self.default_results["si_mae"]) 302 | 303 | def test_msiou(self): 304 | # self.assertEqual(self.curr_results["MSIOU"], self.default_results["MSIOU"]) 305 | self.assertEqual(self.curr_results["adpmsiou"], self.default_results["adpmsiou"]) 306 | self.assertEqual(self.curr_results["meanmsiou"], self.default_results["meanmsiou"]) 307 | self.assertEqual(self.curr_results["maxmsiou"], self.default_results["maxmsiou"]) 308 | self.assertEqual(self.curr_results["sample_bimsiou"], 
self.default_results["sample_bimsiou"]) 309 | 310 | def test_fm(self): 311 | self.assertEqual(self.curr_results["adpFm"], self.default_results["adpFm"]) 312 | self.assertEqual(self.curr_results["meanFm"], self.default_results["meanFm"]) 313 | self.assertEqual(self.curr_results["maxFm"], self.default_results["maxFm"]) 314 | 315 | self.assertEqual(self.curr_results["adpfm"], self.default_results["adpfm"]) 316 | self.assertEqual(self.curr_results["meanfm"], self.default_results["meanfm"]) 317 | self.assertEqual(self.curr_results["maxfm"], self.default_results["maxfm"]) 318 | 319 | # 对齐v1版本 320 | self.assertEqual(self.curr_results["adpFm"], self.default_results["adpfm"]) 321 | self.assertEqual(self.curr_results["meanFm"], self.default_results["meanfm"]) 322 | self.assertEqual(self.curr_results["maxFm"], self.default_results["maxfm"]) 323 | 324 | self.assertEqual(self.curr_results["sample_bifm"], self.default_results["sample_bifm"]) 325 | self.assertEqual(self.curr_results["overall_bifm"], self.default_results["overall_bifm"]) 326 | 327 | self.assertEqual(self.curr_results["si_sample_adpfm"], self.default_results["si_sample_adpfm"]) 328 | self.assertEqual(self.curr_results["si_sample_meanfm"], self.default_results["si_sample_meanfm"]) 329 | self.assertEqual(self.curr_results["si_sample_maxfm"], self.default_results["si_sample_maxfm"]) 330 | self.assertEqual(self.curr_results["si_sample_bifm"], self.default_results["si_sample_bifm"]) 331 | self.assertEqual(self.curr_results["si_overall_meanfm"], self.default_results["si_overall_meanfm"]) 332 | self.assertEqual(self.curr_results["si_overall_maxfm"], self.default_results["si_overall_maxfm"]) 333 | self.assertEqual(self.curr_results["si_overall_bifm"], self.default_results["si_overall_bifm"]) 334 | 335 | def test_em(self): 336 | self.assertEqual(self.curr_results["adpEm"], self.default_results["adpEm"]) 337 | self.assertEqual(self.curr_results["meanEm"], self.default_results["meanEm"]) 338 | 
self.assertEqual(self.curr_results["maxEm"], self.default_results["maxEm"]) 339 | 340 | def test_f1(self): 341 | self.assertEqual(self.curr_results["adpf1"], self.default_results["adpf1"]) 342 | self.assertEqual(self.curr_results["meanf1"], self.default_results["meanf1"]) 343 | self.assertEqual(self.curr_results["maxf1"], self.default_results["maxf1"]) 344 | self.assertEqual(self.curr_results["sample_bif1"], self.default_results["sample_bif1"]) 345 | self.assertEqual(self.curr_results["overall_bif1"], self.default_results["overall_bif1"]) 346 | 347 | self.assertEqual(self.curr_results["si_sample_adpf1"], self.default_results["si_sample_adpf1"]) 348 | self.assertEqual(self.curr_results["si_sample_meanf1"], self.default_results["si_sample_meanf1"]) 349 | self.assertEqual(self.curr_results["si_sample_maxf1"], self.default_results["si_sample_maxf1"]) 350 | self.assertEqual(self.curr_results["si_sample_bif1"], self.default_results["si_sample_bif1"]) 351 | self.assertEqual(self.curr_results["si_overall_meanf1"], self.default_results["si_overall_meanf1"]) 352 | self.assertEqual(self.curr_results["si_overall_maxf1"], self.default_results["si_overall_maxf1"]) 353 | self.assertEqual(self.curr_results["si_overall_bif1"], self.default_results["si_overall_bif1"]) 354 | 355 | def test_pre(self): 356 | self.assertEqual(self.curr_results["adppre"], self.default_results["adppre"]) 357 | self.assertEqual(self.curr_results["meanpre"], self.default_results["meanpre"]) 358 | self.assertEqual(self.curr_results["maxpre"], self.default_results["maxpre"]) 359 | self.assertEqual(self.curr_results["sample_bipre"], self.default_results["sample_bipre"]) 360 | self.assertEqual(self.curr_results["overall_bipre"], self.default_results["overall_bipre"]) 361 | 362 | self.assertEqual(self.curr_results["si_sample_adppre"], self.default_results["si_sample_adppre"]) 363 | self.assertEqual(self.curr_results["si_sample_meanpre"], self.default_results["si_sample_meanpre"]) 364 | 
self.assertEqual(self.curr_results["si_sample_maxpre"], self.default_results["si_sample_maxpre"]) 365 | self.assertEqual(self.curr_results["si_sample_bipre"], self.default_results["si_sample_bipre"]) 366 | self.assertEqual(self.curr_results["si_overall_meanpre"], self.default_results["si_overall_meanpre"]) 367 | self.assertEqual(self.curr_results["si_overall_maxpre"], self.default_results["si_overall_maxpre"]) 368 | self.assertEqual(self.curr_results["si_overall_bipre"], self.default_results["si_overall_bipre"]) 369 | 370 | def test_rec(self): 371 | self.assertEqual(self.curr_results["adprec"], self.default_results["adprec"]) 372 | self.assertEqual(self.curr_results["meanrec"], self.default_results["meanrec"]) 373 | self.assertEqual(self.curr_results["maxrec"], self.default_results["maxrec"]) 374 | self.assertEqual(self.curr_results["sample_birec"], self.default_results["sample_birec"]) 375 | self.assertEqual(self.curr_results["overall_birec"], self.default_results["overall_birec"]) 376 | 377 | self.assertEqual(self.curr_results["si_sample_adprec"], self.default_results["si_sample_adprec"]) 378 | self.assertEqual(self.curr_results["si_sample_meanrec"], self.default_results["si_sample_meanrec"]) 379 | self.assertEqual(self.curr_results["si_sample_maxrec"], self.default_results["si_sample_maxrec"]) 380 | self.assertEqual(self.curr_results["si_sample_birec"], self.default_results["si_sample_birec"]) 381 | self.assertEqual(self.curr_results["si_overall_meanrec"], self.default_results["si_overall_meanrec"]) 382 | self.assertEqual(self.curr_results["si_overall_maxrec"], self.default_results["si_overall_maxrec"]) 383 | self.assertEqual(self.curr_results["si_overall_birec"], self.default_results["si_overall_birec"]) 384 | 385 | def test_fpr(self): 386 | self.assertEqual(self.curr_results["adpfpr"], self.default_results["adpfpr"]) 387 | self.assertEqual(self.curr_results["meanfpr"], self.default_results["meanfpr"]) 388 | self.assertEqual(self.curr_results["maxfpr"], 
self.default_results["maxfpr"]) 389 | self.assertEqual(self.curr_results["sample_bifpr"], self.default_results["sample_bifpr"]) 390 | self.assertEqual(self.curr_results["overall_bifpr"], self.default_results["overall_bifpr"]) 391 | 392 | self.assertEqual(self.curr_results["si_sample_adpfpr"], self.default_results["si_sample_adpfpr"]) 393 | self.assertEqual(self.curr_results["si_sample_meanfpr"], self.default_results["si_sample_meanfpr"]) 394 | self.assertEqual(self.curr_results["si_sample_maxfpr"], self.default_results["si_sample_maxfpr"]) 395 | self.assertEqual(self.curr_results["si_sample_bifpr"], self.default_results["si_sample_bifpr"]) 396 | self.assertEqual(self.curr_results["si_overall_meanfpr"], self.default_results["si_overall_meanfpr"]) 397 | self.assertEqual(self.curr_results["si_overall_maxfpr"], self.default_results["si_overall_maxfpr"]) 398 | self.assertEqual(self.curr_results["si_overall_bifpr"], self.default_results["si_overall_bifpr"]) 399 | 400 | def test_iou(self): 401 | self.assertEqual(self.curr_results["adpiou"], self.default_results["adpiou"]) 402 | self.assertEqual(self.curr_results["meaniou"], self.default_results["meaniou"]) 403 | self.assertEqual(self.curr_results["maxiou"], self.default_results["maxiou"]) 404 | self.assertEqual(self.curr_results["sample_biiou"], self.default_results["sample_biiou"]) 405 | self.assertEqual(self.curr_results["overall_biiou"], self.default_results["overall_biiou"]) 406 | 407 | self.assertEqual(self.curr_results["si_sample_adpiou"], self.default_results["si_sample_adpiou"]) 408 | self.assertEqual(self.curr_results["si_sample_meaniou"], self.default_results["si_sample_meaniou"]) 409 | self.assertEqual(self.curr_results["si_sample_maxiou"], self.default_results["si_sample_maxiou"]) 410 | self.assertEqual(self.curr_results["si_sample_biiou"], self.default_results["si_sample_biiou"]) 411 | self.assertEqual(self.curr_results["si_overall_meaniou"], self.default_results["si_overall_meaniou"]) 412 | 
self.assertEqual(self.curr_results["si_overall_maxiou"], self.default_results["si_overall_maxiou"]) 413 | self.assertEqual(self.curr_results["si_overall_biiou"], self.default_results["si_overall_biiou"]) 414 | 415 | def test_dice(self): 416 | self.assertEqual(self.curr_results["adpdice"], self.default_results["adpdice"]) 417 | self.assertEqual(self.curr_results["meandice"], self.default_results["meandice"]) 418 | self.assertEqual(self.curr_results["maxdice"], self.default_results["maxdice"]) 419 | self.assertEqual(self.curr_results["sample_bidice"], self.default_results["sample_bidice"]) 420 | self.assertEqual(self.curr_results["overall_bidice"], self.default_results["overall_bidice"]) 421 | 422 | self.assertEqual(self.curr_results["si_sample_adpdice"], self.default_results["si_sample_adpdice"]) 423 | self.assertEqual(self.curr_results["si_sample_meandice"], self.default_results["si_sample_meandice"]) 424 | self.assertEqual(self.curr_results["si_sample_maxdice"], self.default_results["si_sample_maxdice"]) 425 | self.assertEqual(self.curr_results["si_sample_bidice"], self.default_results["si_sample_bidice"]) 426 | self.assertEqual(self.curr_results["si_overall_meandice"], self.default_results["si_overall_meandice"]) 427 | self.assertEqual(self.curr_results["si_overall_maxdice"], self.default_results["si_overall_maxdice"]) 428 | self.assertEqual(self.curr_results["si_overall_bidice"], self.default_results["si_overall_bidice"]) 429 | 430 | def test_spec(self): 431 | self.assertEqual(self.curr_results["adpspec"], self.default_results["adpspec"]) 432 | self.assertEqual(self.curr_results["meanspec"], self.default_results["meanspec"]) 433 | self.assertEqual(self.curr_results["maxspec"], self.default_results["maxspec"]) 434 | self.assertEqual(self.curr_results["sample_bispec"], self.default_results["sample_bispec"]) 435 | self.assertEqual(self.curr_results["overall_bispec"], self.default_results["overall_bispec"]) 436 | 437 | 
self.assertEqual(self.curr_results["si_sample_adpspec"], self.default_results["si_sample_adpspec"]) 438 | self.assertEqual(self.curr_results["si_sample_meanspec"], self.default_results["si_sample_meanspec"]) 439 | self.assertEqual(self.curr_results["si_sample_maxspec"], self.default_results["si_sample_maxspec"]) 440 | self.assertEqual(self.curr_results["si_sample_bispec"], self.default_results["si_sample_bispec"]) 441 | self.assertEqual(self.curr_results["si_overall_meanspec"], self.default_results["si_overall_meanspec"]) 442 | self.assertEqual(self.curr_results["si_overall_maxspec"], self.default_results["si_overall_maxspec"]) 443 | self.assertEqual(self.curr_results["si_overall_bispec"], self.default_results["si_overall_bispec"]) 444 | 445 | def test_ber(self): 446 | self.assertEqual(self.curr_results["adpber"], self.default_results["adpber"]) 447 | self.assertEqual(self.curr_results["meanber"], self.default_results["meanber"]) 448 | self.assertEqual(self.curr_results["maxber"], self.default_results["maxber"]) 449 | self.assertEqual(self.curr_results["sample_biber"], self.default_results["sample_biber"]) 450 | self.assertEqual(self.curr_results["overall_biber"], self.default_results["overall_biber"]) 451 | 452 | self.assertEqual(self.curr_results["si_sample_adpber"], self.default_results["si_sample_adpber"]) 453 | self.assertEqual(self.curr_results["si_sample_meanber"], self.default_results["si_sample_meanber"]) 454 | self.assertEqual(self.curr_results["si_sample_maxber"], self.default_results["si_sample_maxber"]) 455 | self.assertEqual(self.curr_results["si_sample_biber"], self.default_results["si_sample_biber"]) 456 | self.assertEqual(self.curr_results["si_overall_meanber"], self.default_results["si_overall_meanber"]) 457 | self.assertEqual(self.curr_results["si_overall_maxber"], self.default_results["si_overall_maxber"]) 458 | self.assertEqual(self.curr_results["si_overall_biber"], self.default_results["si_overall_biber"]) 459 | 460 | def test_oa(self): 461 | 
self.assertEqual(self.curr_results["adpoa"], self.default_results["adpoa"]) 462 | self.assertEqual(self.curr_results["meanoa"], self.default_results["meanoa"]) 463 | self.assertEqual(self.curr_results["maxoa"], self.default_results["maxoa"]) 464 | self.assertEqual(self.curr_results["sample_bioa"], self.default_results["sample_bioa"]) 465 | self.assertEqual(self.curr_results["overall_bioa"], self.default_results["overall_bioa"]) 466 | 467 | self.assertEqual(self.curr_results["si_sample_adpoa"], self.default_results["si_sample_adpoa"]) 468 | self.assertEqual(self.curr_results["si_sample_meanoa"], self.default_results["si_sample_meanoa"]) 469 | self.assertEqual(self.curr_results["si_sample_maxoa"], self.default_results["si_sample_maxoa"]) 470 | self.assertEqual(self.curr_results["si_sample_bioa"], self.default_results["si_sample_bioa"]) 471 | self.assertEqual(self.curr_results["si_overall_meanoa"], self.default_results["si_overall_meanoa"]) 472 | self.assertEqual(self.curr_results["si_overall_maxoa"], self.default_results["si_overall_maxoa"]) 473 | self.assertEqual(self.curr_results["si_overall_bioa"], self.default_results["si_overall_bioa"]) 474 | 475 | def test_kappa(self): 476 | self.assertEqual(self.curr_results["adpkappa"], self.default_results["adpkappa"]) 477 | self.assertEqual(self.curr_results["meankappa"], self.default_results["meankappa"]) 478 | self.assertEqual(self.curr_results["maxkappa"], self.default_results["maxkappa"]) 479 | self.assertEqual(self.curr_results["sample_bikappa"], self.default_results["sample_bikappa"]) 480 | self.assertEqual(self.curr_results["overall_bikappa"], self.default_results["overall_bikappa"]) 481 | 482 | self.assertEqual(self.curr_results["si_sample_adpkappa"], self.default_results["si_sample_adpkappa"]) 483 | self.assertEqual(self.curr_results["si_sample_meankappa"], self.default_results["si_sample_meankappa"]) 484 | self.assertEqual(self.curr_results["si_sample_maxkappa"], self.default_results["si_sample_maxkappa"]) 485 | 
self.assertEqual(self.curr_results["si_sample_bikappa"], self.default_results["si_sample_bikappa"]) 486 | self.assertEqual(self.curr_results["si_overall_meankappa"], self.default_results["si_overall_meankappa"]) 487 | self.assertEqual(self.curr_results["si_overall_maxkappa"], self.default_results["si_overall_maxkappa"]) 488 | self.assertEqual(self.curr_results["si_overall_bikappa"], self.default_results["si_overall_bikappa"]) 489 | 490 | def test_cm_series(self): 491 | # Context-measure tests - only validate they run without errors 492 | # since we don't have baseline values yet 493 | self.assertEqual(self.curr_results["cm"], self.default_results["cm"]) 494 | self.assertEqual(self.curr_results["ccm"], self.default_results["ccm"]) 495 | 496 | 497 | if __name__ == "__main__": 498 | unittest.main() 499 | --------------------------------------------------------------------------------