├── .python-version ├── .semgrepignore ├── examples ├── __init__.py ├── visualization │ ├── README.md │ └── run.py ├── asynchronous_api │ ├── README.md │ └── run.py ├── synchronous_api │ ├── README.md │ └── run.py ├── visual_prompting │ ├── README.md │ └── run.py ├── zsl_visual_prompting │ ├── README.md │ └── run.py └── metrics │ ├── README.md │ └── benchmark.py ├── tests ├── accuracy │ ├── __init__.py │ ├── images.json │ ├── conftest.py │ └── download_models.py ├── precommit │ ├── __init__.py │ └── public_scope.json ├── functional │ ├── __init__.py │ ├── conftest.py │ ├── test_load.py │ └── test_save.py └── unit │ ├── visualizer │ ├── __init__.py │ ├── conftest.py │ ├── test_visualizer.py │ ├── test_layout.py │ ├── test_primitive.py │ └── test_scene.py │ ├── models │ └── test_types.py │ ├── results │ ├── test_det_result.py │ ├── test_sseg_result.py │ └── test_cls_result.py │ ├── adapters │ └── test_utils.py │ └── metrics │ └── test_timestat.py ├── src ├── model_api │ ├── __init__.py │ ├── pipelines │ │ ├── __init__.py │ │ └── async_pipeline.py │ ├── models │ │ ├── result │ │ │ ├── base.py │ │ │ ├── utils.py │ │ │ ├── keypoint.py │ │ │ ├── __init__.py │ │ │ ├── anomaly.py │ │ │ ├── classification.py │ │ │ ├── visual_prompting.py │ │ │ └── detection.py │ │ └── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── time_stat.py │ │ └── performance.py │ ├── visualizer │ │ ├── layout │ │ │ ├── __init__.py │ │ │ ├── layout.py │ │ │ ├── flatten.py │ │ │ └── hstack.py │ │ ├── scene │ │ │ ├── segmentation │ │ │ │ ├── __init__.py │ │ │ │ ├── segmentation.py │ │ │ │ └── instance_segmentation.py │ │ │ ├── visual_prompting.py │ │ │ ├── __init__.py │ │ │ ├── keypoint.py │ │ │ ├── classification.py │ │ │ ├── anomaly.py │ │ │ ├── detection.py │ │ │ └── scene.py │ │ ├── primitive │ │ │ ├── __init__.py │ │ │ ├── primitive.py │ │ │ ├── bounding_box.py │ │ │ ├── keypoints.py │ │ │ ├── overlay.py │ │ │ ├── polygon.py │ │ │ └── label.py │ │ ├── __init__.py │ │ └── visualizer.py │ ├── tilers │ │ ├── __init__.py │ │ └── semantic_segmentation.py │ └── adapters │ │ ├── __init__.py │ │ └── onnx_adapter.md └── docs │ ├── keypoint_detection.md │ ├── visual_prompting.md │ └── action_classification.md ├── docs ├── source │ ├── guides │ │ └── index.md │ ├── models │ │ ├── ssd.md │ │ ├── yolo.md │ │ ├── model.md │ │ ├── types.md │ │ ├── utils.md │ │ ├── image_model.md │ │ ├── sam_models.md │ │ ├── visual_prompting.md │ │ ├── action_classification.md │ │ ├── anomaly.md │ │ ├── segmentation.md │ │ ├── classification.md │ │ ├── detection_model.md │ │ ├── instance_segmentation.md │ │ ├── keypoint_detection.md │ │ └── index.md │ ├── adapters │ │ ├── utils.md │ │ ├── onnx_adapter.md │ │ ├── openvino_adapter.md │ │ ├── inference_adapter.md │ │ └── index.md │ ├── tilers │ │ ├── tiler.md │ │ ├── detection.md │ │ ├── instance_segmentation.md │ │ ├── semantic_segmentation.md │ │ └── index.md │ ├── pipelines │ │ ├── async_pipeline.md │ │ └── index.md │ ├── index.md │ └── conf.py └── Makefile ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── question.md │ ├── bug_report.md │ └── feature_request.md ├── workflows │ ├── pr-labeler.yml │ ├── renovate-config-validator.yml │ ├── scorecards.yml │ ├── codeql.yml │ ├── test_accuracy.yml │ ├── pre_commit.yml │ ├── renovate.yml │ ├── publish.yaml │ ├── security-scan.yml │ ├── test_precommit.yml │ └── docs.yml ├── labeler.yml ├── PULL_REQUEST_TEMPLATE.md └── renovate.json5 ├── SECURITY.md ├── .markdownlint.yaml ├── .pre-commit-config.yaml ├── .gitignore ├── README.md └── tools └── 
model_converter ├── config.json └── README.md /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /.semgrepignore: -------------------------------------------------------------------------------- 1 | **/uv.lock 2 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/accuracy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /tests/precommit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | -------------------------------------------------------------------------------- /src/model_api/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | -------------------------------------------------------------------------------- /tests/unit/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualization tests.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | -------------------------------------------------------------------------------- /docs/source/guides/index.md: -------------------------------------------------------------------------------- 1 | # Guides 2 | 3 | ```{toctree} 4 | :caption: Guides 5 | :hidden: 6 | 7 | ./model-configuration 8 | ./performance_metrics 9 | ``` 10 | -------------------------------------------------------------------------------- /docs/source/models/ssd.md: -------------------------------------------------------------------------------- 1 | # Ssd 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.ssd 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/yolo.md: -------------------------------------------------------------------------------- 1 | # Yolo 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.yolo 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/adapters/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ```{eval-rst} 4 | .. 
automodule:: model_api.adapters.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/model.md: -------------------------------------------------------------------------------- 1 | # Model 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/types.md: -------------------------------------------------------------------------------- 1 | # Types 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.types 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/tilers/tiler.md: -------------------------------------------------------------------------------- 1 | # Tiler 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.tilers.tiler 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/tilers/detection.md: -------------------------------------------------------------------------------- 1 | # Detection 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.tilers.detection 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/image_model.md: -------------------------------------------------------------------------------- 1 | # Image Model 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.image_model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/sam_models.md: -------------------------------------------------------------------------------- 1 | # Sam Models 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.sam_models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/adapters/onnx_adapter.md: -------------------------------------------------------------------------------- 1 | # Onnx Adapter 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.adapters.onnx_adapter 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/models/visual_prompting.md: -------------------------------------------------------------------------------- 1 | # Visual Prompting 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.visual_prompting 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/pipelines/async_pipeline.md: -------------------------------------------------------------------------------- 1 | # Async Pipeline 2 | 3 | ```{eval-rst} 4 | .. 
automodule:: model_api.pipelines.async_pipeline 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/adapters/openvino_adapter.md: -------------------------------------------------------------------------------- 1 | # Openvino Adapter 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.adapters.openvino_adapter 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/adapters/inference_adapter.md: -------------------------------------------------------------------------------- 1 | # Inference Adapter 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.adapters.inference_adapter 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /src/model_api/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | from .async_pipeline import AsyncPipeline 6 | 7 | __all__ = ["AsyncPipeline"] 8 | -------------------------------------------------------------------------------- /docs/source/models/action_classification.md: -------------------------------------------------------------------------------- 1 | # Action Classification 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.models.action_classification 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/tilers/instance_segmentation.md: -------------------------------------------------------------------------------- 1 | # Instance Segmentation 2 | 3 | ```{eval-rst} 4 | .. automodule:: model_api.tilers.instance_segmentation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/source/tilers/semantic_segmentation.md: -------------------------------------------------------------------------------- 1 | # Semantic Segmentation 2 | 3 | ```{eval-rst} 4 | .. 
automodule:: model_api.tilers.semantic_segmentation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | ``` 9 | -------------------------------------------------------------------------------- /src/model_api/models/result/base.py: -------------------------------------------------------------------------------- 1 | """Base result type""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from abc import ABC 7 | 8 | 9 | class Result(ABC): 10 | """Base result type.""" 11 | -------------------------------------------------------------------------------- /src/model_api/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from .performance import PerformanceMetrics 7 | from .time_stat import TimeStat 8 | 9 | __all__ = [ 10 | "PerformanceMetrics", 11 | "TimeStat", 12 | ] 13 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @open-edge-platform/model_api-maintain 2 | 3 | # CI and security 4 | .github/workflows/ @open-edge-platform/geti-ci-maintain 5 | .github/renovate.json5 @open-edge-platform/geti-ci-maintain 6 | .semgrepignore @open-edge-platform/geti-ci-maintain 7 | security.md @open-edge-platform/geti-ci-maintain 8 | -------------------------------------------------------------------------------- /src/model_api/visualizer/layout/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualization Layout.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .flatten import Flatten 7 | from .hstack import HStack 8 | from .layout import Layout 9 | 10 | __all__ = ["Flatten", "HStack", "Layout"] 11 | -------------------------------------------------------------------------------- /docs/source/pipelines/index.md: -------------------------------------------------------------------------------- 1 | # Pipelines 2 | 3 | ::::{grid} 1 2 2 3 4 | :margin: 1 1 0 0 5 | :gutter: 1 6 | 7 | :::{grid-item-card} Async Pipeline 8 | :link: ./async_pipeline 9 | :link-type: doc 10 | 11 | [todo] 12 | ::: 13 | 14 | :::: 15 | 16 | ```{toctree} 17 | :caption: Pipelines 18 | :hidden: 19 | 20 | ./async_pipeline 21 | ``` 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask any question about this repository 4 | title: "" 5 | labels: question 6 | assignees: "" 7 | --- 8 | 9 | 14 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | """Segmentation Scene.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .instance_segmentation import InstanceSegmentationScene 7 | from .segmentation import SegmentationScene 8 | 9 | __all__ = [ 10 | "InstanceSegmentationScene", 11 | "SegmentationScene", 12 | ] 13 | -------------------------------------------------------------------------------- /tests/functional/conftest.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import pytest 7 | 8 | 9 | def pytest_addoption(parser): 10 | parser.addoption("--data", action="store", help="data folder with dataset") 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def data(pytestconfig): 15 | return pytestconfig.getoption("data") 16 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 4 | 5 | ## Reporting a Vulnerability 6 | 7 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 8 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/visual_prompting.py: -------------------------------------------------------------------------------- 1 | """Visual Prompting Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from model_api.models.result import VisualPromptingResult 7 | 8 | from .scene import Scene 9 | 10 | 11 | class VisualPromptingScene(Scene): 12 | """Visual Prompting Scene.""" 13 | 14 | def __init__(self, result: VisualPromptingResult) -> None: 15 | self.result = result 16 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/__init__.py: -------------------------------------------------------------------------------- 1 | """Primitive classes.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .bounding_box import BoundingBox 7 | from .keypoints import Keypoint 8 | from .label import Label 9 | from .overlay import Overlay 10 | from .polygon import Polygon 11 | from .primitive import Primitive 12 | 13 | __all__ = ["Primitive", "BoundingBox", "Label", "Overlay", "Polygon", "Keypoint"] 14 | -------------------------------------------------------------------------------- /tests/accuracy/images.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "images/0002.png" 4 | }, 5 | { 6 | "name": "images/083.jpg" 7 | }, 8 | { 9 | "name": "images/16.jpg" 10 | }, 11 | { 12 | "name": "images/BloodImage_00007.jpg" 13 | }, 14 | { 15 | "name": "images/dog_50.jpg" 16 | }, 17 | { 18 | "name": "images/Slide4.jpg" 19 | }, 20 | { 21 | "name": "images/Slide4.PNG" 22 | }, 23 | { 24 | "name": "images/cards.png" 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /docs/source/models/anomaly.md: -------------------------------------------------------------------------------- 1 | # Anomaly 2 | 3 | The `AnomalyModel` is a generic OpenVINO model that aims to provide a single interface for all the exported models based on [Anomalib](https://github.com/open-edge-platform/anomalib). 4 | 5 | Currently, the `AnomalyModel` supports the following models: 6 | 7 | - Padim 8 | - STFPM 9 | 10 | ```{eval-rst} 11 | .. 
automodule:: model_api.models.anomaly 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | ``` 16 | -------------------------------------------------------------------------------- /src/model_api/models/result/utils.py: -------------------------------------------------------------------------------- 1 | """Result utils.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING 9 | 10 | if TYPE_CHECKING: 11 | import numpy as np 12 | 13 | 14 | def array_shape_to_str(array: np.ndarray | None) -> str: 15 | if array is not None: 16 | return f"[{','.join(str(i) for i in array.shape)}]" 17 | return "[]" 18 | -------------------------------------------------------------------------------- /examples/visualization/README.md: -------------------------------------------------------------------------------- 1 | # Visualization Example 2 | 3 | This example demonstrates how to use the Visualizer in VisionAPI. 4 | 5 | ## Prerequisites 6 | 7 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 8 | 9 | ## Run example 10 | 11 | To run the example, please execute the following command: 12 | 13 | ```bash 14 | python run.py --image --model .xml --output 15 | ``` 16 | -------------------------------------------------------------------------------- /src/model_api/tilers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from .detection import DetectionTiler 7 | from .instance_segmentation import InstanceSegmentationTiler 8 | from .semantic_segmentation import SemanticSegmentationTiler 9 | from .tiler import Tiler 10 | 11 | __all__ = [ 12 | "DetectionTiler", 13 | "InstanceSegmentationTiler", 14 | "Tiler", 15 | "SemanticSegmentationTiler", 16 | ] 17 | -------------------------------------------------------------------------------- /tests/functional/test_load.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from pathlib import Path 7 | 8 | from model_api.models import Model 9 | 10 | 11 | def test_model_with_unnamed_output_load(data): 12 | # the model's output doesn't have a name 13 | _ = Model.create_model( 14 | Path(data) / "otx_models/tinynet_imagenet.xml", 15 | model_type="Classification", 16 | preload=True, 17 | ) 18 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/primitive.py: -------------------------------------------------------------------------------- 1 | """Base class for primitives.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from abc import ABC, abstractmethod 9 | from typing import TYPE_CHECKING 10 | 11 | if TYPE_CHECKING: 12 | import PIL 13 | 14 | 15 | class Primitive(ABC): 16 | """Base class for primitives.""" 17 | 18 | @abstractmethod 19 | def compute(self, image: PIL.Image) -> PIL.Image: 20 | """Compute the primitive.""" 21 | -------------------------------------------------------------------------------- /src/model_api/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualizer.""" 2 | 3 | # Copyright 
(C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .layout import Flatten, HStack, Layout 7 | from .primitive import BoundingBox, Keypoint, Label, Overlay, Polygon 8 | from .scene import Scene 9 | from .visualizer import Visualizer 10 | 11 | __all__ = [ 12 | "BoundingBox", 13 | "Keypoint", 14 | "Label", 15 | "Overlay", 16 | "Polygon", 17 | "Scene", 18 | "Visualizer", 19 | "Layout", 20 | "Flatten", 21 | "HStack", 22 | ] 23 | -------------------------------------------------------------------------------- /tests/unit/models/test_types.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from model_api.models.types import ListValue 7 | 8 | 9 | def test_string_list_parameter(): 10 | str_list = ListValue( 11 | value_type=str, 12 | description="List of strings", 13 | default_value=["label1", "label2", "label3"], 14 | ) 15 | assert str_list.value_type is str 16 | 17 | parsed_list = str_list.from_str("1 2 3") 18 | 19 | assert len(parsed_list) == 3 20 | assert type(parsed_list[0]) is str 21 | -------------------------------------------------------------------------------- /src/model_api/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from .onnx_adapter import ONNXRuntimeAdapter 7 | from .openvino_adapter import OpenvinoAdapter, create_core, get_user_config 8 | from .utils import INTERPOLATION_TYPES, RESIZE_TYPES, InputTransform, Layout 9 | 10 | __all__ = [ 11 | "create_core", 12 | "get_user_config", 13 | "Layout", 14 | "OpenvinoAdapter", 15 | "ONNXRuntimeAdapter", 16 | "RESIZE_TYPES", 17 | "InputTransform", 18 | "INTERPOLATION_TYPES", 19 | ] 20 | -------------------------------------------------------------------------------- /examples/asynchronous_api/README.md: -------------------------------------------------------------------------------- 1 | # Asynchronous API example 2 | 3 | This example demonstrates how to use a Python API of OpenVINO Model API for asynchronous inference and its basic steps: 4 | 5 | - Instantiate a model 6 | - Define a callback function for results processing 7 | - Run inference 8 | - Fetch and process results 9 | 10 | ## Prerequisites 11 | 12 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 
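A condensed sketch of the basic steps listed above (it mirrors `run.py` in this folder; the model name, image path, and number of requests are illustrative):

```python
import cv2

from model_api.models import DetectionModel

# Instantiate the model by name; public models are downloaded and converted automatically
model = DetectionModel.create_model("yolo-v4-tf")

# The examples feed RGB images to the model
image = cv2.cvtColor(cv2.imread("input.jpg"), cv2.COLOR_BGR2RGB)

results = {}


def callback(result, userdata):
    # Invoked by the pipeline as soon as a request finishes
    results[userdata] = result


model.set_callback(callback)

# Submit several non-blocking requests, then wait for all of them to complete
for i in range(3):
    model.infer_async(image, user_data=i)
model.await_all()
```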
13 | 14 | ## Run example 15 | 16 | To run the example, please execute the following command: 17 | 18 | ```bash 19 | python run.py 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/unit/results/test_det_result.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import numpy as np 7 | 8 | from model_api.models.result import DetectionResult 9 | 10 | 11 | def test_cls_result(): 12 | tst_vector = np.array([1, 2, 3, 4], dtype=np.float32) 13 | det_result = DetectionResult( 14 | tst_vector, 15 | tst_vector, 16 | tst_vector, 17 | ["a"], 18 | tst_vector, 19 | tst_vector, 20 | ) 21 | 22 | assert det_result.labels.dtype == np.int32 23 | assert len(det_result.label_names) == 1 24 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | # Default state for all rules 2 | default: true 3 | 4 | # Path to configuration file to extend 5 | extends: null 6 | 7 | # MD001/heading-increment/header-increment - Heading levels should only increment by one level at a time 8 | MD001: true 9 | 10 | # MD013/line-length - Line length 11 | MD013: 12 | # Number of characters 13 | line_length: 1000 14 | 15 | # This is not useful for some files such as `CHANGELOG.md` 16 | MD024: 17 | # Only check sibling headings 18 | siblings_only: true 19 | 20 | MD033: false 21 | 22 | # If a page is printed, it helps if the URL is viewable. 23 | MD034: false # Bare URL used 24 | -------------------------------------------------------------------------------- /.github/workflows/pr-labeler.yml: -------------------------------------------------------------------------------- 1 | ##### 2 | # This workflow is triggered by pull_request_target event. 3 | # Never checkout the PR and run ANY local code on it. 
4 | ##### 5 | 6 | name: "Pull Request Labeler" 7 | permissions: {} # No permissions by default on workflow level 8 | on: 9 | - pull_request_target # zizmor: ignore[dangerous-triggers] 10 | 11 | jobs: 12 | labeler: 13 | permissions: 14 | contents: read 15 | pull-requests: write 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 19 | with: 20 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 21 | -------------------------------------------------------------------------------- /src/model_api/models/result/keypoint.py: -------------------------------------------------------------------------------- 1 | """Keypoint result type.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import numpy as np 7 | 8 | from .base import Result 9 | 10 | 11 | class DetectedKeypoints(Result): 12 | def __init__(self, keypoints: np.ndarray, scores: np.ndarray) -> None: 13 | self.keypoints = keypoints 14 | self.scores = scores 15 | 16 | def __str__(self): 17 | return ( 18 | f"keypoints: {self.keypoints.shape}, " 19 | f"keypoints_x_sum: {np.sum(self.keypoints[:, :1]):.3f}, " 20 | f"scores: {self.scores.shape} {np.sum(self.scores):.3f}" 21 | ) 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/adapters/index.md: -------------------------------------------------------------------------------- 1 | # Adapters 2 | 3 | ::::{grid} 1 2 2 3 4 | :margin: 1 1 0 0 5 | :gutter: 1 6 | 7 | :::{grid-item-card} Utils 8 | :link: ./utils 9 | :link-type: doc 10 | 11 | [todo] 12 | ::: 13 | 14 | 15 | 16 | :::{grid-item-card} Onnx Adapter 17 | :link: ./onnx_adapter 18 | :link-type: doc 19 | 20 | [todo] 21 | ::: 22 | :::{grid-item-card} Inference Adapter 23 | :link: ./inference_adapter 24 | :link-type: doc 25 | 26 | [todo] 27 | ::: 28 | :::{grid-item-card} Openvino Adapter 29 | :link: ./openvino_adapter 30 | :link-type: doc 31 | 32 | [todo] 33 | ::: 34 | 35 | :::: 36 | 37 | ```{toctree} 38 | :caption: Adapters 39 | :hidden: 40 | 41 | ./inference_adapter 42 | ./onnx_adapter 43 | ./openvino_adapter 44 | ./utils 45 | ``` 46 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/__init__.py: -------------------------------------------------------------------------------- 1 | """Result visualization Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .anomaly import AnomalyScene 7 | from .classification import ClassificationScene 8 | from .detection import DetectionScene 9 | from .keypoint import KeypointScene 10 | from .scene import Scene 11 | from .segmentation import InstanceSegmentationScene, SegmentationScene 12 | from .visual_prompting import VisualPromptingScene 13 | 14 | __all__ = [ 15 | "AnomalyScene", 16 | "ClassificationScene", 17 | "DetectionScene", 18 | "InstanceSegmentationScene", 19 | "KeypointScene", 20 | "Scene", 21 | "SegmentationScene", 22 | "VisualPromptingScene", 23 | ] 24 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # See help here: https://github.com/marketplace/actions/labeler 2 | 3 | python: 4 | - changed-files: 5 | - any-glob-to-any-file: 6 | - src/** 7 | 8 | tests: 9 | - changed-files: 10 | - any-glob-to-any-file: 11 | - tests/** 12 | 13 | docs: 14 | - changed-files: 15 | - any-glob-to-any-file: 16 | - docs/** 17 | - "**/*.md" 18 | - "LICENSE" 19 | 20 | build: 21 | - changed-files: 22 | - any-glob-to-any-file: 23 | - ".github/**/*" 24 | - ".pre-commit-config.yaml" 25 | - "pyproject.toml" 26 | - "**/CMakeLists.txt" 27 | 28 | samples: 29 | - changed-files: 30 | - any-glob-to-any-file: 31 | - examples/** 32 | -------------------------------------------------------------------------------- /docs/source/tilers/index.md: -------------------------------------------------------------------------------- 1 | # Tilers 2 | 3 | ::::{grid} 1 2 2 3 4 | :margin: 1 1 0 0 5 | :gutter: 1 6 | 7 | :::{grid-item-card} Semantic Segmentation 8 | :link: ./semantic_segmentation 9 | :link-type: doc 10 | 11 | [todo] 12 | ::: 13 | 14 | :::{grid-item-card} Instance Segmentation 15 | :link: ./instance_segmentation 16 | :link-type: doc 17 | 18 | [todo] 19 | ::: 20 | 21 | :::{grid-item-card} Detection 22 | :link: ./detection 23 | :link-type: doc 24 | 25 | [todo] 26 | ::: 27 | 28 | :::{grid-item-card} Tiler 29 | :link: ./tiler 30 | :link-type: doc 31 | 32 | [todo] 33 | ::: 34 | 35 | :::: 36 | 37 | ```{toctree} 38 | :caption: Tilers 39 | :hidden: 40 | 41 | 
./tiler 42 | ./detection 43 | ./instance_segmentation 44 | ./semantic_segmentation 45 | ``` 46 | -------------------------------------------------------------------------------- /tests/unit/results/test_sseg_result.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import numpy as np 7 | 8 | from model_api.models.result import Contour 9 | 10 | 11 | def test_contour_type(): 12 | contour = Contour( 13 | shape=[(100, 100)], 14 | label=1, 15 | probability=0.9, 16 | excluded_shapes=[[(50, 50)], [(60, 60)]], 17 | ) 18 | 19 | assert isinstance(contour.shape, np.ndarray) 20 | assert isinstance(contour.excluded_shapes, list) 21 | assert isinstance(contour.excluded_shapes[0], np.ndarray) 22 | assert contour.label == 1 23 | assert contour.probability == 0.9 24 | assert np.array_equal(contour.excluded_shapes, np.array([[(50, 50)], [(60, 60)]])) 25 | -------------------------------------------------------------------------------- /examples/synchronous_api/README.md: -------------------------------------------------------------------------------- 1 | # Synchronous API example 2 | 3 | This example demonstrates how to use a Python API of OpenVINO Model API for synchronous inference as well as basic features such as: 4 | 5 | - Automatic download and conversion of public models 6 | - Preprocessing embedding 7 | - Creating a model from a local source 8 | - Image Classification, Object Detection and Semantic Segmentation use cases 9 | 10 | ## Prerequisites 11 | 12 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 13 | 14 | ## Run example 15 | 16 | To run the example, please execute the following command: 17 | 18 | ```bash 19 | python run.py 20 | ``` 21 | 22 | > _NOTE_: results of Semantic Segmentation models are saved to the `mask.png` file. 23 | -------------------------------------------------------------------------------- /tests/unit/visualizer/conftest.py: -------------------------------------------------------------------------------- 1 | """Conftest for visualization tests.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import numpy as np 7 | import pytest 8 | from PIL import Image 9 | 10 | from model_api.visualizer import Overlay, Scene 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def mock_image(): 15 | data = np.zeros((100, 100, 3), dtype=np.uint8) 16 | data *= 255 17 | return Image.fromarray(data) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def mock_scene(mock_image: Image) -> Scene: 22 | """Mock scene.""" 23 | overlay = np.zeros((100, 100, 3), dtype=np.uint8) 24 | overlay[50, 50] = [255, 0, 0] 25 | return Scene( 26 | base=mock_image, 27 | overlay=Overlay(overlay), 28 | ) 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "" 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | 12 | 13 | ### Describe the bug 14 | 15 | 20 | 21 | ### Steps to Reproduce 22 | 23 | 1. 24 | 2. 25 | 3. 26 | 4. 
27 | 28 | ### Environment 29 | 30 | - OS: 31 | - OpenVINO version: 32 | - Inference device model and memory: 33 | 36 | -------------------------------------------------------------------------------- /src/model_api/visualizer/layout/layout.py: -------------------------------------------------------------------------------- 1 | """Visualization Layout.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from abc import ABC, abstractmethod 9 | from typing import TYPE_CHECKING, Type 10 | 11 | if TYPE_CHECKING: 12 | import PIL 13 | 14 | from model_api.visualizer.primitive import Primitive 15 | 16 | from .scene import Scene 17 | 18 | 19 | class Layout(ABC): 20 | """Base class for layouts.""" 21 | 22 | @abstractmethod 23 | def _compute_on_primitive(self, primitive: Type[Primitive], image: PIL.Image, scene: Scene) -> PIL.Image | None: 24 | pass 25 | 26 | @abstractmethod 27 | def __call__(self, scene: Scene) -> PIL.Image: 28 | """Compute the layout.""" 29 | -------------------------------------------------------------------------------- /tests/unit/results/test_cls_result.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import numpy as np 7 | 8 | from model_api.models.result import ClassificationResult, Label 9 | 10 | 11 | def test_cls_result(): 12 | label = Label(1, "label", 0.5) 13 | tst_vector = np.array([1, 2, 3]) 14 | cls_result = ClassificationResult([label], tst_vector, tst_vector, tst_vector) 15 | 16 | assert cls_result.top_labels[0].id == 1 17 | assert cls_result.top_labels[0].name == "label" 18 | assert cls_result.top_labels[0].confidence == 0.5 19 | assert str(cls_result) == "1 (label): 0.500, [3], [3], [3]" 20 | assert cls_result.top_labels[0].__str__() == "1 (label): 0.500" 21 | assert tuple(cls_result.top_labels[0].__iter__()) == (1, "label", 0.5) 22 | -------------------------------------------------------------------------------- /docs/source/models/segmentation.md: -------------------------------------------------------------------------------- 1 | # Segmentation 2 | 3 | The `SegmentationModel` is the OpenVINO wrapper for models exported from [OpenVINO Training Extensions](https://github.com/openvinotoolkit/training_extensions). It produces a segmentation mask for the input image. 4 | 5 | ## Model Specifications 6 | 7 | ### Inputs 8 | 9 | A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 10 | 11 | ### Outputs 12 | 13 | - `resultImage`: Image with the segmentation mask. 14 | - `soft_prediction`: Soft prediction of the segmentation model. 15 | - `saliency_map`: Saliency map of the input image. 16 | - `feature_vector`: Feature vector of the input image. This is useful for Active Learning. 17 | 18 | ```{eval-rst} 19 | .. 
automodule:: model_api.models.segmentation 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | ``` 24 | -------------------------------------------------------------------------------- /tests/precommit/public_scope.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "otx_models/detection_model_with_xai_head.xml", 4 | "type": "DetectionModel" 5 | }, 6 | { 7 | "name": "otx_models/mlc_mobilenetv3_large_voc.xml", 8 | "type": "ClassificationModel" 9 | }, 10 | { 11 | "name": "otx_models/Lite-hrnet-18_mod2.xml", 12 | "type": "SegmentationModel" 13 | }, 14 | { 15 | "name": "otx_models/cls_mobilenetv3_large_cars.onnx", 16 | "type": "ClassificationModel" 17 | }, 18 | { 19 | "name": "otx_models/tinynet_imagenet.xml", 20 | "type": "ClassificationModel" 21 | }, 22 | { 23 | "name": "anomalib_models/padim.xml", 24 | "type": "AnomalyDetection" 25 | }, 26 | { 27 | "name": "anomalib_models/stfpm.xml", 28 | "type": "AnomalyDetection" 29 | }, 30 | { 31 | "name": "anomalib_models/uflow.xml", 32 | "type": "AnomalyDetection" 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "" 5 | labels: enhancement 6 | assignees: "" 7 | --- 8 | 9 | 12 | 13 | ### Is your feature request related to a problem? Please describe 14 | 15 | 18 | 19 | ### Describe the solution you'd like to propose 20 | 21 | 24 | 25 | ### Describe alternatives you've considered 26 | 27 | 30 | 31 | ### Additional context 32 | 33 | 36 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Model API Documentation 2 | 3 | ## API Reference 4 | 5 | ::::{grid} 1 2 2 3 6 | :margin: 1 1 0 0 7 | :gutter: 1 8 | 9 | :::{grid-item-card} {octicon}`plug` Adapters 10 | :link: ./adapters/index 11 | :link-type: doc 12 | 13 | Adapters description[todo] 14 | ::: 15 | 16 | :::{grid-item-card} {octicon}`dependabot` Models 17 | :link: ./models/index 18 | :link-type: doc 19 | 20 | Models description[todo] 21 | ::: 22 | 23 | :::{grid-item-card} {octicon}`git-compare` Pipelines 24 | :link: ./pipelines/index 25 | :link-type: doc 26 | 27 | Pipelines description[todo] 28 | ::: 29 | 30 | :::{grid-item-card} {octicon}`versions` Tilers 31 | :link: ./tilers/index 32 | :link-type: doc 33 | 34 | Tilers description[todo] 35 | ::: 36 | 37 | :::: 38 | 39 | ```{toctree} 40 | :caption: Python Reference 41 | :hidden: 42 | 43 | ./index 44 | ``` 45 | 46 | ```{toctree} 47 | :caption: Developer Guides 48 | :hidden: 49 | 50 | ./guides/index 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/source/models/classification.md: -------------------------------------------------------------------------------- 1 | # Classification 2 | 3 | ## Description 4 | 5 | The `ClassificationModel` is the OpenVINO wrapper for models exported from [OpenVINO Training Extensions](https://github.com/openvinotoolkit/training_extensions). It supports multi-label classification as well as hierarchical classification. 6 | 7 | ## Model Specifications 8 | 9 | ## Inputs 10 | 11 | A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 
12 | 13 | ## Outputs 14 | 15 | - `top_labels`: List of tuples containing the top labels of the classification model. 16 | - `saliency_map`: Saliency map of the input image. 17 | - `feature_vector`: Feature vector of the input image. This is useful for Active Learning. 18 | - `raw_scores`: Raw scores of the classification model. 19 | 20 | ```{eval-rst} 21 | .. automodule:: model_api.models.classification 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | ``` 26 | -------------------------------------------------------------------------------- /src/model_api/models/result/__init__.py: -------------------------------------------------------------------------------- 1 | """Model results.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .anomaly import AnomalyResult 7 | from .base import Result 8 | from .classification import ClassificationResult, Label 9 | from .detection import DetectionResult 10 | from .keypoint import DetectedKeypoints 11 | from .segmentation import Contour, ImageResultWithSoftPrediction, InstanceSegmentationResult, RotatedSegmentationResult 12 | from .visual_prompting import PredictedMask, VisualPromptingResult, ZSLVisualPromptingResult 13 | 14 | __all__ = [ 15 | "AnomalyResult", 16 | "ClassificationResult", 17 | "Contour", 18 | "DetectionResult", 19 | "DetectedKeypoints", 20 | "ImageResultWithSoftPrediction", 21 | "InstanceSegmentationResult", 22 | "Label", 23 | "PredictedMask", 24 | "Result", 25 | "VisualPromptingResult", 26 | "ZSLVisualPromptingResult", 27 | "RotatedSegmentationResult", 28 | ] 29 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/keypoint.py: -------------------------------------------------------------------------------- 1 | """Keypoint Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from typing import Union 7 | 8 | from PIL import Image 9 | 10 | from model_api.models.result import DetectedKeypoints 11 | from model_api.visualizer.layout import Flatten, Layout 12 | from model_api.visualizer.primitive import Keypoint 13 | 14 | from .scene import Scene 15 | 16 | 17 | class KeypointScene(Scene): 18 | """Keypoint Scene.""" 19 | 20 | def __init__(self, image: Image, result: DetectedKeypoints, layout: Union[Layout, None] = None) -> None: 21 | super().__init__( 22 | base=image, 23 | keypoints=self._get_keypoints(result), 24 | layout=layout, 25 | ) 26 | 27 | def _get_keypoints(self, result: DetectedKeypoints) -> list[Keypoint]: 28 | return [Keypoint(result.keypoints, result.scores)] 29 | 30 | @property 31 | def default_layout(self) -> Layout: 32 | return Flatten(Keypoint) 33 | -------------------------------------------------------------------------------- /examples/visualization/run.py: -------------------------------------------------------------------------------- 1 | """Visualization Example.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import argparse 7 | from argparse import Namespace 8 | 9 | import numpy as np 10 | from PIL import Image 11 | 12 | from model_api.models import Model 13 | from model_api.visualizer import Visualizer 14 | 15 | 16 | def main(args: Namespace): 17 | image = Image.open(args.image) 18 | 19 | model = Model.create_model(args.model) 20 | 21 | predictions = model(np.array(image)) 22 | visualizer = Visualizer() 23 | 24 | if args.output: 25 | visualizer.save(image=image, result=predictions, 
path=args.output) 26 | else: 27 | visualizer.show(image=image, result=predictions) 28 | 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument("--image", type=str, required=True) 33 | parser.add_argument("--model", type=str, required=True) 34 | parser.add_argument("--output", type=str, required=False) 35 | args = parser.parse_args() 36 | main(args) 37 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | ## Before submitting 18 | 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Did you make sure to update the documentation with your changes? 21 | - [ ] Did you write any new necessary tests? 22 | -------------------------------------------------------------------------------- /.github/workflows/renovate-config-validator.yml: -------------------------------------------------------------------------------- 1 | # Renovate configuration validator 2 | # 3 | # This workflow validates changes proposed into Renovate configuration file 4 | # (.github/renovate.json5) and prevents non-valid configuration to be used by Renovate. 5 | # 6 | # Required Secrets: 7 | # - None 8 | # 9 | # Automatically triggered on: 10 | # - Pull requests to .github/renovate.json5. 11 | # 12 | 13 | name: Validate Renovate configuration 14 | 15 | on: 16 | pull_request: 17 | paths: 18 | - ".github/renovate.json5" 19 | 20 | permissions: 21 | contents: read 22 | 23 | concurrency: 24 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.event.after }} 25 | cancel-in-progress: true 26 | 27 | jobs: 28 | validate: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: Checkout configuration 32 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 33 | with: 34 | persist-credentials: false 35 | 36 | - name: Validate configuration 37 | run: | 38 | # renovate: datasource=docker 39 | export RENOVATE_IMAGE=ghcr.io/renovatebot/renovate:40.11 40 | docker run --rm --entrypoint "renovate-config-validator" \ 41 | -v "${{ github.workspace }}/.github/renovate.json5":"/renovate.json5" \ 42 | ${RENOVATE_IMAGE} "/renovate.json5" 43 | -------------------------------------------------------------------------------- /tests/unit/visualizer/test_visualizer.py: -------------------------------------------------------------------------------- 1 | """Tests for visualizer.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from PIL import Image 10 | 11 | from model_api.models.result import ( 12 | AnomalyResult, 13 | ) 14 | from model_api.visualizer import Visualizer 15 | 16 | 17 | def test_render(mock_image: Image, tmpdir: Path): 18 | """Test Visualizer.render().""" 19 | heatmap = np.ones(mock_image.size, dtype=np.uint8) 20 | heatmap *= 255 21 | 22 | mask = np.zeros(mock_image.size, dtype=np.uint8) 23 | mask[32:96, 32:96] = 255 24 | mask[40:80, 0:128] = 255 25 | 26 | anomaly_result = AnomalyResult( 27 | anomaly_map=heatmap, 28 | pred_boxes=np.array([[0, 0, 128, 128], [32, 32, 96, 96]]), 29 | pred_label="Anomaly", 30 | pred_mask=mask, 31 | pred_score=0.85, 32 | ) 33 | 34 | visualizer = Visualizer() 35 | rendered_img = visualizer.render(mock_image, anomaly_result) 36 | 37 | 
assert isinstance(rendered_img, Image.Image) 38 | assert np.array(rendered_img).shape == np.array(mock_image).shape 39 | 40 | rendered_img_np = visualizer.render(np.array(mock_image), anomaly_result) 41 | 42 | assert isinstance(rendered_img_np, np.ndarray) 43 | assert rendered_img_np.shape == np.array(mock_image).shape 44 | -------------------------------------------------------------------------------- /examples/visual_prompting/README.md: -------------------------------------------------------------------------------- 1 | # Segment Anything example 2 | 3 | This example demonstrates how to use the Python API implementation of the Segment Anything inference pipeline: 4 | 5 | - Create encoder and decoder models 6 | - Create a visual prompter pipeline 7 | - Use points as prompts 8 | - The visualized result is saved to `sam_result.jpg` 9 | 10 | ## Prerequisites 11 | 12 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 13 | 14 | ## Run example 15 | 16 | To run the example, please execute the following command: 17 | 18 | ```bash 19 | python run.py 20 | ``` 21 | 22 | where prompts are in X Y format. 23 | 24 | To run the pipeline out of the box, you can download the test data by running the following command from the repo root: 25 | 26 | ```bash 27 | pip install httpx 28 | python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l 29 | ``` 30 | 31 | and then run 32 | 33 | ```bash 34 | python run.py ../../../data/coco128/images/train2017/000000000127.jpg \ 35 | ../../../data/otx_models/sam_vit_b_zsl_encoder.xml ../../../data/otx_models/sam_vit_b_zsl_decoder.xml \ 36 | 274 306 482 295 37 | ``` 38 | 39 | from the sample folder. Here two prompt points are passed via the CLI: `(274, 306)` and `(482, 295)`. 40 | 41 | > _NOTE_: results of segmentation models are saved to the `sam_result.jpg` file. 42 | -------------------------------------------------------------------------------- /.github/workflows/scorecards.yml: -------------------------------------------------------------------------------- 1 | name: Scorecards supply-chain security 2 | on: 3 | # For Branch-Protection check. Only the default branch is supported. 
See 4 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 5 | branch_protection_rule: 6 | schedule: 7 | # Run security checks every day at 2 AM UTC 8 | - cron: "0 2 * * *" 9 | workflow_dispatch: 10 | 11 | permissions: {} 12 | 13 | jobs: 14 | analysis: 15 | name: Scorecards analysis 16 | runs-on: ubuntu-latest 17 | permissions: 18 | # Needed to upload the results to code-scanning dashboard 19 | security-events: write 20 | # Needed to publish results and get a badge 21 | id-token: write 22 | 23 | steps: 24 | - name: Checkout code 25 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 26 | with: 27 | persist-credentials: false 28 | 29 | - name: Run analysis 30 | uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 31 | with: 32 | results_file: results.sarif 33 | results_format: sarif 34 | publish_results: true 35 | 36 | # Upload the results to GitHub's code scanning dashboard 37 | - name: Upload to code-scanning 38 | uses: github/codeql-action/upload-sarif@cf1bb45a277cb3c205638b2cd5c984db1c46a412 # v4.31.7 39 | with: 40 | sarif_file: results.sarif 41 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL Scan" 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | branches: ["master"] 8 | schedule: 9 | - cron: "37 3 * * 0" 10 | 11 | permissions: {} # No permissions by default on workflow level 12 | 13 | jobs: 14 | analyze: 15 | name: Analyze (${{ matrix.language }}) 16 | runs-on: ubuntu-latest 17 | permissions: 18 | security-events: write # required to publish sarif 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | include: 24 | - language: actions 25 | build-mode: none 26 | - language: python 27 | build-mode: none 28 | 29 | steps: 30 | - name: Checkout repository 31 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 32 | with: 33 | persist-credentials: false 34 | 35 | # Initializes the CodeQL tools for scanning. 36 | - name: Initialize CodeQL 37 | uses: github/codeql-action/init@cf1bb45a277cb3c205638b2cd5c984db1c46a412 # v4.31.7 38 | with: 39 | languages: ${{ matrix.language }} 40 | build-mode: ${{ matrix.build-mode }} 41 | queries: security-extended 42 | 43 | - name: Perform CodeQL Analysis 44 | uses: github/codeql-action/analyze@cf1bb45a277cb3c205638b2cd5c984db1c46a412 # v4.31.7 45 | with: 46 | category: "/language:${{matrix.language}}" 47 | -------------------------------------------------------------------------------- /docs/source/models/detection_model.md: -------------------------------------------------------------------------------- 1 | # Detection Model 2 | 3 | ## Description 4 | 5 | Detection model aims to detect objects in an image. The model outputs a list of detected objects, each containing a bounding box, score and class label. 6 | 7 | ## OpenVINO Model Specifications 8 | 9 | ### Inputs 10 | 11 | A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 12 | 13 | ### Outputs 14 | 15 | Detection model outputs a `DetectionResult` objects containing the following attributes: 16 | 17 | - `boxes` (np.ndarray) - Bounding boxes of the detected objects. Each in format of x1, y1, x2 y2. 18 | - `scores` (np.ndarray) - Confidence scores of the detected objects. 19 | - `labels` (np.ndarray) - Class labels of the detected objects. 
20 | - `label_names` (list[str]) - List of class names of the detected objects. 21 | 22 | ## Example 23 | 24 | ```python 25 | import cv2 26 | from model_api.models import SSD 27 | 28 | # Load the model 29 | model = SSD.create_model("model.xml") 30 | image = cv2.cvtColor(cv2.imread("image.jpg"), cv2.COLOR_BGR2RGB)  # read an input image as RGB (path is a placeholder) 31 | # Forward pass 32 | predictions = model(image) 33 | 34 | # Iterate over detection results 35 | for box, score, label, label_name in zip( 36 | predictions.boxes, 37 | predictions.scores, 38 | predictions.labels, 39 | predictions.label_names, 40 | ): 41 | print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}") 42 | ``` 43 | 44 | ```{eval-rst} 45 | .. automodule:: model_api.models.detection_model 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | ``` 50 | -------------------------------------------------------------------------------- /examples/asynchronous_api/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (C) 2020-2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | import sys 8 | 9 | import cv2 10 | 11 | from model_api.models import DetectionModel 12 | 13 | 14 | def main(): 15 | if len(sys.argv) != 2: 16 | usage_message = f"Usage: {sys.argv[0]} " 17 | raise RuntimeError(usage_message) 18 | 19 | image = cv2.cvtColor(cv2.imread(sys.argv[1]), cv2.COLOR_BGR2RGB) 20 | if image is None: 21 | error_message = f"Failed to read the image: {sys.argv[1]}" 22 | raise RuntimeError(error_message) 23 | 24 | # Create an Object Detection model using a model name and download it from Open Model Zoo 25 | # Replace numpy preprocessing and embed it directly into a model graph to speed up inference 26 | # download_dir is used to store the downloaded model 27 | model = DetectionModel.create_model("yolo-v4-tf") 28 | 29 | ITERATIONS = 10 30 | results = {} # container for results 31 | 32 | def callback(result, userdata): 33 | print(f"Done! 
Number: {userdata}") 34 | results[userdata] = result 35 | 36 | model.set_callback(callback) 37 | ## Run parallel inference 38 | for i in range(ITERATIONS): 39 | model.infer_async(image, user_data=i) 40 | 41 | model.await_all() 42 | assert len(results) == ITERATIONS 43 | 44 | for i in range(ITERATIONS): 45 | print(f"Request {i}: {results[i]}") 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /tests/accuracy/conftest.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | import json 6 | from pathlib import Path 7 | 8 | import pytest 9 | 10 | 11 | def pytest_addoption(parser): 12 | parser.addoption("--data", action="store", help="data folder with dataset") 13 | parser.addoption( 14 | "--model_data", 15 | action="store", 16 | default="public_scope.json", 17 | help="path to model data JSON file for test parameterization", 18 | ) 19 | parser.addoption( 20 | "--device", 21 | action="store", 22 | default="CPU", 23 | help="device to run tests on (in case of OpenvinoAdapter)", 24 | ) 25 | parser.addoption( 26 | "--dump", 27 | action="store_true", 28 | default=False, 29 | help="whether to dump results into json file", 30 | ) 31 | parser.addoption( 32 | "--results-dir", 33 | action="store", 34 | default="", 35 | help="directory to store inference result", 36 | ) 37 | 38 | 39 | def pytest_configure(config): 40 | config.test_results = [] 41 | 42 | 43 | @pytest.hookimpl(tryfirst=True, hookwrapper=True) 44 | def pytest_runtest_makereport(item, call): 45 | outcome = yield 46 | result = outcome.get_result() 47 | 48 | if result.when == "call": 49 | test_results = item.config.test_results 50 | 51 | if not test_results: 52 | return 53 | 54 | with Path("test_scope.json").open("w") as outfile: 55 | json.dump(test_results, outfile, indent=4) 56 | -------------------------------------------------------------------------------- /src/model_api/models/result/anomaly.py: -------------------------------------------------------------------------------- 1 | """Anomaly result type.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | import numpy as np 9 | 10 | from .base import Result 11 | 12 | 13 | class AnomalyResult(Result): 14 | """Results for anomaly models.""" 15 | 16 | def __init__( 17 | self, 18 | anomaly_map: np.ndarray | None = None, 19 | pred_boxes: np.ndarray | None = None, 20 | pred_label: str | None = None, 21 | pred_mask: np.ndarray | None = None, 22 | pred_score: float | None = None, 23 | ) -> None: 24 | self.anomaly_map = anomaly_map 25 | self.pred_boxes = pred_boxes 26 | self.pred_label = pred_label 27 | self.pred_mask = pred_mask 28 | self.pred_score = pred_score 29 | 30 | def _compute_min_max(self, tensor: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 31 | """Computes min and max values of the tensor.""" 32 | return tensor.min(), tensor.max() 33 | 34 | def __str__(self) -> str: 35 | assert self.anomaly_map is not None 36 | assert self.pred_mask is not None 37 | anomaly_map_min, anomaly_map_max = self._compute_min_max(self.anomaly_map) 38 | pred_mask_min, pred_mask_max = self._compute_min_max(self.pred_mask) 39 | return ( 40 | f"anomaly_map min:{anomaly_map_min} max:{anomaly_map_max};" 41 | f"pred_score:{np.round(self.pred_score, 1) if self.pred_score else 0.0};" 42 | 
f"pred_label:{self.pred_label};" 43 | f"pred_mask min:{pred_mask_min} max:{pred_mask_max};" 44 | ) 45 | -------------------------------------------------------------------------------- /tests/unit/adapters/test_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2024-2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | import cv2 as cv 6 | import numpy as np 7 | import openvino as ov 8 | import pytest 9 | from openvino.preprocess import PrePostProcessor 10 | 11 | from model_api.adapters.utils import ( 12 | resize_image_with_aspect, 13 | resize_image_with_aspect_ocv, 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "img_shape", 19 | [(301, 999, 3), (999, 301, 3), (500, 500, 3), (1024, 768, 3), (768, 1024, 3)], 20 | ) 21 | def test_resize_image_with_aspect_ocv(img_shape): 22 | model_h = 1024 23 | model_w = 1024 24 | pad_value = 0 25 | 26 | param_node = ov.op.Parameter(ov.Type.f32, ov.Shape([1, model_h, model_w, 3])) 27 | model = ov.Model(param_node, [param_node]) 28 | ppp = PrePostProcessor(model) 29 | ppp.input().tensor().set_element_type(ov.Type.u8) 30 | ppp.input().tensor().set_layout(ov.Layout("NHWC")) 31 | ppp.input().tensor().set_shape([1, -1, -1, 3]) 32 | ppp.input().preprocess().custom( 33 | resize_image_with_aspect( 34 | (model_h, model_w), 35 | "linear", 36 | pad_value, 37 | ), 38 | ) 39 | ppp.input().preprocess().convert_element_type(ov.Type.f32) 40 | ov_resize_image_with_aspect = ov.Core().compile_model(ppp.build(), "CPU") 41 | 42 | rng = np.random.default_rng() 43 | img = rng.integers(0, 255, size=img_shape, dtype=np.uint8) 44 | ov_results = next(iter(ov_resize_image_with_aspect(img[None]).values()))[0] 45 | 46 | np_results = resize_image_with_aspect_ocv(img, (model_w, model_h)) 47 | 48 | assert cv.PSNR(np_results.astype(np.float32), ov_results) > 20.0 49 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/classification.py: -------------------------------------------------------------------------------- 1 | """Classification Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from typing import Union 7 | 8 | import cv2 9 | from PIL import Image 10 | 11 | from model_api.models.result import ClassificationResult 12 | from model_api.visualizer.layout import Flatten, Layout 13 | from model_api.visualizer.primitive import Label, Overlay 14 | 15 | from .scene import Scene 16 | 17 | 18 | class ClassificationScene(Scene): 19 | """Classification Scene.""" 20 | 21 | def __init__(self, image: Image, result: ClassificationResult, layout: Union[Layout, None] = None) -> None: 22 | super().__init__( 23 | base=image, 24 | label=self._get_labels(result), 25 | overlay=self._get_overlays(result), 26 | layout=layout, 27 | ) 28 | 29 | def _get_labels(self, result: ClassificationResult) -> list[Label]: 30 | labels = [] 31 | if result.top_labels is not None and len(result.top_labels) > 0: 32 | for label in result.top_labels: 33 | if label.name is not None: 34 | labels.append(Label(label=label.name, score=label.confidence)) 35 | return labels 36 | 37 | def _get_overlays(self, result: ClassificationResult) -> list[Overlay]: 38 | overlays = [] 39 | if result.saliency_map is not None and result.saliency_map.size > 0: 40 | saliency_map = cv2.cvtColor(result.saliency_map, cv2.COLOR_BGR2RGB) 41 | overlays.append(Overlay(saliency_map)) 42 | return overlays 43 | 44 | @property 45 | def 
default_layout(self) -> Layout: 46 | return Flatten(Overlay, Label) 47 | -------------------------------------------------------------------------------- /tests/unit/visualizer/test_layout.py: -------------------------------------------------------------------------------- 1 | """Test layout.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import numpy as np 7 | from PIL import Image 8 | 9 | from model_api.visualizer import Flatten, HStack, Scene 10 | from model_api.visualizer.primitive import Overlay 11 | 12 | 13 | def test_flatten_layout(mock_image: Image, mock_scene: Scene): 14 | """Test if the layout is created correctly.""" 15 | overlay = np.zeros((100, 100, 3), dtype=np.uint8) 16 | overlay[50, 50] = [255, 0, 0] 17 | overlay = Image.fromarray(overlay) 18 | 19 | expected_image = Image.blend(mock_image, overlay, 0.4) 20 | mock_scene.layout = Flatten(Overlay) 21 | assert mock_scene.render() == expected_image 22 | 23 | 24 | def test_flatten_layout_with_no_primitives(mock_image: Image, mock_scene: Scene): 25 | """Test if the layout is created correctly.""" 26 | mock_scene.layout = Flatten() 27 | assert mock_scene.render() == mock_image 28 | 29 | 30 | def test_hstack_layout(): 31 | """Test if the layout is created correctly.""" 32 | blue_overlay = np.zeros((100, 100, 3), dtype=np.uint8) 33 | blue_overlay[50, 50] = [0, 0, 255] 34 | blue_overlay = Image.fromarray(blue_overlay) 35 | 36 | red_overlay = np.zeros((100, 100, 3), dtype=np.uint8) 37 | red_overlay[50, 50] = [255, 0, 0] 38 | red_overlay = Image.fromarray(red_overlay) 39 | 40 | mock_scene = Scene( 41 | base=Image.new("RGB", (100, 100)), 42 | overlay=[Overlay(blue_overlay, opacity=1.0), Overlay(red_overlay, opacity=1.0)], 43 | layout=HStack(Overlay), 44 | ) 45 | 46 | expected_image = Image.new("RGB", (200, 100)) 47 | expected_image.paste(blue_overlay, (0, 0)) 48 | expected_image.paste(red_overlay, (100, 0)) 49 | 50 | assert mock_scene.render() == expected_image 51 | -------------------------------------------------------------------------------- /.github/workflows/test_accuracy.yml: -------------------------------------------------------------------------------- 1 | name: test_accuracy 2 | permissions: {} # No permissions by default on workflow level 3 | on: 4 | pull_request: 5 | merge_group: 6 | branches: 7 | - master 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | jobs: 12 | test_accuracy_items: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: 17 | - "ubuntu-24.04" 18 | - "windows-2022" 19 | python-version: 20 | - "3.10" 21 | - "3.11" 22 | - "3.12" 23 | - "3.13" 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 27 | with: 28 | persist-credentials: false 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # v7.1.5 31 | with: 32 | enable-cache: false 33 | python-version: ${{ matrix.python-version }} 34 | - name: Install dependencies 35 | run: | 36 | uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu 37 | - name: Prepare test data 38 | run: | 39 | uv run python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l 40 | - name: Run Python Test 41 | run: | 42 | uv run pytest --data=./data tests/accuracy/test_accuracy.py 43 | test_accuracy: 44 | runs-on: ubuntu-latest 45 | needs: test_accuracy_items 46 | if: always() 47 | steps: 
48 | - name: All tests ok 49 | if: ${{ !(contains(needs.*.result, 'failure')) }} 50 | run: exit 0 51 | - name: Some tests failed 52 | if: ${{ contains(needs.*.result, 'failure') }} 53 | run: exit 1 54 | -------------------------------------------------------------------------------- /examples/zsl_visual_prompting/README.md: -------------------------------------------------------------------------------- 1 | # Zero-shot Segment Anything example 2 | 3 | This example demonstrates how to use the Python API implementation of the Zero-shot Segment Anything pipeline for inference: 4 | 5 | - Create encoder and decoder models 6 | - Create a zero-shot visual prompter pipeline 7 | - Use points as prompts to learn on one image 8 | - Segment another image using the representation learned on the previous image 9 | - The visualized result is saved to `zsl_sam_result.jpg` 10 | 11 | ## Prerequisites 12 | 13 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 14 | 15 | ## Run example 16 | 17 | To run the example, please execute the following command: 18 | 19 | ```bash 20 | python run.py -t 21 | ``` 22 | 23 | where prompts are in X Y format. `t` is a threshold for matching the reference features from the source image against features 24 | obtained from the target image. 25 | Higher thresholds lead to lower mask recall in the final prediction, i.e. low-confidence masks can be filtered out. 26 | 27 | To run the pipeline out of the box, you can download the test data by running the following command from the repo root: 28 | 29 | ```bash 30 | pip install httpx 31 | python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l 32 | ``` 33 | 34 | and then run 35 | 36 | ```bash 37 | python run.py ../../../data/coco128/images/train2017/000000000025.jpg \ 38 | ../../../data/coco128/images/train2017/000000000072.jpg ../../../data/otx_models/sam_vit_b_zsl_encoder.xml \ 39 | ../../../data/otx_models/sam_vit_b_zsl_decoder.xml 464 202 -t 0.7 40 | 41 | ``` 42 | 43 | from the sample folder. Here, one prompt is passed via the CLI: `(464 202)` 44 | 45 | > _NOTE_: results of segmentation models are saved to the `zsl_sam_result.jpg` file. 46 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | node: 22.10.0 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.6.0 7 | hooks: 8 | # list of supported hooks: https://pre-commit.com/hooks.html 9 | - id: trailing-whitespace 10 | - id: end-of-file-fixer 11 | - id: check-yaml 12 | - id: check-added-large-files 13 | - id: debug-statements 14 | - id: detect-private-key 15 | 16 | # Ruff version. 17 | - repo: https://github.com/charliermarsh/ruff-pre-commit 18 | rev: "v0.6.2" 19 | hooks: 20 | # Run the linter. 21 | - id: ruff 22 | args: ["--fix"] 23 | # Run the formatter 24 | - id: ruff-format 25 | 26 | # python static type checking 27 | - repo: https://github.com/pre-commit/mirrors-mypy 28 | rev: "v1.11.2" 29 | hooks: 30 | - id: mypy 31 | additional_dependencies: [types-PyYAML, types-setuptools] 32 | 33 | - repo: https://github.com/rbubley/mirrors-prettier 34 | rev: v3.6.2 35 | hooks: 36 | - id: prettier 37 | 38 | - repo: https://github.com/igorshubovych/markdownlint-cli 39 | rev: v0.41.0 40 | hooks: 41 | - id: markdownlint 42 | 43 | # zizmor detects security issues in GitHub Actions workflows.
44 | - repo: https://github.com/woodruffw/zizmor-pre-commit 45 | rev: v1.11.0 46 | hooks: 47 | - id: zizmor 48 | args: ["--min-severity", "low", "--min-confidence", "low"] 49 | 50 | # add bandit for security checks 51 | - repo: https://github.com/PyCQA/bandit 52 | rev: 1.8.3 53 | hooks: 54 | - id: bandit 55 | args: 56 | [ 57 | "-c", 58 | "pyproject.toml", 59 | "--severity-level", 60 | "all", 61 | "--confidence-level", 62 | "all", 63 | ] 64 | additional_dependencies: ["bandit[toml]"] 65 | -------------------------------------------------------------------------------- /.github/workflows/pre_commit.yml: -------------------------------------------------------------------------------- 1 | name: Pre-Commit Checks 2 | permissions: {} # No permissions by default on workflow level 3 | 4 | on: 5 | push: 6 | branches: 7 | - master 8 | pull_request: 9 | types: 10 | - opened 11 | - reopened 12 | - synchronize 13 | - ready_for_review 14 | workflow_dispatch: # run on request (no need for PR) 15 | 16 | jobs: 17 | Code-Quality-Checks: 18 | runs-on: ubuntu-24.04 19 | steps: 20 | - name: CHECKOUT REPOSITORY 21 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 22 | with: 23 | persist-credentials: false 24 | - name: Set up Python 25 | uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 26 | with: 27 | python-version-file: ".python-version" 28 | - name: Install uv 29 | uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # v7.1.5 30 | - name: Install dependencies 31 | run: | 32 | uv sync --locked --all-extras 33 | - name: Run pre-commit checks 34 | run: | 35 | uvx pre-commit run --all-files 36 | Unit-Tests: 37 | runs-on: ubuntu-24.04 38 | steps: 39 | - name: CHECKOUT REPOSITORY 40 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 41 | with: 42 | persist-credentials: false 43 | - name: Set up Python 44 | uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 45 | with: 46 | python-version-file: ".python-version" 47 | - name: Install uv 48 | uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # v7.1.5 49 | - name: Install dependencies 50 | run: | 51 | uv sync --locked --extra tests 52 | - name: Run python unit tests 53 | run: | 54 | uv run pytest tests/unit --cov 55 | -------------------------------------------------------------------------------- /docs/source/models/instance_segmentation.md: -------------------------------------------------------------------------------- 1 | # Instance Segmentation 2 | 3 | ## Description 4 | 5 | Instance segmentation model aims to detect and segment objects in an image. It is an extension of object detection, where each object is segmented into a separate mask. The model outputs a list of segmented objects, each containing a mask, bounding box, score and class label. 6 | 7 | ## OpenVINO Model Specifications 8 | 9 | ### Inputs 10 | 11 | A single input image of shape (H, W, 3) where H and W are the height and width of the image, respectively. 12 | 13 | ### Outputs 14 | 15 | Instance segmentation model outputs a `InstanceSegmentationResult` object containing the following attributes: 16 | 17 | - `boxes` (np.ndarray) - Bounding boxes of the detected objects. Each in format of x1, y1, x2 y2. 18 | - `scores` (np.ndarray) - Confidence scores of the detected objects. 19 | - `masks` (np.ndarray) - Segmentation masks of the detected objects. 20 | - `labels` (np.ndarray) - Class labels of the detected objects. 
21 | - `label_names` (list[str]) - List of class names of the detected objects. 22 | 23 | ## Example 24 | 25 | ```python 26 | import cv2 27 | from model_api.models import MaskRCNNModel 28 | 29 | # Load the model 30 | model = MaskRCNNModel.create_model("model.xml") 31 | 32 | # Forward pass 33 | predictions = model(image) 34 | 35 | # Iterate over the segmented objects 36 | for box, score, mask, label, label_name in zip( 37 | predictions.boxes, 38 | predictions.scores, 39 | predictions.masks, 40 | predictions.labels, 41 | predictions.label_names, 42 | ): 43 | print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}") 44 | cv2.imshow("Mask", mask) 45 | cv2.waitKey(0) 46 | cv2.destroyAllWindows() 47 | ``` 48 | 49 | ```{eval-rst} 50 | .. automodule:: model_api.models.instance_segmentation 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | ``` 55 | -------------------------------------------------------------------------------- /tests/unit/metrics/test_timestat.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import unittest 7 | 8 | from model_api.metrics import TimeStat 9 | 10 | 11 | class TestTimeStat(unittest.TestCase): 12 | def test_initial_state(self): 13 | stat = TimeStat() 14 | assert stat.time == 0.0 15 | assert stat.durations == [] 16 | assert stat.count == 0 17 | assert stat.mean() == 0.0 18 | assert stat.stddev() == 0.0 19 | 20 | def test_update_increments(self): 21 | stat = TimeStat() 22 | stat.update() 23 | assert len(stat.durations) == 0 24 | stat.update() 25 | assert len(stat.durations) == 1 26 | assert abs(stat.time - stat.durations[0]) < 1e-7 27 | 28 | def test_reset(self): 29 | stat = TimeStat() 30 | stat.update() 31 | stat.reset() 32 | assert stat.time == 0.0 33 | assert stat.durations == [] 34 | assert stat.count == 0 35 | 36 | def test_mean(self): 37 | stat = TimeStat() 38 | for _ in range(3): 39 | stat.update() 40 | expected_mean = stat.time / stat.count 41 | assert abs(stat.mean() - expected_mean) < 1e-7 42 | 43 | def test_stddev(self): 44 | stat = TimeStat() 45 | for _ in range(5): 46 | stat.update() 47 | assert stat.stddev() >= 0.0 48 | 49 | def test_add(self): 50 | stat1 = TimeStat() 51 | stat2 = TimeStat() 52 | for _ in range(2): 53 | stat1.update() 54 | for _ in range(3): 55 | stat2.update() 56 | stat3 = stat1 + stat2 57 | assert stat3.time == stat1.time + stat2.time 58 | assert stat3.count == stat1.count + stat2.count 59 | 60 | def test_add_invalid(self): 61 | stat = TimeStat() 62 | assert stat.__add__(42) == NotImplemented 63 | -------------------------------------------------------------------------------- /src/model_api/models/result/classification.py: -------------------------------------------------------------------------------- 1 | """Classification result type.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING, Generator 9 | 10 | from .base import Result 11 | from .utils import array_shape_to_str 12 | 13 | if TYPE_CHECKING: 14 | import numpy as np 15 | 16 | 17 | class Label: 18 | """Entity representing a predicted label.""" 19 | 20 | def __init__( 21 | self, 22 | id: int | None = None, 23 | name: str | None = None, 24 | confidence: float | None = None, 25 | ) -> None: 26 | self.name = name 27 | self.confidence = confidence 28 | self.id = id 29 | 30 | def 
__iter__(self) -> Generator: 31 | output = (self.id, self.name, self.confidence) 32 | for i in output: 33 | yield i 34 | 35 | def __str__(self) -> str: 36 | return f"{self.id} ({self.name}): {self.confidence:.3f}" 37 | 38 | 39 | class ClassificationResult(Result): 40 | """Results for classification models.""" 41 | 42 | def __init__( 43 | self, 44 | top_labels: list[Label] | None = None, 45 | saliency_map: np.ndarray | None = None, 46 | feature_vector: np.ndarray | None = None, 47 | raw_scores: np.ndarray | None = None, 48 | ) -> None: 49 | self.top_labels = top_labels 50 | self.saliency_map = saliency_map 51 | self.feature_vector = feature_vector 52 | self.raw_scores = raw_scores 53 | 54 | def __str__(self) -> str: 55 | assert self.top_labels is not None 56 | labels = ", ".join(str(label) for label in self.top_labels) 57 | return ( 58 | f"{labels}, {array_shape_to_str(self.saliency_map)}, {array_shape_to_str(self.feature_vector)}, " 59 | f"{array_shape_to_str(self.raw_scores)}" 60 | ) 61 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | """Segmentation Scene.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from typing import Union 7 | 8 | import cv2 9 | import numpy as np 10 | from PIL import Image 11 | 12 | from model_api.models.result import ImageResultWithSoftPrediction 13 | from model_api.visualizer.layout import HStack, Layout 14 | from model_api.visualizer.primitive import Overlay 15 | from model_api.visualizer.scene import Scene 16 | 17 | 18 | class SegmentationScene(Scene): 19 | """Segmentation Scene.""" 20 | 21 | def __init__(self, image: Image, result: ImageResultWithSoftPrediction, layout: Union[Layout, None] = None) -> None: 22 | super().__init__( 23 | base=image, 24 | overlay=self._get_overlays(result), 25 | layout=layout, 26 | ) 27 | 28 | def _get_overlays(self, result: ImageResultWithSoftPrediction) -> list[Overlay]: 29 | overlays = [] 30 | # Use the hard prediction to get the overlays 31 | hard_prediction = result.resultImage # shape H,W 32 | num_classes = hard_prediction.max() 33 | for i in range(1, num_classes + 1): # ignore background 34 | class_map = (hard_prediction == i).astype(np.uint8) * 255 35 | class_map = cv2.applyColorMap(class_map, cv2.COLORMAP_JET) 36 | class_map = cv2.cvtColor(class_map, cv2.COLOR_BGR2RGB) 37 | overlays.append(Overlay(class_map, label=f"Class {i}")) 38 | 39 | # Add saliency map 40 | if result.saliency_map is not None and result.saliency_map.size > 0: 41 | saliency_map = cv2.cvtColor(result.saliency_map, cv2.COLOR_BGR2RGB) 42 | overlays.append(Overlay(saliency_map, label="Saliency Map")) 43 | 44 | return overlays 45 | 46 | @property 47 | def default_layout(self) -> Layout: 48 | return HStack(Overlay) 49 | -------------------------------------------------------------------------------- /examples/visual_prompting/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (C) 2020-2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | import argparse 8 | import colorsys 9 | from itertools import starmap 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from model_api.models import Model, Prompt, SAMVisualPrompter 15 | 16 | 17 | def get_colors(n: int): 18 | HSV_tuples = [(x / n, 0.5, 0.5) for x in range(n)] 19 | 
RGB_tuples = starmap(colorsys.hsv_to_rgb, HSV_tuples) 20 | return (np.array(list(RGB_tuples)) * 255).astype(np.uint8) 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description="SAM sample script") 25 | parser.add_argument("image", type=str) 26 | parser.add_argument("encoder_path", type=str) 27 | parser.add_argument("decoder_path", type=str) 28 | parser.add_argument("prompts", nargs="+", type=int) 29 | args = parser.parse_args() 30 | 31 | image = cv2.cvtColor(cv2.imread(args.image), cv2.COLOR_BGR2RGB) 32 | if image is None: 33 | error_message = f"Failed to read the image: {args.image}" 34 | raise RuntimeError(error_message) 35 | 36 | encoder = Model.create_model(args.encoder_path) 37 | decoder = Model.create_model(args.decoder_path) 38 | sam_prompter = SAMVisualPrompter(encoder, decoder) 39 | 40 | all_prompts = [] 41 | for i, p in enumerate(np.array(args.prompts).reshape(-1, 2)): 42 | all_prompts.append(Prompt(p, i)) 43 | 44 | result = sam_prompter(image, points=all_prompts) 45 | 46 | colors = get_colors(len(all_prompts)) 47 | 48 | for i in range(len(result.upscaled_masks)): 49 | print(f"Prompt {i}, mask score {result.best_iou[i]:.3f}") 50 | masked_img = np.where(result.processed_mask[i][..., None], colors[i], image) 51 | image = cv2.addWeighted(image, 0.2, masked_img, 0.8, 0) 52 | 53 | cv2.imwrite("sam_result.jpg", cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /src/model_api/visualizer/layout/flatten.py: -------------------------------------------------------------------------------- 1 | """Flatten Layout.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING, Type, Union, cast 9 | 10 | from model_api.visualizer.primitive import Label 11 | 12 | from .layout import Layout 13 | 14 | if TYPE_CHECKING: 15 | import PIL 16 | 17 | from model_api.visualizer.primitive import Primitive 18 | from model_api.visualizer.scene import Scene 19 | 20 | 21 | class Flatten(Layout): 22 | """Put all primitives on top of each other. 23 | 24 | Args: 25 | *args (Union[Type[Primitive], Layout]): Primitives or layouts to be applied. 26 | """ 27 | 28 | def __init__(self, *args: Union[Type[Primitive], Layout]) -> None: 29 | self.children = args 30 | 31 | def _compute_on_primitive(self, primitive: Type[Primitive], image: PIL.Image, scene: Scene) -> PIL.Image | None: 32 | if scene.has_primitives(primitive): 33 | primitives = scene.get_primitives(primitive) 34 | if primitive == Label: # Labels need to be rendered next to each other 35 | # cast is needed as mypy does not know that the primitives are of type Label. 
36 | primitives_ = cast("list[Label]", primitives) 37 | image = Label.overlay_labels(image, primitives_) 38 | else: 39 | # Other primitives are rendered on top of each other 40 | for _primitive in primitives: 41 | image = _primitive.compute(image) 42 | return image 43 | return None 44 | 45 | def __call__(self, scene: Scene) -> PIL.Image: 46 | image: PIL.Image = scene.base.copy() 47 | for child in self.children: 48 | image_ = child(scene) if isinstance(child, Layout) else self._compute_on_primitive(child, image, scene) 49 | if image_ is not None: 50 | image = image_ 51 | return image 52 | -------------------------------------------------------------------------------- /src/docs/keypoint_detection.md: -------------------------------------------------------------------------------- 1 | # Top-down keypoint detection 2 | 3 | ## Use case and high-level description 4 | 5 | The keypoint detection model aims to detect a set of pre-defined keypoints on a cropped object. 6 | If a crop is not tight enough, the quality of the keypoints degrades. Having this model and an 7 | object detector, one can organize keypoint detection for all objects of interest present in an image 8 | (top-down approach). 9 | 10 | ## Models 11 | 12 | The top-down keypoint detection pipeline uses detections that come from any appropriate detector, 13 | and a keypoint regression model acting on crops. 14 | 15 | ### Keypoint model parameters 16 | 17 | The following parameters can be provided via the Python API or RT Info embedded into the OV model: 18 | 19 | - `labels`(`list(str)`) : a list of keypoint names. 20 | 21 | ## OV model specifications 22 | 23 | ### Inputs 24 | 25 | A single NCHW tensor representing a batch of images. 26 | 27 | ### Outputs 28 | 29 | Two vectors in Simple Coordinate Classification Perspective ([SimCC](https://arxiv.org/abs/2107.03332)) format: 30 | 31 | - `pred_x` (B, N, D1) - `x` coordinate representation, where `N` is the number of keypoints. 32 | - `pred_y` (B, N, D2) - `y` coordinate representation, where `N` is the number of keypoints. 33 | 34 | ## How to use 35 | 36 | ```python 37 | import cv2 38 | from model_api.models import TopDownKeypointDetectionPipeline, Detection, KeypointDetectionModel 39 | 40 | model = KeypointDetectionModel.create_model("kp_model.xml") 41 | # a list of detections in (x_min, y_min, x_max, y_max, score, class_id) format 42 | detections = [Detection(0, 0, 100, 100, 1.0, 0)] 43 | top_down_pipeline = TopDownKeypointDetectionPipeline(model) 44 | predictions = top_down_pipeline.predict(image, detections) 45 | 46 | # iterating over a list of DetectedKeypoints.
Each of the items corresponds to a detection 47 | for obj_keypoints in predictions: 48 | for point in obj_keypoints.keypoints.astype(np.int32): 49 | cv2.circle( 50 | image, point, radius=0, color=(0, 255, 0), thickness=5 51 | ) 52 | ``` 53 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/anomaly.py: -------------------------------------------------------------------------------- 1 | """Anomaly Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from itertools import starmap 7 | from typing import Union 8 | 9 | import cv2 10 | from PIL import Image 11 | 12 | from model_api.models.result import AnomalyResult 13 | from model_api.visualizer.layout import Flatten, Layout 14 | from model_api.visualizer.primitive import BoundingBox, Label, Overlay, Polygon 15 | 16 | from .scene import Scene 17 | 18 | 19 | class AnomalyScene(Scene): 20 | """Anomaly Scene.""" 21 | 22 | def __init__(self, image: Image, result: AnomalyResult, layout: Union[Layout, None] = None) -> None: 23 | super().__init__( 24 | base=image, 25 | overlay=self._get_overlays(result), 26 | bounding_box=self._get_bounding_boxes(result), 27 | label=self._get_labels(result), 28 | polygon=self._get_polygons(result), 29 | layout=layout, 30 | ) 31 | 32 | def _get_overlays(self, result: AnomalyResult) -> list[Overlay]: 33 | if result.anomaly_map is not None: 34 | anomaly_map = cv2.cvtColor(result.anomaly_map, cv2.COLOR_BGR2RGB) 35 | return [Overlay(anomaly_map)] 36 | return [] 37 | 38 | def _get_bounding_boxes(self, result: AnomalyResult) -> list[BoundingBox]: 39 | if result.pred_boxes is not None: 40 | return list(starmap(BoundingBox, result.pred_boxes)) 41 | return [] 42 | 43 | def _get_labels(self, result: AnomalyResult) -> list[Label]: 44 | labels = [] 45 | if result.pred_label is not None and result.pred_score is not None: 46 | labels.append(Label(label=result.pred_label, score=result.pred_score)) 47 | return labels 48 | 49 | def _get_polygons(self, result: AnomalyResult) -> list[Polygon]: 50 | if result.pred_mask is not None: 51 | return [Polygon(result.pred_mask)] 52 | return [] 53 | 54 | @property 55 | def default_layout(self) -> Layout: 56 | return Flatten(Overlay, BoundingBox, Label, Polygon) 57 | -------------------------------------------------------------------------------- /examples/synchronous_api/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (C) 2020-2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | import sys 8 | 9 | import cv2 10 | from PIL import Image 11 | 12 | from model_api.models import ClassificationModel, DetectionModel, SegmentationModel 13 | 14 | 15 | def main(): 16 | if len(sys.argv) != 2: 17 | usage_message = f"Usage: {sys.argv[0]} " 18 | raise RuntimeError(usage_message) 19 | 20 | image = cv2.cvtColor(cv2.imread(sys.argv[1]), cv2.COLOR_BGR2RGB) 21 | if image is None: 22 | error_message = f"Failed to read the image: {sys.argv[1]}" 23 | raise RuntimeError(error_message) 24 | 25 | # Create Image Classification model using mode name and download from Open Model Zoo 26 | efficientnet_b0 = ClassificationModel.create_model("efficientnet-b0-pytorch") 27 | classifications = efficientnet_b0(image) 28 | print(f"Classification results: {classifications}") 29 | 30 | # Create Object Detection model using mode name and download from Open Model Zoo 31 | # Replace numpy preprocessing and 
embed it directly into a model graph to speed up inference 32 | # download_dir is used to store downloaded model 33 | ssd_mobilenet_fpn = DetectionModel.create_model( 34 | "ssd_mobilenet_v1_fpn_coco", 35 | download_dir="tmp", 36 | ) 37 | detections = ssd_mobilenet_fpn(image) 38 | print(f"Detection results: {detections}") 39 | ssd_mobilenet_fpn.save("ssd_mobilenet_v1_fpn_coco_with_preprocessing.xml") 40 | 41 | # Instantiate from a local model (downloaded previously) 42 | ssd_mobilenet_fpn_local = DetectionModel.create_model( 43 | "tmp/public/ssd_mobilenet_v1_fpn_coco/FP16/ssd_mobilenet_v1_fpn_coco.xml", 44 | ) 45 | detections = ssd_mobilenet_fpn_local(image) 46 | print(f"Detection results for local: {detections}") 47 | 48 | # Create Image Segmentation model 49 | hrnet = SegmentationModel.create_model("hrnet-v2-c1-segmentation") 50 | mask = hrnet(image) 51 | Image.fromarray(mask + 20).save("mask.png") 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /docs/source/models/keypoint_detection.md: -------------------------------------------------------------------------------- 1 | # Keypoint Detection 2 | 3 | ## Description 4 | 5 | The keypoint detection model aims to detect a set of pre-defined keypoints on a cropped object. 6 | If a crop is not tight enough, the quality of the keypoints degrades. Having this model and an 7 | object detector, one can organize keypoint detection for all objects of interest present in an image (top-down approach). 8 | 9 | ## Models 10 | 11 | The top-down keypoint detection pipeline uses detections that come from any appropriate detector, 12 | and a keypoint regression model acting on crops. 13 | 14 | ### Parameters 15 | 16 | The following parameters can be provided via the Python API or RT Info embedded into the OV model: 17 | 18 | - `labels`(`list(str)`) : a list of keypoint names. 19 | 20 | ## OpenVINO Model Specifications 21 | 22 | ### Inputs 23 | 24 | A single `NCHW` tensor representing a batch of images. 25 | 26 | ### Outputs 27 | 28 | Two vectors in Simple Coordinate Classification Perspective ([SimCC](https://arxiv.org/abs/2107.03332)) format: 29 | 30 | - `pred_x` (B, N, D1) - `x` coordinate representation, where `N` is the number of keypoints. 31 | - `pred_y` (B, N, D2) - `y` coordinate representation, where `N` is the number of keypoints. 32 | 33 | ## Example 34 | 35 | ```python 36 | import cv2 37 | from model_api.models import TopDownKeypointDetectionPipeline, Detection, KeypointDetectionModel 38 | 39 | model = KeypointDetectionModel.create_model("kp_model.xml") 40 | # a list of detections in (x_min, y_min, x_max, y_max, score, class_id) format 41 | detections = [Detection(0, 0, 100, 100, 1.0, 0)] 42 | top_down_pipeline = TopDownKeypointDetectionPipeline(model) 43 | predictions = top_down_pipeline.predict(image, detections) 44 | 45 | # iterating over a list of DetectedKeypoints. Each of the items corresponds to a detection 46 | for obj_keypoints in predictions: 47 | for point in obj_keypoints.keypoints.astype(np.int32): 48 | cv2.circle( 49 | image, point, radius=0, color=(0, 255, 0), thickness=5 50 | ) 51 | ``` 52 | 53 | ```{eval-rst} 54 | ..
automodule:: model_api.models.keypoint_detection 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | ``` 59 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/detection.py: -------------------------------------------------------------------------------- 1 | """Detection Scene.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from typing import Union 7 | 8 | import cv2 9 | from PIL import Image 10 | 11 | from model_api.models.result import DetectionResult 12 | from model_api.visualizer.layout import Flatten, HStack, Layout 13 | from model_api.visualizer.primitive import BoundingBox, Label, Overlay 14 | 15 | from .scene import Scene 16 | 17 | 18 | class DetectionScene(Scene): 19 | """Detection Scene.""" 20 | 21 | def __init__(self, image: Image, result: DetectionResult, layout: Union[Layout, None] = None) -> None: 22 | super().__init__( 23 | base=image, 24 | bounding_box=self._get_bounding_boxes(result), 25 | overlay=self._get_overlays(result), 26 | layout=layout, 27 | ) 28 | 29 | def _get_overlays(self, result: DetectionResult) -> list[Overlay]: 30 | overlays = [] 31 | # Add only the overlays that are predicted 32 | label_index_mapping = dict(zip(result.labels, result.label_names)) 33 | for label_index, label_name in label_index_mapping.items(): 34 | # Index 0 as it assumes only one batch 35 | if result.saliency_map is not None and result.saliency_map.size > 0: 36 | saliency_map = cv2.applyColorMap(result.saliency_map[0][label_index], cv2.COLORMAP_JET) 37 | saliency_map = cv2.cvtColor(saliency_map, cv2.COLOR_BGR2RGB) 38 | overlays.append(Overlay(saliency_map, label=label_name.title())) 39 | return overlays 40 | 41 | def _get_bounding_boxes(self, result: DetectionResult) -> list[BoundingBox]: 42 | bounding_boxes = [] 43 | for score, label_name, bbox in zip(result.scores, result.label_names, result.bboxes): 44 | x1, y1, x2, y2 = bbox 45 | label = f"{label_name} ({score:.2f})" 46 | bounding_boxes.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=label)) 47 | return bounding_boxes 48 | 49 | @property 50 | def default_layout(self) -> Layout: 51 | return HStack(Flatten(BoundingBox, Label), Overlay) 52 | -------------------------------------------------------------------------------- /docs/source/models/index.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | ::::{grid} 1 2 2 3 4 | :margin: 1 1 0 0 5 | :gutter: 1 6 | 7 | :::{grid-item-card} Utils 8 | :link: ./utils 9 | :link-type: doc 10 | 11 | [todo] 12 | ::: 13 | 14 | :::{grid-item-card} Detection Model 15 | :link: ./detection_model 16 | :link-type: doc 17 | 18 | [todo] 19 | ::: 20 | :::{grid-item-card} Anomaly 21 | :link: ./anomaly 22 | :link-type: doc 23 | 24 | [todo] 25 | ::: 26 | 27 | :::{grid-item-card} SSD 28 | :link: ./ssd 29 | :link-type: doc 30 | 31 | [todo] 32 | ::: 33 | 34 | :::{grid-item-card} Keypoint Detection 35 | :link: ./keypoint_detection 36 | :link-type: doc 37 | 38 | [todo] 39 | ::: 40 | 41 | :::{grid-item-card} Visual Prompting 42 | :link: ./visual_prompting 43 | :link-type: doc 44 | 45 | [todo] 46 | ::: 47 | 48 | :::{grid-item-card} Classification 49 | :link: ./classification 50 | :link-type: doc 51 | 52 | [todo] 53 | ::: 54 | 55 | :::{grid-item-card} Segmentation 56 | :link: ./segmentation 57 | :link-type: doc 58 | 59 | [todo] 60 | ::: 61 | 62 | :::{grid-item-card} Instance Segmentation 63 | :link: ./instance_segmentation 64 | :link-type: 
doc 65 | 66 | [todo] 67 | ::: 68 | 69 | :::{grid-item-card} Model 70 | :link: ./model 71 | :link-type: doc 72 | 73 | [todo] 74 | ::: 75 | 76 | :::{grid-item-card} Action Classification 77 | :link: ./action_classification 78 | :link-type: doc 79 | 80 | [todo] 81 | ::: 82 | 83 | :::{grid-item-card} Image Model 84 | :link: ./image_model 85 | :link-type: doc 86 | 87 | [todo] 88 | ::: 89 | :::{grid-item-card} Types 90 | :link: ./types 91 | :link-type: doc 92 | 93 | [todo] 94 | ::: 95 | 96 | :::{grid-item-card} Sam Models 97 | :link: ./sam_models 98 | :link-type: doc 99 | 100 | [todo] 101 | ::: 102 | :::{grid-item-card} Yolo 103 | :link: ./yolo 104 | :link-type: doc 105 | 106 | [todo] 107 | ::: 108 | 109 | :::: 110 | 111 | ```{toctree} 112 | :caption: Models 113 | :hidden: 114 | 115 | ./utils 116 | ./detection_model 117 | ./anomaly 118 | ./ssd 119 | ./keypoint_detection 120 | ./visual_prompting 121 | ./classification 122 | ./segmentation 123 | ./instance_segmentation 124 | ./model 125 | ./action_classification 126 | ./image_model 127 | ./types 128 | ./sam_models 129 | ./yolo 130 | ``` 131 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/bounding_box.py: -------------------------------------------------------------------------------- 1 | """Bounding box primitive.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from PIL import Image, ImageDraw 9 | 10 | from .primitive import Primitive 11 | 12 | 13 | class BoundingBox(Primitive): 14 | """Bounding box primitive. 15 | 16 | Args: 17 | x1 (int): x-coordinate of the top-left corner of the bounding box. 18 | y1 (int): y-coordinate of the top-left corner of the bounding box. 19 | x2 (int): x-coordinate of the bottom-right corner of the bounding box. 20 | y2 (int): y-coordinate of the bottom-right corner of the bounding box. 21 | label (str | None): Label of the bounding box. 22 | color (str | tuple[int, int, int]): Color of the bounding box. 23 | 24 | Example: 25 | >>> bounding_box = BoundingBox(x1=10, y1=10, x2=100, y2=100, label="Label Name") 26 | >>> bounding_box.compute(image) 27 | """ 28 | 29 | def __init__( 30 | self, 31 | x1: int, 32 | y1: int, 33 | x2: int, 34 | y2: int, 35 | label: str | None = None, 36 | color: str | tuple[int, int, int] = "blue", 37 | ) -> None: 38 | self.x1 = x1 39 | self.y1 = y1 40 | self.x2 = x2 41 | self.y2 = y2 42 | self.label = label 43 | self.color = color 44 | self.y_buffer = 5 # Text at the bottom of the text box is clipped. This prevents that. 
45 | 46 | def compute(self, image: Image) -> Image: 47 | draw = ImageDraw.Draw(image) 48 | # draw rectangle 49 | draw.rectangle((self.x1, self.y1, self.x2, self.y2), outline=self.color, width=2) 50 | # add label 51 | if self.label: 52 | # draw the background of the label 53 | textbox = draw.textbbox((0, 0), self.label) 54 | label_image = Image.new( 55 | "RGB", 56 | (textbox[2] - textbox[0], textbox[3] + self.y_buffer - textbox[1]), 57 | self.color, 58 | ) 59 | draw = ImageDraw.Draw(label_image) 60 | # write the label on the background 61 | draw.text((0, 0), self.label, fill="white") 62 | image.paste(label_image, (self.x1, self.y1)) 63 | return image 64 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/keypoints.py: -------------------------------------------------------------------------------- 1 | """Keypoints primitive.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from typing import Union 7 | 8 | import numpy as np 9 | from PIL import Image, ImageDraw, ImageFont 10 | 11 | from .primitive import Primitive 12 | 13 | 14 | class Keypoint(Primitive): 15 | """Keypoint primitive. 16 | 17 | Args: 18 | keypoints (np.ndarray): Keypoints. Shape: (N, 2) 19 | scores (np.ndarray | None): Scores. Shape: (N,). Defaults to None. 20 | color (str | tuple[int, int, int]): Color of the keypoints. Defaults to "purple". 21 | """ 22 | 23 | def __init__( 24 | self, 25 | keypoints: np.ndarray, 26 | scores: Union[np.ndarray, None] = None, 27 | color: Union[str, tuple[int, int, int]] = "purple", 28 | keypoint_size: int = 3, 29 | ) -> None: 30 | self.keypoints = self._validate_keypoints(keypoints) 31 | self.scores = scores 32 | self.color = color 33 | self.keypoint_size = keypoint_size 34 | 35 | def compute(self, image: Image) -> Image: 36 | """Draw keypoints on the image.""" 37 | draw = ImageDraw.Draw(image) 38 | for keypoint in self.keypoints: 39 | draw.ellipse( 40 | ( 41 | keypoint[0] - self.keypoint_size, 42 | keypoint[1] - self.keypoint_size, 43 | keypoint[0] + self.keypoint_size, 44 | keypoint[1] + self.keypoint_size, 45 | ), 46 | fill=self.color, 47 | ) 48 | 49 | if self.scores is not None: 50 | font = ImageFont.load_default(size=18) 51 | for score, keypoint in zip(self.scores, self.keypoints): 52 | textbox = draw.textbbox((0, 0), f"{score:.2f}", font=font) 53 | draw.text( 54 | (keypoint[0] - textbox[2] // 2, keypoint[1] + self.keypoint_size), 55 | f"{score:.2f}", 56 | font=font, 57 | fill=self.color, 58 | ) 59 | return image 60 | 61 | def _validate_keypoints(self, keypoints: np.ndarray) -> np.ndarray: 62 | if keypoints.shape[1] != 2: 63 | msg = "Keypoints must have shape (N, 2)" 64 | raise ValueError(msg) 65 | return keypoints 66 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/overlay.py: -------------------------------------------------------------------------------- 1 | """Overlay primitive.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import Union 9 | 10 | import numpy as np 11 | import PIL 12 | from PIL import ImageFont 13 | 14 | from .primitive import Primitive 15 | 16 | 17 | class Overlay(Primitive): 18 | """Overlay primitive. 19 | 20 | Useful for XAI and Anomaly Maps. 21 | 22 | Args: 23 | image (PIL.Image | np.ndarray): Image to be overlaid. 24 | label (str | None): Optional label name to overlay. 
25 | opacity (float): Opacity of the overlay. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | image: PIL.Image | np.ndarray, 31 | opacity: float = 0.4, 32 | label: Union[str, None] = None, 33 | ) -> None: 34 | self.image = self._to_pil(image) 35 | self.label = label 36 | self.opacity = opacity 37 | 38 | def _to_pil(self, image: PIL.Image | np.ndarray) -> PIL.Image: 39 | if isinstance(image, np.ndarray): 40 | return PIL.Image.fromarray(image) 41 | return image 42 | 43 | def compute(self, image: PIL.Image) -> PIL.Image: 44 | image_ = self.image.resize(image.size) 45 | return PIL.Image.blend(image, image_, self.opacity) 46 | 47 | @classmethod 48 | def overlay_labels(cls, image: PIL.Image, labels: Union[list[str], str, None] = None) -> PIL.Image: 49 | """Draw labels at the bottom center of the image. 50 | 51 | This is handy when you want to add a label to the image. 52 | """ 53 | if labels is not None: 54 | labels = [labels] if isinstance(labels, str) else labels 55 | font = ImageFont.load_default(size=18) 56 | buffer_y = 5 57 | dummy_image = PIL.Image.new("RGB", (1, 1)) 58 | draw = PIL.ImageDraw.Draw(dummy_image) 59 | textbox = draw.textbbox((0, 0), ", ".join(labels), font=font) 60 | image_ = PIL.Image.new("RGB", (textbox[2] - textbox[0], textbox[3] + buffer_y - textbox[1]), "white") 61 | draw = PIL.ImageDraw.Draw(image_) 62 | draw.text((0, 0), ", ".join(labels), font=font, fill="black") 63 | image.paste(image_, (image.width // 2 - image_.width // 2, image.height - image_.height - buffer_y)) 64 | return image 65 | -------------------------------------------------------------------------------- /src/model_api/metrics/time_stat.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from time import perf_counter 7 | 8 | 9 | class TimeStat: 10 | """ 11 | A class to represent a statistical time metric. 12 | """ 13 | 14 | def __init__(self): 15 | """ 16 | Initializes the TimeStat object. 17 | """ 18 | self.time = 0.0 19 | self.durations = [] 20 | self.count = 0 21 | self.last_update_time = None 22 | 23 | def __add__(self, other): 24 | """ 25 | Adds two TimeStat objects. 26 | 27 | Returns: 28 | TimeStat: A new TimeStat object representing the sum of the two. 29 | """ 30 | if not isinstance(other, TimeStat): 31 | return NotImplemented 32 | 33 | new_stat = TimeStat() 34 | new_stat.time = self.time + other.time 35 | new_stat.durations = self.durations + other.durations 36 | new_stat.count = self.count + other.count 37 | return new_stat 38 | 39 | def update(self) -> None: 40 | """ 41 | Updates the statistics with the latest duration. 42 | """ 43 | time = perf_counter() 44 | if self.last_update_time: 45 | diff = time - self.last_update_time 46 | self.time += diff 47 | self.durations.append(diff) 48 | self.count += 1 49 | self.last_update_time = None 50 | else: 51 | self.last_update_time = time 52 | 53 | def reset(self) -> None: 54 | """ 55 | Resets the statistics to their initial state. 56 | """ 57 | self.time = 0.0 58 | self.durations = [] 59 | self.count = 0 60 | self.last_update_time = None 61 | 62 | def mean(self) -> float: 63 | """ 64 | Calculates the mean of the recorded durations. 65 | 66 | Returns: 67 | float: The mean of the recorded durations. 68 | """ 69 | return self.time / self.count if self.count != 0 else 0.0 70 | 71 | def stddev(self) -> float: 72 | """ 73 | Calculates the standard deviation of the recorded durations. 
74 | 75 | Returns: 76 | float: The standard deviation of the recorded durations. 77 | """ 78 | if self.count == 0: 79 | return 0.0 80 | mean = self.mean() 81 | variance = sum((x - mean) ** 2 for x in self.durations) / self.count 82 | return variance**0.5 83 | -------------------------------------------------------------------------------- /src/model_api/pipelines/async_pipeline.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | 7 | class AsyncPipeline: 8 | def __init__(self, model): 9 | self.model = model 10 | self.model.load() 11 | 12 | self.completed_results = {} 13 | self.callback_exceptions = [] 14 | self.model.inference_adapter.set_callback(self.callback) 15 | 16 | def callback(self, request, callback_args): 17 | try: 18 | id, meta, preprocessing_meta = callback_args 19 | self.completed_results[id] = ( 20 | self.model.inference_adapter.copy_raw_result(request), 21 | meta, 22 | preprocessing_meta, 23 | ) 24 | except Exception as e: # noqa: BLE001 TODO: Figure out the exact exception that might be raised 25 | self.callback_exceptions.append(e) 26 | 27 | def submit_data(self, inputs, id, meta={}): 28 | self.model.perf.preprocess_time.update() 29 | inputs, preprocessing_meta = self.model.base_preprocess(inputs) 30 | self.model.perf.preprocess_time.update() 31 | 32 | self.model.perf.inference_time.update() 33 | callback_data = id, meta, preprocessing_meta 34 | self.model.infer_async_raw(inputs, callback_data) 35 | 36 | def get_raw_result(self, id): 37 | if id in self.completed_results: 38 | return self.completed_results.pop(id) 39 | return None 40 | 41 | def get_result(self, id): 42 | result = self.get_raw_result(id) 43 | if result: 44 | raw_result, meta, preprocess_meta = result 45 | self.model.perf.inference_time.update() 46 | 47 | self.model.perf.postprocess_time.update() 48 | result = ( 49 | self.model.postprocess(raw_result, preprocess_meta), 50 | { 51 | **meta, 52 | **preprocess_meta, 53 | }, 54 | ) 55 | self.model.perf.postprocess_time.update() 56 | return result 57 | return None 58 | 59 | def is_ready(self): 60 | return self.model.is_ready() 61 | 62 | def await_all(self): 63 | if self.callback_exceptions: 64 | raise self.callback_exceptions[0] 65 | self.model.await_all() 66 | 67 | def await_any(self): 68 | if self.callback_exceptions: 69 | raise self.callback_exceptions[0] 70 | self.model.await_any() 71 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2024 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # Configuration file for the Sphinx documentation builder. 
5 | # 6 | # For the full list of built-in configuration values, see the documentation: 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 8 | 9 | # -- Project information ----------------------------------------------------- 10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 11 | 12 | import sys 13 | from pathlib import Path 14 | 15 | # Define the path to your module using Path 16 | module_path = Path(__file__).parent.parent / "src" 17 | 18 | # Insert the path to sys.path 19 | sys.path.insert(0, str(module_path.resolve())) 20 | 21 | project = "ModelAPI" 22 | copyright = "2025, Intel" 23 | author = "Intel" 24 | release = "2025" 25 | 26 | # -- General configuration --------------------------------------------------- 27 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 28 | 29 | extensions = [ 30 | "breathe", 31 | "sphinx.ext.autodoc", 32 | "sphinx.ext.mathjax", 33 | "sphinx_design", 34 | "myst_parser", 35 | "nbsphinx", 36 | "sphinx.ext.napoleon", 37 | "sphinx_autodoc_typehints", 38 | "sphinx_copybutton", 39 | "sphinx.ext.graphviz", 40 | ] 41 | 42 | myst_enable_extensions = [ 43 | "colon_fence", 44 | # other MyST extensions... 45 | ] 46 | 47 | templates_path = ["_templates"] 48 | exclude_patterns: list[str] = [] 49 | 50 | # Automatic exclusion of prompts from the copies 51 | # https://sphinx-copybutton.readthedocs.io/en/latest/use.html#automatic-exclusion-of-prompts-from-the-copies 52 | copybutton_exclude = ".linenos, .gp, .go" 53 | 54 | # -- Options for HTML output ------------------------------------------------- 55 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 56 | 57 | html_theme = "pydata_sphinx_theme" 58 | html_static_path = ["_static"] 59 | 60 | breathe_projects = {"InferenceSDK": Path(__file__).parent.parent / "build_cpp" / "xml"} 61 | breathe_default_project = "InferenceSDK" 62 | breathe_default_members = ("members", "undoc-members", "private-members") 63 | 64 | autodoc_docstring_signature = True 65 | autodoc_member_order = "bysource" 66 | intersphinx_mapping = { 67 | "python": ("https://docs.python.org/3", None), 68 | "numpy": ("https://numpy.org/doc/stable/", None), 69 | } 70 | autodoc_member_order = "groupwise" 71 | autodoc_default_options = { 72 | "members": True, 73 | "methods": True, 74 | "special-members": "__call__", 75 | "exclude-members": "_abc_impl", 76 | "show-inheritance": True, 77 | } 78 | 79 | autoclass_content = "both" 80 | 81 | autosummary_generate = True # Turn on sphinx.ext.autosummary 82 | -------------------------------------------------------------------------------- /src/model_api/visualizer/layout/hstack.py: -------------------------------------------------------------------------------- 1 | """Horizontal Stack Layout.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING, Type, Union 9 | 10 | import PIL 11 | 12 | from model_api.visualizer.primitive import Overlay 13 | 14 | from .layout import Layout 15 | 16 | if TYPE_CHECKING: 17 | from model_api.visualizer.primitive import Primitive 18 | from model_api.visualizer.scene import Scene 19 | 20 | 21 | class HStack(Layout): 22 | """Horizontal Stack Layout. 23 | 24 | Args: 25 | *args (Union[Type[Primitive], Layout]): Primitives or layouts to be applied. 
26 | """ 27 | 28 | def __init__(self, *args: Union[Type[Primitive], Layout]) -> None: 29 | self.children = args 30 | 31 | def _compute_on_primitive(self, primitive: Type[Primitive], image: PIL.Image, scene: Scene) -> PIL.Image | None: 32 | if scene.has_primitives(primitive): 33 | images = [] 34 | for _primitive in scene.get_primitives(primitive): 35 | image_ = _primitive.compute(image.copy()) 36 | if isinstance(_primitive, Overlay): 37 | image_ = Overlay.overlay_labels(image=image_, labels=_primitive.label) 38 | images.append(image_) 39 | return self._stitch(*images) 40 | return None 41 | 42 | @staticmethod 43 | def _stitch(*images: PIL.Image) -> PIL.Image: 44 | """Stitch images horizontally. 45 | 46 | Args: 47 | images (PIL.Image): Images to be stitched. 48 | 49 | Returns: 50 | PIL.Image: Stitched image. 51 | """ 52 | new_image = PIL.Image.new( 53 | "RGB", 54 | ( 55 | sum(image.width for image in images), 56 | max(image.height for image in images), 57 | ), 58 | ) 59 | x_offset = 0 60 | for image in images: 61 | new_image.paste(image, (x_offset, 0)) 62 | x_offset += image.width 63 | return new_image 64 | 65 | def __call__(self, scene: Scene) -> PIL.Image: 66 | """Stitch images horizontally. 67 | 68 | Args: 69 | scene (Scene): Scene to be stitched. 70 | 71 | Returns: 72 | PIL.Image: Stitched image. 73 | """ 74 | images: list[PIL.Image] = [] 75 | for child in self.children: 76 | if isinstance(child, Layout): 77 | image_ = child(scene) 78 | else: 79 | image_ = self._compute_on_primitive(child, scene.base.copy(), scene) 80 | if image_ is not None: 81 | images.append(image_) 82 | return self._stitch(*images) 83 | -------------------------------------------------------------------------------- /.github/renovate.json5: -------------------------------------------------------------------------------- 1 | // Dependency Update Configuration 2 | // 3 | // See https://docs.renovatebot.com/configuration-options/ 4 | // See https://json5.org/ for JSON5 syntax 5 | 6 | // [!] While updating the Renovate config, test changes on your own fork. 7 | // 1. Modify the Renovate configuration, which is located in .github/renovate.json5 and push your changes to the default branch of your fork. 8 | // 2. Enable the Renovate GitHub app in your GitHub account. 9 | // Verify that Renovate is activated in the repository settings within the Renovate Dashboard. 10 | // To enable the dashboard set `dependencyDashboard` to true 11 | // 3. Trigger the Renovate app from the dashboard, or push a new commit to your fork’s default branch to re-trigger Renovate. 12 | // 4. Use the dashboard to initiate Renovate and create a PR on your fork, then check that the proposed PRs are modifying the correct parts. 13 | // 5. Once you’ve validated that the Renovate configuration works on your fork, submit a PR, 14 | // and include links in the description to share details about the testing you've conducted. 
15 | 16 | { 17 | $schema: "https://docs.renovatebot.com/renovate-schema.json", 18 | 19 | // regenerate lock weekly https://docs.renovatebot.com/configuration-options/#lockfilemaintenance 20 | lockFileMaintenance: { 21 | enabled: true, 22 | schedule: ["* * * * 0"], // weekly 23 | }, 24 | 25 | extends: ["config:base", ":gitSignOff", "helpers:pinGitHubActionDigests"], 26 | // https://docs.renovatebot.com/presets-default/#gitsignoff 27 | // https://docs.renovatebot.com/presets-helpers/#helperspingithubactiondigests 28 | 29 | // if necessary, add supported releases branches here 30 | // it is possible to enable/disable specific upgrades per branch with 31 | // `matchBaseBranches` in specific rule 32 | baseBranches: ["master"], 33 | 34 | enabledManagers: ["github-actions", "pep621"], 35 | 36 | // Set limit to 10 37 | ignorePresets: [":prHourlyLimit2"], 38 | prHourlyLimit: 10, 39 | 40 | packageRules: [ 41 | { 42 | enabled: true, 43 | matchManagers: ["pep621"], 44 | schedule: ["* * * * 0"], // weekly 45 | }, 46 | 47 | // Disable ultralytics notifications (based on previous Dependabot config) 48 | { 49 | enabled: false, 50 | matchDatasources: ["pypi"], 51 | matchDepNames: ["ultralytics"], 52 | }, 53 | 54 | // Group GitHub Actions updates 55 | { 56 | enabled: true, 57 | separateMajorMinor: false, 58 | groupName: "GitHub Actions", 59 | matchManagers: ["github-actions"], 60 | matchPackagePatterns: ["*"], 61 | schedule: ["* * 1 * *"], // every month 62 | }, 63 | ], 64 | 65 | // Enable security upgrades 66 | vulnerabilityAlerts: { 67 | enabled: true, 68 | }, 69 | osvVulnerabilityAlerts: true, 70 | dependencyDashboard: true, 71 | } 72 | -------------------------------------------------------------------------------- /src/docs/visual_prompting.md: -------------------------------------------------------------------------------- 1 | # Visual Prompting with Zero-shot Learning 2 | 3 | ## Use case and high-level description 4 | 5 | Visual Prompting and Zero-shot Visual Prompting allow segmenting objects in images 6 | using only weak supervision, such as point prompts. 7 | The standard Visual Prompting task generates masks from prompts given within the same image. 8 | Zero-shot Visual Prompting captures prompt-supervised features on one image, 9 | and then segments other images using these features without any additional prompts. 10 | 11 | ## Models 12 | 13 | The VPT pipeline uses two models: an encoder and a decoder. 14 | The encoder consumes an image and produces features, while the decoder consumes specially 15 | prepared inputs that include prompts and outputs segmentation masks along with some auxiliary results. 16 | 17 | ### Encoder parameters 18 | 19 | The following parameters can be provided via the Python API or RT Info embedded into the OV model: 20 | 21 | - `image_size`(`int`) : encoder native input resolution. The input is supposed to have a 1:1 aspect ratio 22 | 23 | ### Decoder parameters 24 | 25 | The following parameters can be provided via the Python API or RT Info embedded into the OV model: 26 | 27 | - `image_size`(`int`) : encoder native input resolution. The input is supposed to have a 1:1 aspect ratio 28 | - `mask_threshold`(`float`) : threshold for generating hard predictions from output soft masks 29 | - `embed_dim`(`int`) : size of the output embedding. This parameter is provided for convenience and should match 30 | the real output size. 31 | 32 | ## OV model specifications 33 | 34 | ### Encoder inputs 35 | 36 | A single NCHW tensor representing a batch of images.
37 | 38 | ### Encoder outputs 39 | 40 | A single NDHW tensor, where D is the embedding dimension and HW is the output feature spatial resolution, which can differ from the input spatial resolution. 41 | 42 | ### Decoder inputs 43 | 44 | The decoder OV model should have the following named inputs: 45 | 46 | - `image_embeddings` (B, D, H, W) - embeddings obtained with the encoder 47 | - `point_coords` (B, N, 2) - 2D input prompts in XY format 48 | - `point_labels` (B, N) - integer labels of input point prompts 49 | - `mask_input` (B, 1, H, W) - mask for input embeddings 50 | - `has_mask_input` (B, 1) - 0/1 flag enabling or disabling applying the `mask_input` 51 | - `ori_shape` (B, 2) - resolution of the original image used as an input to the encoder wrapper. 52 | 53 | ### Decoder outputs 54 | 55 | - `upscaled_masks` (B, N, H, W) - masks upscaled to `ori_shape` 56 | - `iou_predictions` (B, N) - IoU predictions for the output masks 57 | - `low_res_masks` (B, N, H, W) - masks in feature resolution 58 | 59 | ## How to use 60 | 61 | See demos: [VPT](https://github.com/open-edge-platform/model_api/tree/master/examples/visual_prompting) 62 | and [ZSL-VPT](https://github.com/open-edge-platform/model_api/tree/master/examples/zsl_visual_prompting) 63 | -------------------------------------------------------------------------------- /src/model_api/adapters/onnx_adapter.md: -------------------------------------------------------------------------------- 1 | # ONNX Runtime Adapter 2 | 3 | The `ONNXRuntimeAdapter` implements the `InferenceAdapter` interface and allows Model API to leverage ONNX Runtime for inference. 4 | 5 | ## Prerequisites 6 | 7 | `ONNXRuntimeAdapter` runs inference via ONNX Runtime, which needs to be installed first: 8 | 9 | ```sh 10 | pip install onnx onnxruntime 11 | ``` 12 | 13 | ### ONNX metadata 14 | 15 | ModelAPI uses IR RTInfo to store metadata (wrapper-specific parameters, preprocessing parameters, labels list, etc.). 16 | For details, see the implementation of the `Model._load_config()` method. 17 | To embed that metadata into an ONNX file, one can use ONNX properties (`metadata_props.add()`) and use 18 | ModelAPI-specific parameters as metadata keys with exactly the same names as in RTInfo, but separated by spaces: 19 | `"model_info model_type"` and so on. A minimal sketch of this is shown after the limitations list below. 20 | 21 | ## Limitations 22 | 23 | - `ONNXRuntimeAdapter` is available in the Python version of ModelAPI only. 24 | - Although `ONNXRuntimeAdapter` doesn't use OpenVINO directly, OpenVINO should still be installed, because Model API depends on it at 25 | a low level. 26 | - Model reshape is not supported, and the input shape should be defined in the model (excluding the batch dimension) to perform 27 | shape inference and parse model outputs successfully. 28 | - The `model.load()` method does nothing; the model is loaded in the constructor of `ONNXRuntimeAdapter`. 29 | - `ONNXRuntimeAdapter` supports only Python-based preprocessing, which can give slightly different results than 30 | OpenVINO graph-based preprocessing. Therefore, inference results can also differ from those obtained with `OpenvinoAdapter`. 31 | - The supported model scope is limited to the `SSD`, `MaskRCNNModel`, `SegmentationModel`, and `ClassificationModel` wrappers.
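To make the metadata description above concrete, here is a minimal sketch of embedding ModelAPI metadata into an ONNX file with `metadata_props.add()`. Only the mechanism itself is taken from the section above; the specific keys and values below are illustrative assumptions, and the authoritative names and formats are the ones your exporting framework writes into RTInfo.

```python
import onnx

# Minimal sketch: embed ModelAPI metadata into an ONNX model.
# Key names mirror RTInfo entries, with nesting expressed by spaces,
# e.g. "model_info model_type". The concrete keys/values below are
# assumptions for illustration only.
model = onnx.load("model.onnx")

assumed_metadata = {
    "model_info model_type": "SSD",  # wrapper class to instantiate (assumed value)
    "model_info confidence_threshold": "0.5",  # assumed postprocessing parameter
}

for key, value in assumed_metadata.items():
    prop = model.metadata_props.add()  # StringStringEntryProto with key/value fields
    prop.key = key
    prop.value = value

onnx.save(model, "model_with_metadata.onnx")
```

With such metadata in place, `ONNXRuntimeAdapter` can pick up wrapper parameters from the ONNX file in the same way `OpenvinoAdapter` reads them from IR RTInfo.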
32 | 33 | ## Running a model with ONNXRuntimeAdapter 34 | 35 | The process of construction of a model with `ONNXRuntimeAdapter` is similar to one with `OpenvinoAdapter`, but 36 | ONNX Runtime session parameters are forwarded to ORT instead of OpenVINO-specific parameters: 37 | 38 | ```python 39 | import cv2 40 | # import model wrapper class 41 | from model_api.models import SSD 42 | # import inference adapter 43 | from model_api.adapters import ONNXRuntimeAdapter 44 | 45 | # read input image using opencv 46 | input_data = cv2.imread("sample.png") 47 | 48 | # define the path to mobilenet-atss model in IR format 49 | model_path = "data/otx_models/det_mobilenetv2_atss_bccd.onnx" 50 | 51 | # create adapter for ONNX runtime, pass the model path 52 | inference_adapter = ONNXRuntimeAdapter(model_path, ort_options={"providers" : ['CPUExecutionProvider']}) 53 | 54 | # create model API wrapper for SSD architecture 55 | # preload=True is required for consistency 56 | ssd_model = SSD(inference_adapter, preload=True) 57 | 58 | # apply input preprocessing, sync inference, model output postprocessing 59 | results = ssd_model(input_data) 60 | ``` 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by examples/synchronous_api/run.py 2 | mask.png 3 | ssd_mobilenet_v1_fpn_coco_with_preprocessing.xml 4 | ssd_mobilenet_v1_fpn_coco_with_preprocessing.bin 5 | tmp/ 6 | # Generated by tests/accuracy/test_accuracy.py 7 | test_scope.json 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | pip-wheel-metadata/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # Sphinx documentation 140 | docs/build/ 141 | docs/build_cpp/ 142 | docs/source/_build/ 143 | 144 | # vs-code 145 | .vscode/ 146 | 147 | data/ 148 | -------------------------------------------------------------------------------- /examples/zsl_visual_prompting/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (C) 2020-2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | import argparse 8 | import colorsys 9 | from itertools import starmap 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from model_api.models import Model, Prompt, SAMLearnableVisualPrompter 15 | 16 | 17 | def get_colors(n: int): 18 | HSV_tuples = [(x / n, 0.5, 0.5) for x in range(n)] 19 | RGB_tuples = starmap(colorsys.hsv_to_rgb, HSV_tuples) 20 | return (np.array(list(RGB_tuples)) * 255).astype(np.uint8) 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description="SAM sample script") 25 | parser.add_argument("image_source", type=str) 26 | parser.add_argument("image_target", type=str) 27 | parser.add_argument("encoder_path", type=str) 28 | parser.add_argument("decoder_path", type=str) 29 | parser.add_argument("prompts", nargs="+", type=int) 30 | parser.add_argument("-t", "--threshold", type=float, default=0.65) 31 | args = parser.parse_args() 32 | 33 | image = cv2.cvtColor(cv2.imread(args.image_source), cv2.COLOR_BGR2RGB) 34 | if image is None: 35 | error_message = f"Failed to read the source image: {args.image_source}" 36 | raise RuntimeError(error_message) 37 | 38 | image_target = cv2.cvtColor(cv2.imread(args.image_target), cv2.COLOR_BGR2RGB) 39 | if image_target is None: 40 | error_message = f"Failed to read the target image: {args.image_target}" 41 | raise RuntimeError(error_message) 42 | 43 | encoder = Model.create_model(args.encoder_path) 44 | decoder = Model.create_model(args.decoder_path) 45 | zsl_sam_prompter = SAMLearnableVisualPrompter( 46 | encoder, 47 | decoder, 48 | threshold=args.threshold, 49 | ) 50 | 51 | all_prompts = [] 52 | for i, p in enumerate(np.array(args.prompts).reshape(-1, 2)): 53 | all_prompts.append(Prompt(p, i)) 54 | 55 | zsl_sam_prompter.learn(image, points=all_prompts) 56 | 57 | colors = get_colors(len(all_prompts)) 58 | 59 | result = zsl_sam_prompter(image_target) 60 | 61 | for i in result.data: 62 | masks = result.get_mask(i) 63 | for j, instance in enumerate(masks.mask): 64 | prompt_point = masks.points[j].astype(np.int32) 65 | confidence = float(masks.scores[j]) 66 | masked_img = np.where(instance[..., None], colors[i], image_target) 67 | image_target = cv2.addWeighted(image_target, 0.2, masked_img, 0.8, 0) 68 | print(f"Reference point: {prompt_point}, point score: {confidence:.3f}") 69 | cv2.circle( 70 | image_target, 71 | prompt_point, 72 | radius=0, 73 | color=(0, 0, 255), 74 | thickness=5, 75 | ) 76 | 77 | cv2.imwrite("zsl_sam_result.jpg", 
cv2.cvtColor(image_target, cv2.COLOR_RGB2BGR)) 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /tests/unit/visualizer/test_primitive.py: -------------------------------------------------------------------------------- 1 | """Tests for primitives.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import numpy as np 7 | import PIL 8 | import pytest 9 | from PIL import ImageDraw 10 | 11 | from model_api.visualizer import BoundingBox, Keypoint, Label, Overlay, Polygon 12 | 13 | 14 | def test_overlay(mock_image: PIL.Image): 15 | """Test if the overlay is created correctly.""" 16 | empty_image = PIL.Image.new("RGB", (100, 100)) 17 | expected_image = PIL.Image.blend(empty_image, mock_image, 0.4) 18 | # Test from image 19 | overlay = Overlay(mock_image) 20 | assert overlay.compute(empty_image) == expected_image 21 | 22 | # Test from numpy array 23 | data = np.zeros((100, 100, 3), dtype=np.uint8) 24 | data *= 255 25 | overlay = Overlay(data) 26 | assert overlay.compute(empty_image) == expected_image 27 | 28 | 29 | def test_bounding_box(mock_image: PIL.Image): 30 | """Test if the bounding box is created correctly.""" 31 | expected_image = mock_image.copy() 32 | draw = ImageDraw.Draw(expected_image) 33 | draw.rectangle((10, 10, 100, 100), outline="blue", width=2) 34 | bounding_box = BoundingBox(x1=10, y1=10, x2=100, y2=100) 35 | assert bounding_box.compute(mock_image) == expected_image 36 | 37 | 38 | def test_polygon(mock_image: PIL.Image): 39 | """Test if the polygon is created correctly.""" 40 | # Test from points 41 | expected_image = mock_image.copy() 42 | draw = ImageDraw.Draw(expected_image) 43 | draw.polygon([(10, 10), (100, 10), (100, 100), (10, 100)], fill="red", width=1) 44 | polygon = Polygon( 45 | points=[(10, 10), (100, 10), (100, 100), (10, 100)], 46 | color="red", 47 | opacity=1, 48 | outline_width=1, 49 | ) 50 | assert polygon.compute(mock_image) == expected_image 51 | 52 | # Test from mask 53 | mask = np.zeros((100, 100), dtype=np.uint8) 54 | mask[10:100, 10:100] = 255 55 | expected_image = mock_image.copy() 56 | draw = ImageDraw.Draw(expected_image) 57 | draw.polygon([(10, 10), (100, 10), (100, 100), (10, 100)], fill="red", width=1) 58 | polygon = Polygon(mask=mask, color="red", opacity=1, outline_width=1) 59 | assert polygon.compute(mock_image) == expected_image 60 | 61 | with pytest.raises(ValueError, match="No contours found in the mask."): 62 | Polygon(mask=np.zeros((100, 100), dtype=np.uint8)).compute(mock_image) 63 | 64 | 65 | def test_label(mock_image: PIL.Image): 66 | label = Label(label="Label") 67 | # When using a single label, compute and overlay_labels should return the same image 68 | assert label.compute(mock_image) == Label.overlay_labels(mock_image, [label]) 69 | 70 | 71 | def test_keypoint(mock_image: PIL.Image): 72 | keypoint = Keypoint(keypoints=np.array([[100, 100]]), color="red", keypoint_size=3) 73 | draw = ImageDraw.Draw(mock_image) 74 | draw.ellipse((97, 97, 103, 103), fill="red") 75 | assert keypoint.compute(mock_image) == mock_image 76 | -------------------------------------------------------------------------------- /.github/workflows/renovate.yml: -------------------------------------------------------------------------------- 1 | # Dependencies Management Workflow 2 | # 3 | # This workflow automates the dependence management based on self-hosed Renovate 4 | # ensure the project's dependencies remains up-to-date and 
security fixes are delivered regularly. 5 | # 6 | # Key Features: 7 | # - Automated PR creation into pyproject.toml and uv.lock regeneration 8 | # - Dry-run for debug purposes 9 | # - Dependency dashboard (is available in GitHub issues) maintenance 10 | # 11 | # Process Stages: 12 | # 13 | # 1. Dependencies Management: 14 | # - Runs on a daily schedule. 15 | # - Identifies dependencies that may be updated based on .github/renovate.json5 configuration. 16 | # - Opens corresponding PRs with respect to schedule defined in Renovate config file. 17 | # - Updates Renovate Dependency dashboard that is available in GitHub issues. 18 | # 19 | # Required Secrets: 20 | # - RENOVATE_APP_ID: application ID 21 | # - RENOVATE_APP_PEM: application private key 22 | # 23 | # Example Usage: 24 | # 1. Scheduled Run: 25 | # Automatically runs, daily 26 | # 27 | # 2. Manual Trigger: 28 | # workflow_dispatch: 29 | # inputs: 30 | # dry-run: 31 | # description: "Run Renovate in dry-run mode (no PR)" 32 | # required: false 33 | # default: false 34 | # type: boolean 35 | # 36 | # Note: Renovate maintains and updates Dependency dashboard that is available in GitHub issues. 37 | 38 | name: Renovate 39 | on: 40 | schedule: 41 | # daily 42 | - cron: "0 2 * * *" 43 | 44 | # allow to manually trigger this workflow 45 | workflow_dispatch: 46 | inputs: 47 | dry-run: 48 | description: "Run Renovate in dry-run mode (no PR)" 49 | required: false 50 | default: false 51 | type: boolean 52 | 53 | permissions: {} 54 | 55 | jobs: 56 | renovate: 57 | permissions: 58 | contents: read 59 | runs-on: ubuntu-latest 60 | 61 | steps: 62 | - name: Checkout 63 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 64 | with: 65 | persist-credentials: false 66 | 67 | - name: Get token 68 | id: get-github-app-token 69 | uses: actions/create-github-app-token@29824e69f54612133e76f7eaac726eef6c875baf # v2.2.1 70 | with: 71 | app-id: ${{ secrets.RENOVATE_APP_ID }} 72 | private-key: ${{ secrets.RENOVATE_APP_PEM }} 73 | 74 | - name: Self-hosted Renovate 75 | uses: renovatebot/github-action@5712c6a41dea6cdf32c72d92a763bd417e6606aa # v44.0.5 76 | with: 77 | configurationFile: .github/renovate.json5 78 | token: "${{ steps.get-github-app-token.outputs.token }}" 79 | env: 80 | LOG_LEVEL: ${{ github.event_name == 'workflow_dispatch' && 'debug' || 'info' }} 81 | # Dry run if the event is workflow_dispatch AND the dry-run input is true 82 | RENOVATE_DRY_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.dry-run == 'true') && 'full' || null }} 83 | RENOVATE_PLATFORM: github 84 | RENOVATE_REPOSITORIES: ${{ github.repository }} 85 | -------------------------------------------------------------------------------- /src/model_api/models/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from .action_classification import ActionClassificationModel 7 | from .anomaly import AnomalyDetection 8 | from .classification import ClassificationModel 9 | from .detection_model import DetectionModel 10 | from .image_model import ImageModel 11 | from .instance_segmentation import MaskRCNNModel 12 | from .keypoint_detection import KeypointDetectionModel, TopDownKeypointDetectionPipeline 13 | from .model import Model 14 | from .result import ( 15 | AnomalyResult, 16 | ClassificationResult, 17 | Contour, 18 | DetectedKeypoints, 19 | DetectionResult, 20 | ImageResultWithSoftPrediction, 21 
| InstanceSegmentationResult, 22 | PredictedMask, 23 | RotatedSegmentationResult, 24 | VisualPromptingResult, 25 | ZSLVisualPromptingResult, 26 | ) 27 | from .sam_models import SAMDecoder, SAMImageEncoder 28 | from .segmentation import SegmentationModel 29 | from .ssd import SSD 30 | from .utils import ( 31 | OutputTransform, 32 | ResizeMetadata, 33 | add_rotated_rects, 34 | get_contours, 35 | ) 36 | from .visual_prompting import Prompt, SAMLearnableVisualPrompter, SAMVisualPrompter 37 | from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4, YOLOv5, YOLOv8 38 | 39 | classification_models = [ 40 | "resnet-18-pytorch", 41 | "se-resnext-50", 42 | "mobilenet-v3-large-1.0-224-tf", 43 | "efficientnet-b0-pytorch", 44 | ] 45 | 46 | detection_models = [ 47 | # "face-detection-retail-0044", # resize_type is wrong or missed in model.yml 48 | "yolo-v4-tf", 49 | "ssd_mobilenet_v1_fpn_coco", 50 | "ssdlite_mobilenet_v2", 51 | ] 52 | 53 | segmentation_models = [ 54 | "fastseg-small", 55 | ] 56 | 57 | 58 | __all__ = [ 59 | "ActionClassificationModel", 60 | "add_rotated_rects", 61 | "AnomalyDetection", 62 | "AnomalyResult", 63 | "classification_models", 64 | "ClassificationModel", 65 | "ClassificationResult", 66 | "Contour", 67 | "detection_models", 68 | "DetectedKeypoints", 69 | "DetectionModel", 70 | "DetectionResult", 71 | "get_contours", 72 | "ImageModel", 73 | "ImageResultWithSoftPrediction", 74 | "InstanceSegmentationResult", 75 | "KeypointDetectionModel", 76 | "Label", 77 | "MaskRCNNModel", 78 | "Model", 79 | "OutputTransform", 80 | "PredictedMask", 81 | "Prompt", 82 | "ResizeMetadata", 83 | "RotatedSegmentationResult", 84 | "SAMDecoder", 85 | "SAMImageEncoder", 86 | "SAMLearnableVisualPrompter", 87 | "SAMVisualPrompter", 88 | "SalientObjectDetectionModel", 89 | "segmentation_models", 90 | "SegmentationModel", 91 | "SSD", 92 | "TopDownKeypointDetectionPipeline", 93 | "VisualPromptingResult", 94 | "YOLO", 95 | "YOLOF", 96 | "YOLOv3ONNX", 97 | "YOLOv4", 98 | "YOLOv5", 99 | "YOLOv8", 100 | "YOLOX", 101 | "ZSLVisualPromptingResult", 102 | "YoloV3ONNX", 103 | "YoloV4", 104 | ] 105 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Build and upload to PyPI 2 | 3 | on: 4 | workflow_dispatch: # run on request (no need for PR) 5 | release: 6 | types: [published] 7 | 8 | permissions: {} # No permissions by default on workflow level 9 | 10 | jobs: 11 | build: 12 | name: Build 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 17 | with: 18 | persist-credentials: false 19 | - name: Set up Python 20 | uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 21 | with: 22 | python-version-file: ".python-version" 23 | - name: Install pypa/build 24 | run: | 25 | uv sync --locked 26 | - name: Build sdist 27 | run: | 28 | uv build --sdist 29 | - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 30 | with: 31 | name: artifact-sdist 32 | path: dist/*.tar.gz 33 | - name: Build wheel 34 | run: | 35 | uv build --wheel 36 | - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 37 | with: 38 | name: artifact-wheel 39 | path: dist/*.whl 40 | 41 | publish_package: 42 | name: Publish package 43 | needs: [build] 44 | environment: pypi 45 | runs-on: ubuntu-latest 46 | permissions: 47 | contents: write # 
required by svenstaro/upload-release-action 48 | id-token: write # required by trusted publisher 49 | steps: 50 | - name: Download artifacts 51 | uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 52 | with: 53 | path: dist 54 | pattern: artifact-* 55 | merge-multiple: true 56 | # to determine where to publish the package distribution to PyPI or TestPyPI 57 | - name: Check tag 58 | id: check-tag 59 | uses: actions-ecosystem/action-regex-match@9e6c4fb3d5e898f505be7a1fb6e7b0a278f6665b # v2.0.2 60 | with: 61 | text: ${{ github.ref }} 62 | regex: '^refs/tags/[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)+(\.[0-9]+rc[0-9]+|rc[0-9]+)?$' 63 | - name: Upload package distributions to github 64 | if: ${{ steps.check-tag.outputs.match != '' }} 65 | uses: svenstaro/upload-release-action@6b7fa9f267e90b50a19fef07b3596790bb941741 # 2.11.3 66 | with: 67 | repo_token: ${{ secrets.GITHUB_TOKEN }} 68 | file: dist/* 69 | tag: ${{ github.ref }} 70 | overwrite: true 71 | file_glob: true 72 | - name: Publish package distributions to PyPI 73 | if: ${{ steps.check-tag.outputs.match != '' }} 74 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 75 | - name: Publish package distributions to TestPyPI 76 | if: ${{ steps.check-tag.outputs.match == '' }} 77 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 78 | with: 79 | repository-url: https://test.pypi.org/legacy/ 80 | verbose: true 81 | -------------------------------------------------------------------------------- /.github/workflows/security-scan.yml: -------------------------------------------------------------------------------- 1 | name: Security Scans 2 | 3 | on: 4 | schedule: 5 | # Run security checks every day at 2 AM UTC 6 | - cron: "0 2 * * *" 7 | workflow_dispatch: 8 | push: 9 | branches: 10 | - master 11 | - release** 12 | 13 | permissions: {} 14 | 15 | jobs: 16 | zizmor-scan: 17 | runs-on: ubuntu-latest 18 | permissions: 19 | contents: read 20 | security-events: write # Needed to upload the results to code-scanning dashboard 21 | steps: 22 | - name: Checkout code 23 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 24 | with: 25 | persist-credentials: false 26 | - name: Run Zizmor scan 27 | uses: open-edge-platform/geti-ci/actions/zizmor@66652424b4ec87ff529dce5ae4a03f339e58a84b 28 | with: 29 | scan-scope: "all" 30 | severity-level: "LOW" 31 | confidence-level: "LOW" 32 | fail-on-findings: false # reports only 33 | 34 | bandit-scan: 35 | runs-on: ubuntu-latest 36 | permissions: 37 | contents: read 38 | security-events: write # Needed to upload the results to code-scanning dashboard 39 | steps: 40 | - name: Checkout code 41 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 42 | with: 43 | persist-credentials: false 44 | - name: Run Bandit scan 45 | uses: open-edge-platform/geti-ci/actions/bandit@66652424b4ec87ff529dce5ae4a03f339e58a84b 46 | with: 47 | scan-scope: "all" 48 | severity-level: "LOW" 49 | confidence-level: "LOW" 50 | config_file: "pyproject.toml" 51 | fail-on-findings: false # reports only 52 | 53 | trivy-scan: 54 | runs-on: ubuntu-latest 55 | permissions: 56 | contents: read 57 | security-events: write # Needed to upload the results to code-scanning dashboard 58 | steps: 59 | - name: Checkout code 60 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 61 | with: 62 | persist-credentials: false 63 | - name: Run Trivy scan 64 | id: trivy 65 | uses: 
open-edge-platform/geti-ci/actions/trivy@66652424b4ec87ff529dce5ae4a03f339e58a84b 66 | with: 67 | scan_type: "fs" 68 | scan-scope: all 69 | severity: LOW 70 | scanners: "vuln,secret,config" 71 | format: "sarif" 72 | timeout: "15m" 73 | ignore_unfixed: "false" 74 | 75 | semgrep-scan: 76 | runs-on: ubuntu-latest 77 | permissions: 78 | contents: read 79 | security-events: write # Needed to upload the results to code-scanning dashboard 80 | steps: 81 | - name: Checkout code 82 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 83 | with: 84 | persist-credentials: false 85 | - name: Run Semgrep scan 86 | id: semgrep 87 | uses: open-edge-platform/geti-ci/actions/semgrep@66652424b4ec87ff529dce5ae4a03f339e58a84b 88 | with: 89 | scan-scope: "all" 90 | severity: "LOW" 91 | fail-on-findings: false # reports only 92 | -------------------------------------------------------------------------------- /examples/metrics/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark - a metrics API example 2 | 3 | This example demonstrates how to use the Python API of OpenVINO Model API for performance analysis and metrics collection during model inference. This tutorial includes the following features: 4 | 5 | - Model performance measurement 6 | - Configurable device selection (CPU, GPU, etc.) 7 | - Automatic image dataset discovery 8 | - Warm-up and test runs with customizable parameters 9 | - Detailed inference time analysis 10 | - Metrics logging and reporting 11 | - Performance statistics calculation 12 | 13 | ## Prerequisites 14 | 15 | Install Model API from source. Please refer to the main [README](../../../README.md) for details. 16 | 17 | ## Run example 18 | 19 | To run the example, please execute the following command: 20 | 21 | ```bash 22 | python benchmark.py [options] 23 | ``` 24 | 25 | ### Required Arguments 26 | 27 | - `model_path` - Path to the model file (.xml) 28 | - `dataset_path` - Path to the dataset directory containing test images 29 | 30 | ### Optional Arguments 31 | 32 | - `--device` - Device to run the model on (default: CPU) 33 | - `--warmup-runs` - Number of warmup runs (default: 5) 34 | - `--test-runs` - Number of test runs (default: 100) 35 | 36 | ### Examples 37 | 38 | ```bash 39 | # Basic usage with CPU 40 | python benchmark.py /path/to/model.xml /path/to/images 41 | 42 | # Use GPU with custom parameters 43 | python benchmark.py /path/to/model.xml /path/to/images --device GPU --warmup-runs 10 --test-runs 50 44 | 45 | # Show help 46 | python benchmark.py --help 47 | ``` 48 | 49 | ## Expected Output 50 | 51 | The example will display: 52 | 53 | - Number of images found in the dataset directory 54 | - Progress updates during warm-up and test phases 55 | - Comprehensive performance analysis results including timing statistics 56 | - Detailed metrics about the model's inference performance on the specified device 57 | 58 | Example output 59 | 60 | ```bash 61 | OpenVINO Runtime 62 | build: 2025.2.0-19140-c01cd93e24d-releases/2025/2 63 | Reading model model.xml 64 | The model model.xml is loaded to CPU 65 | Number of model infer requests: 2 66 | Starting warm-up... 67 | Running 100 test inferences... 
68 | Completed 10/100 69 | Completed 20/100 70 | Completed 30/100 71 | Completed 40/100 72 | Completed 50/100 73 | Completed 60/100 74 | Completed 70/100 75 | Completed 80/100 76 | Completed 90/100 77 | Completed 100/100 78 | ============================================================ 79 | 🚀 PERFORMANCE METRICS REPORT 🚀 80 | ============================================================ 81 | 82 | 📊 Model Loading: 83 | Load Time: 2.497s 84 | 85 | ⚙️ Processing Times (mean ± std): 86 | Preprocess: 0.001s ± 0.000s 87 | Inference: 0.570s ± 0.020s 88 | Postprocess: 0.001s ± 0.000s 89 | 90 | 📈 Total Time Statistics: 91 | Mean: 0.572s ± 0.020s 92 | Min: 0.556s 93 | Max: 0.642s 94 | 95 | 🎯 Performance Summary: 96 | Total Frames: 100 97 | FPS: 1.75 98 | ============================================================ 99 | ``` 100 | -------------------------------------------------------------------------------- /src/model_api/tilers/semantic_segmentation.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from __future__ import annotations 7 | 8 | from contextlib import contextmanager 9 | 10 | import numpy as np 11 | 12 | from model_api.models import ImageResultWithSoftPrediction, SegmentationModel 13 | 14 | from .tiler import Tiler 15 | 16 | 17 | class SemanticSegmentationTiler(Tiler): 18 | """Tiler for segmentation models.""" 19 | 20 | def _postprocess_tile( 21 | self, 22 | predictions: ImageResultWithSoftPrediction, 23 | coord: list[int], 24 | ) -> dict: 25 | """Converts predictions to a format convenient for further merging. 26 | 27 | Args: 28 | predictions (ImageResultWithSoftPrediction): predictions from SegmentationModel 29 | coord (list[int]): coordinates of the tile 30 | 31 | Returns: 32 | dict: postprocessed predictions 33 | """ 34 | output_dict = {} 35 | output_dict["coord"] = coord 36 | output_dict["masks"] = predictions.soft_prediction 37 | return output_dict 38 | 39 | def _merge_results( 40 | self, 41 | results: list[dict], 42 | shape: tuple[int, int, int], 43 | ) -> ImageResultWithSoftPrediction: 44 | """Merge the results from all tiles. 
45 | 46 | Args: 47 | results (list[dict]): list of tile predictions 48 | shape (tuple[int, int, int]): shape of the original image 49 | 50 | Returns: 51 | ImageResultWithSoftPrediction: merged predictions 52 | """ 53 | height, width = shape[:2] 54 | num_classes = len(self.model.params.labels) 55 | full_logits_mask = np.zeros((height, width, num_classes), dtype=np.float32) 56 | vote_mask = np.zeros((height, width), dtype=np.int32) 57 | for result in results: 58 | x1, y1, x2, y2 = result["coord"] 59 | mask = result["masks"] 60 | vote_mask[y1:y2, x1:x2] += 1 61 | full_logits_mask[y1:y2, x1:x2, :] += mask[: y2 - y1, : x2 - x1, :] 62 | 63 | full_logits_mask = full_logits_mask / vote_mask[:, :, None] 64 | index_mask = full_logits_mask.argmax(2) 65 | return ImageResultWithSoftPrediction( 66 | resultImage=index_mask, 67 | soft_prediction=full_logits_mask, 68 | feature_vector=np.array([]), 69 | saliency_map=np.array([]), 70 | ) 71 | 72 | def __call__(self, inputs): 73 | @contextmanager 74 | def setup_segm_model(): 75 | return_soft_prediction_state = None 76 | if isinstance(self.model, SegmentationModel): 77 | return_soft_prediction_state = self.model.params.return_soft_prediction 78 | self.model._return_soft_prediction = True # noqa: SLF001 79 | try: 80 | yield 81 | finally: 82 | if isinstance(self.model, SegmentationModel): 83 | self.model._return_soft_prediction = return_soft_prediction_state # noqa: SLF001 84 | 85 | with setup_segm_model(): 86 | return super().__call__(inputs) 87 | -------------------------------------------------------------------------------- /src/model_api/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | """Visualizer for modelAPI.""" 2 | 3 | # Copyright (C) 2024-2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING 9 | 10 | import numpy as np 11 | from PIL import Image 12 | 13 | from model_api.models.result import ( 14 | AnomalyResult, 15 | ClassificationResult, 16 | DetectedKeypoints, 17 | DetectionResult, 18 | ImageResultWithSoftPrediction, 19 | InstanceSegmentationResult, 20 | Result, 21 | ) 22 | 23 | from .scene import ( 24 | AnomalyScene, 25 | ClassificationScene, 26 | DetectionScene, 27 | InstanceSegmentationScene, 28 | KeypointScene, 29 | Scene, 30 | SegmentationScene, 31 | ) 32 | 33 | if TYPE_CHECKING: 34 | from pathlib import Path 35 | 36 | from .layout import Layout 37 | 38 | 39 | class Visualizer: 40 | """Utility class to automatically select the correct scene and render/show it.""" 41 | 42 | def __init__(self, layout: Layout | None = None) -> None: 43 | self.layout = layout 44 | 45 | def show(self, image: Image.Image | np.ndarray, result: Result) -> None: 46 | if isinstance(image, np.ndarray): 47 | image = Image.fromarray(image) 48 | scene = self._scene_from_result(image, result) 49 | return scene.show() 50 | 51 | def save(self, image: Image.Image | np.ndarray, result: Result, path: Path) -> None: 52 | if isinstance(image, np.ndarray): 53 | image = Image.fromarray(image) 54 | scene = self._scene_from_result(image, result) 55 | scene.save(path) 56 | 57 | def render(self, image: Image.Image | np.ndarray, result: Result) -> Image.Image | np.ndarray: 58 | is_numpy = isinstance(image, np.ndarray) 59 | 60 | if is_numpy: 61 | image = Image.fromarray(image) 62 | 63 | scene = self._scene_from_result(image, result) 64 | result_img: Image = scene.render() 65 | 66 | if is_numpy: 67 | return np.array(result_img) 68 | 69 | 
return result_img 70 | 71 | def _scene_from_result(self, image: Image, result: Result) -> Scene: 72 | scene: Scene 73 | if isinstance(result, AnomalyResult): 74 | scene = AnomalyScene(image, result, self.layout) 75 | elif isinstance(result, ClassificationResult): 76 | scene = ClassificationScene(image, result, self.layout) 77 | elif isinstance(result, InstanceSegmentationResult): 78 | # Note: This has to be before DetectionScene because InstanceSegmentationResult is a subclass 79 | # of DetectionResult 80 | scene = InstanceSegmentationScene(image, result, self.layout) 81 | elif isinstance(result, ImageResultWithSoftPrediction): 82 | scene = SegmentationScene(image, result, self.layout) 83 | elif isinstance(result, DetectionResult): 84 | scene = DetectionScene(image, result, self.layout) 85 | elif isinstance(result, DetectedKeypoints): 86 | scene = KeypointScene(image, result, self.layout) 87 | else: 88 | msg = f"Unsupported result type: {type(result)}" 89 | raise ValueError(msg) 90 | 91 | return scene 92 | -------------------------------------------------------------------------------- /examples/metrics/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (C) 2020-2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | import argparse 7 | import logging 8 | import sys 9 | from pathlib import Path 10 | 11 | import cv2 12 | 13 | from model_api.models import Model 14 | 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | format="%(message)s", 18 | ) 19 | 20 | 21 | def get_image_files(dataset_path: str) -> list[str]: 22 | """Get list of image files from the dataset directory.""" 23 | image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff", "*.tif"] 24 | test_images: list[str] = [] 25 | test_path = Path(dataset_path) 26 | 27 | for ext in image_extensions: 28 | test_images.extend(str(p) for p in test_path.glob(ext)) 29 | test_images.extend(str(p) for p in test_path.glob(ext.upper())) 30 | 31 | return test_images 32 | 33 | 34 | def analyze_model_performance(model_path, test_images, device, warmup_runs, test_runs): 35 | """Complete performance analysis example.""" 36 | 37 | # Load model 38 | model = Model.create_model(model_path, device=device) 39 | 40 | # Load test image 41 | image = cv2.imread(test_images[0]) 42 | 43 | print("Starting warm-up...") 44 | # Warm-up runs 45 | for _ in range(warmup_runs): 46 | model(image) 47 | 48 | # Reset metrics after warm-up 49 | model.get_performance_metrics().reset() 50 | 51 | print(f"Running {test_runs} test inferences...") 52 | # Performance measurement runs 53 | for i, image_path in enumerate(test_images[:test_runs]): 54 | image = cv2.imread(image_path) 55 | model(image) 56 | # Log progress 57 | if (i + 1) % 10 == 0: 58 | print(f" Completed {i + 1}/{test_runs}") 59 | 60 | # Analyze results 61 | metrics = model.get_performance_metrics() 62 | metrics.log_metrics() 63 | 64 | return metrics 65 | 66 | 67 | def main(): 68 | parser = argparse.ArgumentParser(description="Benchmark - a model performance analysis with metrics collection") 69 | parser.add_argument("model_path", help="Path to the model file (.xml)") 70 | parser.add_argument("dataset_path", help="Path to the dataset directory containing test images") 71 | parser.add_argument("--device", type=str, default="CPU", help="OpenVINO device to run the model on (default: CPU)") 72 | parser.add_argument("--warmup-runs", type=int, default=5, help="Number of warmup runs (default: 5)") 73 | 
parser.add_argument("--test-runs", type=int, default=100, help="Number of test runs (default: 100)") 74 | 75 | # Show help if no arguments are provided 76 | if len(sys.argv) == 1: 77 | parser.print_help() 78 | return 79 | 80 | args = parser.parse_args() 81 | 82 | model_path = args.model_path 83 | dataset_path = args.dataset_path 84 | 85 | # Get list of image files from the directory 86 | test_images = get_image_files(dataset_path) 87 | 88 | print(f"Found {len(test_images)} images in {dataset_path}") 89 | 90 | if not test_images: 91 | print("Error: No images found in the dataset directory!") 92 | exit(1) 93 | 94 | analyze_model_performance(model_path, test_images, args.device, args.warmup_runs, args.test_runs) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/segmentation/instance_segmentation.py: -------------------------------------------------------------------------------- 1 | """Instance Segmentation Scene.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import random 7 | from typing import Union 8 | 9 | import cv2 10 | from PIL import Image 11 | 12 | from model_api.models.result import InstanceSegmentationResult 13 | from model_api.visualizer.layout import Flatten, HStack, Layout 14 | from model_api.visualizer.primitive import BoundingBox, Label, Overlay, Polygon 15 | from model_api.visualizer.scene import Scene 16 | 17 | 18 | class InstanceSegmentationScene(Scene): 19 | """Instance Segmentation Scene.""" 20 | 21 | def __init__(self, image: Image, result: InstanceSegmentationResult, layout: Union[Layout, None] = None) -> None: 22 | # nosec as random is used for color generation 23 | g = random.Random(0) # noqa: S311 # nosec B311 24 | self.color_per_label = {label: f"#{g.randint(0, 0xFFFFFF):06x}" for label in set(result.label_names)} # nosec B311 25 | super().__init__( 26 | base=image, 27 | label=self._get_labels(result), 28 | overlay=self._get_overlays(result), 29 | polygon=self._get_polygons(result), 30 | layout=layout, 31 | ) 32 | 33 | def _get_labels(self, result: InstanceSegmentationResult) -> list[Label]: 34 | # add only unique labels 35 | labels = [] 36 | for label_name in set(result.label_names): 37 | labels.append(Label(label=label_name, bg_color=self.color_per_label[label_name])) 38 | return labels 39 | 40 | def _get_polygons(self, result: InstanceSegmentationResult) -> list[Polygon]: 41 | polygons = [] 42 | for mask, label_name in zip(result.masks, result.label_names): 43 | polygons.append(Polygon(mask=mask, color=self.color_per_label[label_name])) 44 | return polygons 45 | 46 | def _get_bounding_boxes(self, result: InstanceSegmentationResult) -> list[BoundingBox]: 47 | bounding_boxes = [] 48 | for bbox, label_name, score in zip(result.bboxes, result.label_names, result.scores): 49 | x1, y1, x2, y2 = bbox 50 | label = f"{label_name} ({score:.2f})" 51 | bounding_boxes.append( 52 | BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=label, color=self.color_per_label[label_name]), 53 | ) 54 | return bounding_boxes 55 | 56 | def _get_overlays(self, result: InstanceSegmentationResult) -> list[Overlay]: 57 | overlays = [] 58 | if result.saliency_map is not None and len(result.saliency_map) > 0: 59 | labels_label_names_mapping = dict(zip(result.labels, result.label_names)) 60 | for label, label_name in labels_label_names_mapping.items(): 61 | saliency_map = result.saliency_map[label - 1] 62 | saliency_map = 
cv2.applyColorMap(saliency_map, cv2.COLORMAP_JET) 63 | saliency_map = cv2.cvtColor(saliency_map, cv2.COLOR_BGR2RGB) 64 | overlays.append(Overlay(saliency_map, label=f"{label_name.title()} Saliency Map")) 65 | return overlays 66 | 67 | @property 68 | def default_layout(self) -> Layout: 69 | # by default bounding box is not shown. 70 | return HStack(Flatten(Label, Polygon), Overlay) 71 | -------------------------------------------------------------------------------- /.github/workflows/test_precommit.yml: -------------------------------------------------------------------------------- 1 | name: test_precommit 2 | permissions: {} # No permissions by default on workflow level 3 | on: 4 | pull_request: 5 | merge_group: 6 | branches: 7 | - master 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | jobs: 12 | Python-Functional-Tests: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: 17 | - "ubuntu-24.04" 18 | - "windows-2022" 19 | python-version: 20 | - "3.10" 21 | - "3.11" 22 | - "3.12" 23 | - "3.13" 24 | name: pr pre-commit test (${{ matrix.os }}, Python ${{ matrix.python-version }}) 25 | runs-on: ${{ matrix.os }} 26 | steps: 27 | - name: CHECKOUT REPOSITORY 28 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 29 | with: 30 | persist-credentials: false 31 | - name: Install uv 32 | uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # v7.1.5 33 | with: 34 | enable-cache: false 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install dependencies 37 | run: | 38 | uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu 39 | - name: Prepare test data 40 | run: | 41 | uv run python tests/accuracy/download_models.py -d data -j tests/precommit/public_scope.json -l 42 | - name: Run test 43 | run: | 44 | uv run pytest --data=./data tests/functional 45 | Zizmor-Scan-PR: 46 | runs-on: ubuntu-latest 47 | permissions: 48 | contents: read 49 | steps: 50 | - name: Checkout code 51 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 52 | with: 53 | persist-credentials: false 54 | - name: Run Zizmor scan 55 | uses: open-edge-platform/geti-ci/actions/zizmor@66652424b4ec87ff529dce5ae4a03f339e58a84b 56 | with: 57 | scan-scope: "changed" 58 | severity-level: "LOW" 59 | confidence-level: "LOW" 60 | fail-on-findings: true 61 | Bandit-Scan-PR: 62 | runs-on: ubuntu-latest 63 | permissions: 64 | contents: read 65 | steps: 66 | - name: Checkout code 67 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 68 | with: 69 | persist-credentials: false 70 | - name: Run Bandit scan 71 | uses: open-edge-platform/geti-ci/actions/bandit@66652424b4ec87ff529dce5ae4a03f339e58a84b 72 | with: 73 | scan-scope: "changed" 74 | severity-level: "LOW" 75 | confidence-level: "LOW" 76 | config_file: "./pyproject.toml" 77 | fail-on-findings: true 78 | 79 | Semgrep-Scan-PR: 80 | runs-on: ubuntu-latest 81 | permissions: 82 | contents: read 83 | security-events: write # Needed to upload the results in SARIF 84 | steps: 85 | - name: Checkout code 86 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 87 | with: 88 | persist-credentials: false 89 | fetch-depth: 0 90 | - name: Run Semgrep scan 91 | uses: open-edge-platform/geti-ci/actions/semgrep@66652424b4ec87ff529dce5ae4a03f339e58a84b 92 | with: 93 | scan-scope: "changed" 94 | severity: "LOW" 95 | fail-on-findings: true 96 | 
-------------------------------------------------------------------------------- /src/model_api/models/result/visual_prompting.py: -------------------------------------------------------------------------------- 1 | """Visual Prompting result type.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | import numpy as np 9 | 10 | from .base import Result 11 | 12 | 13 | class VisualPromptingResult(Result): 14 | def __init__( 15 | self, 16 | upscaled_masks: list[np.ndarray] | None = None, 17 | processed_mask: list[np.ndarray] | None = None, 18 | low_res_masks: list[np.ndarray] | None = None, 19 | iou_predictions: list[np.ndarray] | None = None, 20 | scores: list[np.ndarray] | None = None, 21 | labels: list[np.ndarray] | None = None, 22 | hard_predictions: list[np.ndarray] | None = None, 23 | soft_predictions: list[np.ndarray] | None = None, 24 | best_iou: list[float] | None = None, 25 | ) -> None: 26 | self.upscaled_masks = upscaled_masks 27 | self.processed_mask = processed_mask 28 | self.low_res_masks = low_res_masks 29 | self.iou_predictions = iou_predictions 30 | self.scores = scores 31 | self.labels = labels 32 | self.hard_predictions = hard_predictions 33 | self.soft_predictions = soft_predictions 34 | self.best_iou = best_iou 35 | 36 | def _compute_min_max(self, tensor: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 37 | return tensor.min(), tensor.max() 38 | 39 | def __str__(self) -> str: 40 | assert self.hard_predictions is not None 41 | assert self.upscaled_masks is not None 42 | upscaled_masks_min, upscaled_masks_max = self._compute_min_max( 43 | self.upscaled_masks[0], 44 | ) 45 | 46 | return ( 47 | f"upscaled_masks min:{upscaled_masks_min:.3f} max:{upscaled_masks_max:.3f};" 48 | f"hard_predictions shape:{self.hard_predictions[0].shape};" 49 | ) 50 | 51 | 52 | class PredictedMask: 53 | def __init__( 54 | self, 55 | mask: list[np.ndarray], 56 | points: list[np.ndarray] | np.ndarray, 57 | scores: list[float] | np.ndarray, 58 | ) -> None: 59 | self.mask = mask 60 | self.points = points 61 | self.scores = scores 62 | 63 | def __str__(self) -> str: 64 | obj_str = "" 65 | obj_str += f"mask sum: {np.sum(sum(self.mask))}; " 66 | 67 | if isinstance(self.points, list): 68 | for i, point in enumerate(self.points): 69 | obj_str += "[" 70 | obj_str += ", ".join(str(round(c, 2)) for c in point) 71 | obj_str += "] " 72 | obj_str += "iou: " + f"{float(self.scores[i]):.3f} " 73 | else: 74 | for i in range(self.points.shape[0]): 75 | point = self.points[i] 76 | obj_str += "[" 77 | obj_str += ", ".join(str(round(c, 2)) for c in point) 78 | obj_str += "] " 79 | obj_str += "iou: " + f"{float(self.scores[i]):.3f} " 80 | 81 | return obj_str.strip() 82 | 83 | 84 | class ZSLVisualPromptingResult(Result): 85 | def __init__(self, data: dict[int, PredictedMask]) -> None: 86 | self.data = data 87 | 88 | def __str__(self) -> str: 89 | return ", ".join(str(self.data[k]) for k in self.data) 90 | 91 | def get_mask(self, label: int) -> PredictedMask: 92 | """Returns a mask belonging to a given label""" 93 | return self.data[label] 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenVINO Model API 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/openvino-model-api)](https://pypi.org/project/openvino-model-api) 4 | 
[![Downloads](https://static.pepy.tech/personalized-badge/openvino-model-api?period=total&units=international_system&left_color=grey&right_color=green&left_text=PyPI%20Downloads)](https://pepy.tech/project/openvino-model-api) 5 | 6 | [![Pre-Merge Test](https://github.com/open-edge-platform/model_api/actions/workflows/pre_commit.yml/badge.svg)](https://github.com/open-edge-platform/model_api/actions/workflows/pre_commit.yml) 7 | [![Build Docs](https://github.com/open-edge-platform/model_api/actions/workflows/docs.yml/badge.svg)](https://github.com/open-edge-platform/model_api/actions/workflows/docs.yml) 8 | 9 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 10 | [![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/open-edge-platform/model_api/badge)](https://scorecard.dev/viewer/?uri=github.com/open-edge-platform/model_api) 11 | 12 | ## Introduction 13 | 14 | Model API is a set of wrapper classes for particular tasks and model architectures that simplifies data preprocessing and postprocessing as well as routine procedures (model loading, asynchronous execution, etc.), with the aim of simplifying end-to-end model inference. The Model API is based on the OpenVINO inference API. 15 | 16 | ## How it works 17 | 18 | Model API searches for additional information required for model inference (data pre/postprocessing parameters, label names, etc.) directly in the OpenVINO Intermediate Representation. This information is used to prepare the inference data and to process and output the inference results in a human-readable format. 19 | 20 | Currently, ModelAPI supports models trained in the [OpenVINO Training Extensions](https://github.com/openvinotoolkit/training_extensions) framework. 21 | Training Extensions embeds all the metadata required for inference into the model file. For models coming from other frameworks, a metadata generation step is required before using ModelAPI. 22 | 23 | ## Supported model formats 24 | 25 | - [OpenVINO IR](https://docs.openvino.ai/2025/documentation/openvino-ir-format.html) 26 | - [ONNX](https://onnx.ai) 27 | 28 | ## Features 29 | 30 | - Python API 31 | - Synchronous and asynchronous inference 32 | - Model preprocessing embedding for faster inference 33 | 34 | ## Installation 35 | 36 | `pip install openvino-model-api` 37 | 38 | ## Usage 39 | 40 | ```python 41 | from model_api.models import Model 42 | 43 | # Create a model wrapper from a compatible model generated by OpenVINO Training Extensions 44 | model = Model.create_model("model.xml") 45 | 46 | # Run synchronous inference locally 47 | result = model(image) # image is numpy.ndarray 48 | 49 | # Print results in model-specific format 50 | print(f"Inference result: {result}") 51 | ``` 52 | 53 | ## Prepare a model for `InferenceAdapter` 54 | 55 | There are use cases when it is not possible to modify an internal `ov::Model` because it is hidden behind an `InferenceAdapter`. `create_model()` can construct a model from a given `InferenceAdapter`. That approach assumes that the model in the `InferenceAdapter` was already configured by `create_model()` called with a string (a path or a model name). It is possible to prepare such a model: 56 | 57 | ```python 58 | model = DetectionModel.create_model("~/.cache/omz/public/ssdlite_mobilenet_v2/FP16/ssdlite_mobilenet_v2.xml") 59 | model.save("serialized.xml") 60 | ``` 61 | 62 | A sketch of the loading side of this flow is shown below. For more details, please refer to the [examples](https://github.com/openvinotoolkit/model_api/tree/master/examples) of this project.
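Below is a minimal, hedged sketch of the loading side: constructing a wrapper from an `InferenceAdapter` that points at the previously serialized model. The adapter class and arguments (`OpenvinoAdapter`, `create_core()`, `device="CPU"`) follow the other examples in this repository; treat the exact call signature as an assumption to verify against the adapter documentation.

```python
import cv2

from model_api.adapters import OpenvinoAdapter, create_core
from model_api.models import DetectionModel

# Minimal sketch, assuming "serialized.xml" was produced by model.save()
# as shown above and therefore already carries the required metadata.
inference_adapter = OpenvinoAdapter(create_core(), "serialized.xml", device="CPU")

# create_model() accepts an InferenceAdapter directly; no extra configuration
# is needed because the serialized model is self-describing.
model = DetectionModel.create_model(inference_adapter)

# Run synchronous inference on an image read with OpenCV.
image = cv2.imread("sample.png")
result = model(image)
print(f"Inference result: {result}")
```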
63 | -------------------------------------------------------------------------------- /src/docs/action_classification.md: -------------------------------------------------------------------------------- 1 | # ActionClassification Wrapper 2 | 3 | ## Use case and high-level description 4 | 5 | The `ActionClassificationModel` is a wrapper class designed for action classification models. 6 | This class encapsulates data pre- and post-processing for action classification OpenVINO models that satisfy 7 | certain specifications. 8 | Note that it isn't a subclass of `ImageModel`: it takes a video clip as input, which makes it different from `ImageModel`. 9 | Also, it doesn't use `OV.PrePostProcessor()` and therefore performs data preparation steps outside of the OV graph. 10 | 11 | ## Model parameters 12 | 13 | The following parameters can be provided via the Python API or RT Info embedded into the OV model: 14 | 15 | - `labels`(`list[str]`) : List of class labels 16 | - `path_to_labels` (`str`) : Path to a file with labels. If set, it overrides the `labels` list. 17 | - `mean_values` (`list[int | float]`) : Normalization values to be subtracted from the image channels during preprocessing. 18 | - `pad_value` (`int`) : Pad value used during the `resize_image_letterbox` operation embedded within the model. 19 | - `resize_type` (`str`) : The method of resizing the input image. Valid options include `crop`, `standard`, `fit_to_window`, and `fit_to_window_letterbox`. 20 | - `reverse_input_channels` (`bool`) : Whether to reverse the order of input channels. 21 | - `scale_values` (`list[int | float]`) : Normalization values used to divide the image channels during preprocessing. 22 | 23 | ## OV Model specifications 24 | 25 | ### Inputs 26 | 27 | A single 6D tensor with the following layout: 28 | 29 | - N : Batch size 30 | - S : Number of clips x Number of crops 31 | - C : Number of channels 32 | - T : Time 33 | - H : Height 34 | - W : Width 35 | 36 | The NSTHWC layout is also supported. 37 | 38 | ### Outputs 39 | 40 | A single tensor containing softmax-activated logits. 41 | 42 | ## Wrapper input-output specifications 43 | 44 | ### Inputs 45 | 46 | A single clip in THWC format. 47 | 48 | ### Outputs 49 | 50 | The output is represented as a `ClassificationResult` object, which includes the indices, labels, and logits of the top predictions. 51 | At present, saliency maps, feature vectors, and raw scores are not provided. 52 | 53 | ## How to use 54 | 55 | Using the `ActionClassificationModel` is similar to using other model wrappers; the primary difference is preparing video clip inputs instead of single images. 56 | 57 | Below is an example demonstrating how to initialize the model with OpenVINO IR files and classify actions in a video clip.
58 | 59 | ```python 60 | import cv2 61 | import numpy as np 62 | # import model wrapper class 63 | from model_api.models import ActionClassificationModel 64 | # import inference adapter and helper for runtime setup 65 | from model_api.adapters import OpenvinoAdapter, create_core 66 | 67 | 68 | # define the path to action classification model in IR format 69 | model_path = "action_classification.xml" 70 | 71 | # create adapter for OpenVINO runtime, pass the model path 72 | inference_adapter = OpenvinoAdapter(create_core(), model_path, device="CPU") 73 | 74 | # instantiate the ActionClassificationModel wrapper 75 | # setting preload=True loads the model onto the CPU within the adapter0 76 | action_cls_model = ActionClassificationModel(inference_adapter, preload=True) 77 | 78 | # load video and make a clip as input 79 | cap = cv2.VideoCapture("sample.mp4") 80 | input_data = np.stack([cap.read()[1] for _ in range(action_cls_model.clip_size)]) 81 | 82 | # perform preprocessing, inference, and postprocessing 83 | results = action_cls_model(input_data) 84 | ``` 85 | 86 | As illustrated, initializing the model and performing inference can be achieved with minimal code. 87 | The wrapper class takes care of input processing, layout adjustments, and output processing automatically. 88 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | permissions: {} # No permissions by default on workflow level 3 | 4 | on: 5 | workflow_dispatch: # run on request (no need for PR) 6 | push: 7 | branches: 8 | - master 9 | 10 | jobs: 11 | Build-Docs: 12 | runs-on: ubuntu-24.04 13 | permissions: 14 | contents: write 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 18 | with: 19 | persist-credentials: false 20 | - name: Set up Python 21 | uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 22 | with: 23 | python-version-file: ".python-version" 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # v7.1.5 26 | - name: Install dependencies 27 | run: | 28 | uv sync --locked --extra docs 29 | - name: Build Docs 30 | run: | 31 | cd docs 32 | uv run make html 33 | - name: Branch name 34 | id: branch_name 35 | shell: bash 36 | run: | 37 | echo ::set-output name=SOURCE_NAME::${GITHUB_REF#refs/*/} 38 | - name: Create gh-pages branch 39 | env: 40 | SOURCE: ${{steps.branch_name.outputs.SOURCE_NAME}} 41 | run: | 42 | if [[ ${{github.event_name}} == 'workflow_dispatch' ]]; then 43 | echo RELEASE_VERSION="test_build" >> $GITHUB_ENV 44 | else 45 | echo RELEASE_VERSION=${GITHUB_REF#refs/*/} >> $GITHUB_ENV 46 | fi 47 | echo SOURCE_NAME=${GITHUB_REF#refs/*/} >> $GITHUB_OUTPUT 48 | echo SOURCE_BRANCH=${GITHUB_REF#refs/heads/} >> $GITHUB_OUTPUT 49 | echo SOURCE_TAG=${GITHUB_REF#refs/tags/} >> $GITHUB_OUTPUT 50 | 51 | existed_in_remote=$(git ls-remote --heads origin gh-pages) 52 | 53 | if [[ -z ${existed_in_remote} ]]; then 54 | echo "Creating gh-pages branch" 55 | git config --local user.email "action@github.com" 56 | git config --local user.name "GitHub Action" 57 | git checkout --orphan gh-pages 58 | git reset --hard 59 | echo '' > index.html 60 | git add index.html 61 | touch .nojekyll 62 | git add .nojekyll 63 | git commit -m "Initializing gh-pages branch" 64 | git push origin gh-pages 65 | git checkout "${SOURCE}" 66 | echo "Created 
gh-pages branch" 67 | else 68 | echo "Branch gh-pages already exists" 69 | fi 70 | - name: Commit docs to gh-pages branch 71 | env: 72 | RELEASE_VERSION: ${{ env.RELEASE_VERSION }} 73 | run: | 74 | git fetch 75 | git checkout gh-pages 76 | mkdir -p /tmp/docs_build 77 | cp -r docs/build/html/* /tmp/docs_build/ 78 | rm -rf "$RELEASE_VERSION"/* 79 | echo '' > index.html 80 | mkdir -p "$RELEASE_VERSION" 81 | cp -r /tmp/docs_build/* ./"$RELEASE_VERSION" 82 | rm -rf /tmp/docs_build 83 | git config --local user.email "action@github.com" 84 | git config --local user.name "GitHub Action" 85 | if [[ "$RELEASE_VERSION" != 'test_build' ]]; then 86 | ln -sfn "$RELEASE_VERSION" latest 87 | fi 88 | git add ./latest "$RELEASE_VERSION" 89 | git add index.html 90 | git commit -m "Update documentation" -a || true 91 | - name: Push changes 92 | uses: ad-m/github-push-action@57116acb309081ee57864270b0e3c4cedbe45452 93 | with: 94 | github_token: ${{ secrets.GITHUB_TOKEN }} 95 | branch: gh-pages 96 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/polygon.py: -------------------------------------------------------------------------------- 1 | """Polygon primitive.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | import logging 9 | from typing import TYPE_CHECKING 10 | 11 | import cv2 12 | from PIL import Image, ImageColor, ImageDraw 13 | 14 | from .primitive import Primitive 15 | 16 | if TYPE_CHECKING: 17 | import numpy as np 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class Polygon(Primitive): 23 | """Polygon primitive. 24 | 25 | Args: 26 | points: List of points. 27 | mask: Mask to draw the polygon. 28 | color: Color of the polygon. 29 | 30 | Examples: 31 | >>> polygon = Polygon(points=[(10, 10), (100, 10), (100, 100), (10, 100)], color="red") 32 | >>> polygon = Polygon(mask=mask, color="red") 33 | >>> polygon.compute(image).save("polygon.jpg") 34 | 35 | >>> polygon = Polygon(mask=mask, color="red") 36 | >>> polygon.compute(image).save("polygon.jpg") 37 | """ 38 | 39 | def __init__( 40 | self, 41 | points: list[tuple[int, int]] | None = None, 42 | mask: np.ndarray | None = None, 43 | color: str | tuple[int, int, int] = "blue", 44 | opacity: float = 0.4, 45 | outline_width: int = 2, 46 | ) -> None: 47 | self.points = self._get_points(points, mask) 48 | self.color = color 49 | self.opacity = opacity 50 | self.outline_width = outline_width 51 | 52 | def _get_points(self, points: list[tuple[int, int]] | None, mask: np.ndarray | None) -> list[tuple[int, int]]: 53 | """Get points from either points or mask. 54 | Note: 55 | Either points or mask should be provided. 56 | 57 | Args: 58 | points: List of points. 59 | mask: Mask to draw the polygon. 60 | 61 | Returns: 62 | List of points. 63 | """ 64 | if points is not None and mask is not None: 65 | msg = "Either points or mask should be provided, not both." 66 | raise ValueError(msg) 67 | if points is not None: 68 | points_ = points 69 | elif mask is not None: 70 | points_ = self._get_points_from_mask(mask) 71 | else: 72 | msg = "Either points or mask should be provided." 73 | raise ValueError(msg) 74 | return points_ 75 | 76 | def _get_points_from_mask(self, mask: np.ndarray) -> list[tuple[int, int]]: 77 | """Get points from mask. 78 | 79 | Args: 80 | mask: Mask to draw the polygon. 81 | 82 | Returns: 83 | List of points. 
84 | """ 85 | contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 86 | # incase of multiple contours, use the one with the largest area 87 | if len(contours) > 1: 88 | logger.warning("Multiple contours found in the mask. Using the largest one.") 89 | contours = sorted(contours, key=cv2.contourArea, reverse=True) 90 | if len(contours) == 0: 91 | msg = "No contours found in the mask." 92 | raise ValueError(msg) 93 | points_ = contours[0].squeeze().tolist() 94 | return [tuple(point) for point in points_] 95 | 96 | def compute(self, image: Image) -> Image: 97 | """Compute the polygon. 98 | 99 | Args: 100 | image: Image to draw the polygon on. 101 | 102 | Returns: 103 | Image with the polygon drawn on it. 104 | """ 105 | draw = ImageDraw.Draw(image, "RGBA") 106 | # Draw polygon with darker edge and a semi-transparent fill. 107 | ink = ImageColor.getrgb(self.color) 108 | draw.polygon(self.points, fill=(*ink, int(255 * self.opacity)), outline=self.color, width=self.outline_width) 109 | return image 110 | -------------------------------------------------------------------------------- /tests/unit/visualizer/test_scene.py: -------------------------------------------------------------------------------- 1 | """Tests for scene.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import pytest 10 | from PIL import Image 11 | 12 | from model_api.models.result import ( 13 | AnomalyResult, 14 | ClassificationResult, 15 | DetectionResult, 16 | ImageResultWithSoftPrediction, 17 | InstanceSegmentationResult, 18 | ) 19 | from model_api.models.result.classification import Label 20 | from model_api.visualizer import Visualizer 21 | 22 | 23 | def test_anomaly_scene(mock_image: Image, tmpdir: Path): 24 | """Test if the anomaly scene is created.""" 25 | heatmap = np.ones(mock_image.size, dtype=np.uint8) 26 | heatmap *= 255 27 | 28 | mask = np.zeros(mock_image.size, dtype=np.uint8) 29 | mask[32:96, 32:96] = 255 30 | mask[40:80, 0:128] = 255 31 | 32 | anomaly_result = AnomalyResult( 33 | anomaly_map=heatmap, 34 | pred_boxes=np.array([[0, 0, 128, 128], [32, 32, 96, 96]]), 35 | pred_label="Anomaly", 36 | pred_mask=mask, 37 | pred_score=0.85, 38 | ) 39 | 40 | visualizer = Visualizer() 41 | visualizer.save(mock_image, anomaly_result, tmpdir / "anomaly_scene.jpg") 42 | assert Path(tmpdir / "anomaly_scene.jpg").exists() 43 | 44 | 45 | def test_classification_scene(mock_image: Image, tmpdir: Path): 46 | """Test if the classification scene is created.""" 47 | classification_result = ClassificationResult( 48 | top_labels=[ 49 | Label(name="cat", confidence=0.95), 50 | Label(name="dog", confidence=0.90), 51 | ], 52 | saliency_map=np.ones(mock_image.size, dtype=np.uint8), 53 | ) 54 | visualizer = Visualizer() 55 | visualizer.save( 56 | mock_image, 57 | classification_result, 58 | tmpdir / "classification_scene.jpg", 59 | ) 60 | assert Path(tmpdir / "classification_scene.jpg").exists() 61 | 62 | 63 | def test_detection_scene(mock_image: Image, tmpdir: Path): 64 | """Test if the detection scene is created.""" 65 | detection_result = DetectionResult( 66 | bboxes=np.array([[0, 0, 128, 128], [32, 32, 96, 96]]), 67 | labels=np.array([0, 1]), 68 | label_names=["person", "car"], 69 | scores=np.array([0.85, 0.75]), 70 | saliency_map=(np.ones((1, 2, 6, 8)) * 255).astype(np.uint8), 71 | ) 72 | visualizer = Visualizer() 73 | visualizer.save(mock_image, detection_result, tmpdir / "detection_scene.jpg") 74 | 
assert Path(tmpdir / "detection_scene.jpg").exists() 75 | 76 | 77 | @pytest.mark.parametrize("with_saliency_map", [True, False]) 78 | def test_segmentation_scene(mock_image: Image, tmpdir: Path, with_saliency_map: bool): 79 | """Test if the segmentation scene is created.""" 80 | visualizer = Visualizer() 81 | 82 | instance_segmentation_result = InstanceSegmentationResult( 83 | bboxes=np.array([[0, 0, 128, 128], [32, 32, 96, 96]]), 84 | labels=np.array([0, 1]), 85 | masks=np.array( 86 | [ 87 | np.ones((128, 128), dtype=np.uint8), 88 | ], 89 | ), 90 | scores=np.array([0.85, 0.75]), 91 | label_names=["person", "car"], 92 | saliency_map=[np.ones((128, 128), dtype=np.uint8) * 255] if with_saliency_map else None, 93 | feature_vector=np.array([1, 2, 3, 4]), 94 | ) 95 | 96 | visualizer.save( 97 | mock_image, 98 | instance_segmentation_result, 99 | tmpdir / "instance_segmentation_scene.jpg", 100 | ) 101 | assert Path(tmpdir / "instance_segmentation_scene.jpg").exists() 102 | 103 | # Test ImageResultWithSoftPrediction 104 | soft_prediction_result = ImageResultWithSoftPrediction( 105 | resultImage=np.array( 106 | [[0, 1, 2], [1, 2, 0], [2, 0, 1]], 107 | dtype=np.uint8, 108 | ), # 3x3 test image with 3 classes 109 | soft_prediction=np.ones( 110 | (3, 3, 3), 111 | dtype=np.float32, 112 | ), # 3 classes, 3x3 prediction 113 | saliency_map=np.ones((3, 3), dtype=np.uint8) * 255 if with_saliency_map else None, 114 | feature_vector=np.array([1, 2, 3, 4]), 115 | ) 116 | 117 | visualizer.save( 118 | mock_image, 119 | soft_prediction_result, 120 | tmpdir / "soft_prediction_scene.jpg", 121 | ) 122 | assert Path(tmpdir / "soft_prediction_scene.jpg").exists() 123 | -------------------------------------------------------------------------------- /src/model_api/visualizer/primitive/label.py: -------------------------------------------------------------------------------- 1 | """Label primitive.""" 2 | 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from io import BytesIO 7 | from typing import Union 8 | 9 | from PIL import Image, ImageDraw, ImageFont 10 | 11 | from .primitive import Primitive 12 | 13 | 14 | class Label(Primitive): 15 | """Label primitive. 16 | 17 | Labels require a different processing than other primitives as the class also handles the instance when the layout 18 | requests all the labels to be drawn on a single image. 19 | 20 | Args: 21 | label (str): Text of the label. 22 | score (float | None): Score of the label. This is optional. 23 | fg_color (str | tuple[int, int, int]): Foreground color of the label. 24 | bg_color (str | tuple[int, int, int]): Background color of the label. 25 | font_path (str | None | BytesIO): Path to the font file. 26 | size (int): Size of the font. 
27 | 28 | Examples: 29 | >>> label = Label(label="Label 1") 30 | >>> label.compute(image).save("label.jpg") 31 | 32 | >>> label = Label(label="Label 1", fg_color="red", bg_color="blue", font_path="arial.ttf", size=20) 33 | >>> label.compute(image).save("label.jpg") 34 | 35 | or multiple labels on a single image: 36 | >>> label1 = Label(label="Label 1") 37 | >>> label2 = Label(label="Label 2") 38 | >>> label3 = Label(label="Label 3") 39 | >>> Label.overlay_labels(image, [label1, label2, label3]).save("labels.jpg") 40 | """ 41 | 42 | def __init__( 43 | self, 44 | label: str, 45 | score: Union[float, None] = None, 46 | fg_color: Union[str, tuple[int, int, int]] = "black", 47 | bg_color: Union[str, tuple[int, int, int]] = "yellow", 48 | font_path: Union[str, BytesIO, None] = None, 49 | size: int = 16, 50 | ) -> None: 51 | self.label = f"{label} ({score:.2f})" if score is not None else label 52 | self.fg_color = fg_color 53 | self.bg_color = bg_color 54 | self.font = ImageFont.load_default(size=size) if font_path is None else ImageFont.truetype(font_path, size) 55 | 56 | def compute(self, image: Image, buffer_y: int = 5) -> Image: 57 | """Generate label on top of the image. 58 | 59 | Args: 60 | image (PIL.Image): Image to paste the label on. 61 | buffer_y (int): Buffer to add to the y-axis of the label. 62 | """ 63 | label_image = self.generate_label_image(buffer_y) 64 | image.paste(label_image, (0, 0)) 65 | return image 66 | 67 | def generate_label_image(self, buffer_y: int = 5) -> Image: 68 | """Generate label image. 69 | 70 | Args: 71 | buffer_y (int): Buffer to add to the y-axis of the label. This is needed as the text is clipped from the 72 | bottom otherwise. 73 | 74 | Returns: 75 | PIL.Image: Image that consists only of the label. 76 | """ 77 | dummy_image = Image.new("RGB", (1, 1)) 78 | draw = ImageDraw.Draw(dummy_image) 79 | textbox = draw.textbbox((0, 0), self.label, font=self.font) 80 | label_image = Image.new("RGB", (textbox[2] - textbox[0], textbox[3] + buffer_y - textbox[1]), self.bg_color) 81 | draw = ImageDraw.Draw(label_image) 82 | draw.text((0, 0), self.label, font=self.font, fill=self.fg_color) 83 | return label_image 84 | 85 | @classmethod 86 | def overlay_labels(cls, image: Image, labels: list["Label"], buffer_y: int = 5, buffer_x: int = 5) -> Image: 87 | """Overlay multiple label images on top of the image. 88 | Paste the labels in a row but wrap the labels if they exceed the image width. 89 | 90 | Args: 91 | image (PIL.Image): Image to paste the labels on. 92 | labels (list[Label]): Labels to be pasted on the image. 93 | buffer_y (int): Buffer to add to the y-axis of the labels. 94 | buffer_x (int): Space between the labels. 95 | 96 | Returns: 97 | PIL.Image: Image with the labels pasted on it.
98 | """ 99 | offset_x = 0 100 | offset_y = 0 101 | for label in labels: 102 | label_image = label.generate_label_image(buffer_y) 103 | image.paste(label_image, (offset_x, offset_y)) 104 | offset_x += label_image.width + buffer_x 105 | if offset_x + label_image.width > image.width: 106 | offset_x = 0 107 | offset_y += label_image.height 108 | return image 109 | -------------------------------------------------------------------------------- /tests/functional/test_save.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2020-2024 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | from pathlib import Path 7 | 8 | import onnx 9 | 10 | from model_api.adapters import ONNXRuntimeAdapter 11 | from model_api.adapters.utils import load_parameters_from_onnx 12 | from model_api.models import Model 13 | 14 | 15 | def test_detector_save(tmp_path, data): 16 | detector = Model.create_model( 17 | Path(data) / "otx_models/detection_model_with_xai_head.xml", 18 | ) 19 | xml_path = str(tmp_path / "a.xml") 20 | detector.save(xml_path) 21 | deserialized = Model.create_model(xml_path) 22 | 23 | assert deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 24 | assert type(detector) is type(deserialized) 25 | for attr in detector.parameters(): 26 | assert getattr(detector.params, attr) == getattr(deserialized.params, attr) 27 | 28 | 29 | def test_classifier_save(tmp_path, data): 30 | classifier = Model.create_model( 31 | Path(data) / "otx_models/tinynet_imagenet.xml", 32 | ) 33 | xml_path = str(tmp_path / "a.xml") 34 | classifier.save(xml_path) 35 | deserialized = Model.create_model(xml_path) 36 | 37 | assert deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 38 | assert type(classifier) is type(deserialized) 39 | for attr in classifier.parameters(): 40 | assert getattr(classifier.params, attr) == getattr(deserialized.params, attr) 41 | 42 | 43 | def test_segmentor_save(tmp_path, data): 44 | segmenter = Model.create_model( 45 | Path(data) / "otx_models/Lite-hrnet-18_mod2.xml", 46 | ) 47 | xml_path = str(tmp_path / "a.xml") 48 | segmenter.save(xml_path) 49 | deserialized = Model.create_model(xml_path) 50 | 51 | assert deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 52 | assert type(segmenter) is type(deserialized) 53 | for attr in segmenter.parameters(): 54 | assert getattr(segmenter.params, attr) == getattr(deserialized.params, attr) 55 | 56 | 57 | def test_onnx_save(tmp_path, data): 58 | cls_model = Model.create_model( 59 | ONNXRuntimeAdapter(Path(data) / "otx_models/cls_mobilenetv3_large_cars.onnx"), 60 | model_type="Classification", 61 | preload=True, 62 | configuration={"reverse_input_channels": True, "topk": 6}, 63 | ) 64 | 65 | onnx_path = str(tmp_path / "a.onnx") 66 | cls_model.save(onnx_path) 67 | deserialized = Model.create_model(onnx_path) 68 | 69 | assert load_parameters_from_onnx(onnx.load(onnx_path))["model_info"]["embedded_processing"] == "True" 70 | assert type(cls_model) is type(deserialized) 71 | for attr in cls_model.parameters(): 72 | assert getattr(cls_model.params, attr) == getattr(deserialized.params, attr) 73 | 74 | 75 | def test_padim_save(tmp_path, data): 76 | padim_model = Model.create_model( 77 | Path(data) / "anomalib_models/padim.xml", 78 | ) 79 | xml_path = str(tmp_path / "a.xml") 80 | padim_model.save(xml_path) 81 | deserialized = Model.create_model(xml_path) 82 | 83 | assert not 
deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 84 | assert type(padim_model) is type(deserialized) 85 | for attr in padim_model.parameters(): 86 | assert getattr(padim_model.params, attr) == getattr(deserialized.params, attr) 87 | 88 | 89 | def test_stfpm_save(tmp_path, data): 90 | stfpm_model = Model.create_model( 91 | Path(data) / "anomalib_models/stfpm.xml", 92 | ) 93 | xml_path = str(tmp_path / "a.xml") 94 | stfpm_model.save(xml_path) 95 | deserialized = Model.create_model(xml_path) 96 | 97 | assert not deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 98 | assert type(stfpm_model) is type(deserialized) 99 | for attr in stfpm_model.parameters(): 100 | assert getattr(stfpm_model.params, attr) == getattr(deserialized.params, attr) 101 | 102 | 103 | def test_uflow_save(tmp_path, data): 104 | uflow_model = Model.create_model( 105 | Path(data) / "anomalib_models/uflow.xml", 106 | ) 107 | xml_path = str(tmp_path / "a.xml") 108 | uflow_model.save(xml_path) 109 | deserialized = Model.create_model(xml_path) 110 | 111 | assert not deserialized.get_model().get_rt_info(["model_info", "embedded_processing"]).astype(bool) 112 | assert type(uflow_model) is type(deserialized) 113 | for attr in uflow_model.parameters(): 114 | assert getattr(uflow_model.params, attr) == getattr(deserialized.params, attr) 115 | -------------------------------------------------------------------------------- /tools/model_converter/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "model_short_name": "mobilenet_v3_small", 5 | "model_class_name": "torchvision.models.mobilenetv3.mobilenet_v3_small", 6 | "model_full_name": "MobileNetV3-Small", 7 | "description": "MobileNetV3 Small - Efficient convolutional neural network for mobile and embedded vision applications", 8 | "docs": "https://docs.pytorch.org/vision/main/models/generated/torchvision.models.mobilenet_v3_small.html#torchvision.models.mobilenet_v3_small", 9 | "weights_url": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth", 10 | "input_shape": [1, 3, 224, 224], 11 | "input_names": ["image"], 12 | "output_names": ["output1"], 13 | "model_params": null, 14 | "model_type": "Classification", 15 | "reverse_input_channels": false, 16 | "mean_values": "123.675 116.28 103.53", 17 | "scale_values": "58.395 57.12 57.375", 18 | "labels": "IMAGENET1K_V1" 19 | }, 20 | { 21 | "model_short_name": "efficientnet_b0", 22 | "model_class_name": "torchvision.models.efficientnet.efficientnet_b0", 23 | "model_full_name": "EfficientNet-B0", 24 | "description": "EfficientNet-B0 - Efficient convolutional neural network with compound scaling", 25 | "docs": "https://docs.pytorch.org/vision/main/models/generated/torchvision.models.efficientnet_b0.html#torchvision.models.efficientnet_b0", 26 | "weights_url": "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", 27 | "input_shape": [1, 3, 224, 224], 28 | "input_names": ["image"], 29 | "output_names": ["logits"], 30 | "model_params": null, 31 | "model_type": "Classification", 32 | "reverse_input_channels": true, 33 | "mean_values": "123.675 116.28 103.53", 34 | "scale_values": "58.395 57.12 57.375", 35 | "labels": "IMAGENET1K_V1" 36 | }, 37 | { 38 | "model_short_name": "resnet18", 39 | "model_class_name": "torchvision.models.resnet.resnet18", 40 | "model_full_name": "ResNet-18", 41 | "description": "ResNet-18 - 18-layer residual learning network for 
image classification", 42 | "weights_url": "https://download.pytorch.org/models/resnet18-f37072fd.pth", 43 | "input_shape": [1, 3, 224, 224], 44 | "input_names": ["image"], 45 | "output_names": ["output"], 46 | "model_params": null, 47 | "model_type": "Classification", 48 | "reverse_input_channels": true, 49 | "mean_values": "123.675 116.28 103.53", 50 | "scale_values": "58.395 57.12 57.375", 51 | "labels": "IMAGENET1K_V1" 52 | }, 53 | { 54 | "model_short_name": "resnet50", 55 | "model_class_name": "torchvision.models.resnet.resnet50", 56 | "model_full_name": "ResNet-50", 57 | "description": "ResNet-50 - 50-layer residual learning network for image classification", 58 | "weights_url": "https://download.pytorch.org/models/resnet50-0676ba61.pth", 59 | "input_shape": [1, 3, 224, 224], 60 | "input_names": ["image"], 61 | "output_names": ["output"], 62 | "model_params": null, 63 | "model_type": "Classification", 64 | "reverse_input_channels": true, 65 | "mean_values": "123.675 116.28 103.53", 66 | "scale_values": "58.395 57.12 57.375", 67 | "labels": "IMAGENET1K_V1" 68 | }, 69 | { 70 | "model_short_name": "squeezenet1_0", 71 | "model_class_name": "torchvision.models.squeezenet.squeezenet1_0", 72 | "model_full_name": "SqueezeNet 1.0", 73 | "description": "SqueezeNet 1.0 - Small CNN with AlexNet-level accuracy and 50x fewer parameters", 74 | "weights_url": "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth", 75 | "input_shape": [1, 3, 224, 224], 76 | "input_names": ["image"], 77 | "output_names": ["output"], 78 | "model_params": null, 79 | "model_type": "Classification", 80 | "reverse_input_channels": true, 81 | "mean_values": "123.675 116.28 103.53", 82 | "scale_values": "58.395 57.12 57.375", 83 | "labels": "IMAGENET1K_V1" 84 | }, 85 | { 86 | "model_short_name": "vgg16", 87 | "model_class_name": "torchvision.models.vgg.vgg16", 88 | "model_full_name": "VGG-16", 89 | "description": "VGG-16 - 16-layer deep convolutional network", 90 | "weights_url": "https://download.pytorch.org/models/vgg16-397923af.pth", 91 | "input_shape": [1, 3, 224, 224], 92 | "input_names": ["image"], 93 | "output_names": ["output"], 94 | "model_params": null, 95 | "model_type": "Classification", 96 | "reverse_input_channels": true, 97 | "mean_values": "123.675 116.28 103.53", 98 | "scale_values": "58.395 57.12 57.375", 99 | "labels": "IMAGENET1K_V1" 100 | } 101 | ] 102 | } 103 | -------------------------------------------------------------------------------- /src/model_api/models/result/detection.py: -------------------------------------------------------------------------------- 1 | """Detection result type.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | import numpy as np 9 | 10 | from .base import Result 11 | from .utils import array_shape_to_str 12 | 13 | 14 | class DetectionResult(Result): 15 | """Result for detection model. 16 | 17 | Args: 18 | bboxes (np.ndarray): bounding boxes in dim (N, 4) where N is the number of boxes. 19 | labels (np.ndarray): labels for each bounding box in dim (N,). 20 | scores (np.ndarray| None, optional): confidence scores for each bounding box in dim (N,). 21 | label_names (list[str] | None, optional): class names for each label. Defaults to None. 22 | saliency_map (np.ndarray | None, optional): saliency map for XAI. Defaults to None. 23 | feature_vector (np.ndarray | None, optional): feature vector for XAI. Defaults to None. 
24 | """ 25 | 26 | def __init__( 27 | self, 28 | bboxes: np.ndarray, 29 | labels: np.ndarray, 30 | scores: np.ndarray | None = None, 31 | label_names: list[str] | None = None, 32 | saliency_map: np.ndarray | None = None, 33 | feature_vector: np.ndarray | None = None, 34 | ): 35 | super().__init__() 36 | self._bboxes = bboxes 37 | self._labels = labels.astype(np.int32) 38 | self._scores = scores if scores is not None else np.zeros(len(bboxes)) 39 | self._label_names = ["#"] * len(bboxes) if label_names is None else label_names 40 | self._saliency_map = saliency_map 41 | self._feature_vector = feature_vector 42 | 43 | def __len__(self) -> int: 44 | return len(self.bboxes) 45 | 46 | def __str__(self) -> str: 47 | repr_str = "" 48 | for box, score, label, name in zip( 49 | self.bboxes, 50 | self.scores, 51 | self.labels, 52 | self.label_names, 53 | ): 54 | x1, y1, x2, y2 = box 55 | repr_str += f"{x1}, {y1}, {x2}, {y2}, {label} ({name}): {score:.3f}; " 56 | 57 | repr_str += f"{array_shape_to_str(self.saliency_map)}; {array_shape_to_str(self.feature_vector)}" 58 | return repr_str 59 | 60 | def get_obj_sizes(self) -> np.ndarray: 61 | """Get object sizes. 62 | 63 | Returns: 64 | np.ndarray: Object sizes in dim of (N,). 65 | """ 66 | return (self._bboxes[:, 2] - self._bboxes[:, 0]) * (self._bboxes[:, 3] - self._bboxes[:, 1]) 67 | 68 | @property 69 | def bboxes(self) -> np.ndarray: 70 | return self._bboxes 71 | 72 | @bboxes.setter 73 | def bboxes(self, value): 74 | if not isinstance(value, np.ndarray): 75 | msg = "Bounding boxes must be numpy array." 76 | raise ValueError(msg) 77 | self._bboxes = value 78 | 79 | @property 80 | def labels(self) -> np.ndarray: 81 | return self._labels 82 | 83 | @labels.setter 84 | def labels(self, value): 85 | if not isinstance(value, np.ndarray): 86 | msg = "Labels must be numpy array." 87 | raise ValueError(msg) 88 | self._labels = value 89 | 90 | @property 91 | def scores(self) -> np.ndarray: 92 | return self._scores 93 | 94 | @scores.setter 95 | def scores(self, value): 96 | if not isinstance(value, np.ndarray): 97 | msg = "Scores must be numpy array." 98 | raise ValueError(msg) 99 | self._scores = value 100 | 101 | @property 102 | def label_names(self) -> list[str]: 103 | return self._label_names 104 | 105 | @label_names.setter 106 | def label_names(self, value): 107 | if not isinstance(value, list): 108 | msg = "Label names must be list." 109 | raise ValueError(msg) 110 | self._label_names = value 111 | 112 | @property 113 | def saliency_map(self): 114 | """Saliency map for XAI. 115 | 116 | Returns: 117 | np.ndarray: Saliency map in dim of (B, N_CLASSES, H, W). 118 | """ 119 | return self._saliency_map 120 | 121 | @saliency_map.setter 122 | def saliency_map(self, value: np.ndarray): 123 | if not isinstance(value, np.ndarray): 124 | msg = "Saliency map must be numpy array." 125 | raise ValueError(msg) 126 | self._saliency_map = value 127 | 128 | @property 129 | def feature_vector(self) -> np.ndarray: 130 | return self._feature_vector 131 | 132 | @feature_vector.setter 133 | def feature_vector(self, value): 134 | if not isinstance(value, np.ndarray): 135 | msg = "Feature vector must be numpy array." 
136 | raise ValueError(msg) 137 | self._feature_vector = value 138 | -------------------------------------------------------------------------------- /tests/accuracy/download_models.py: -------------------------------------------------------------------------------- 1 | #!#!/usr/bin/env -S uv run --script 2 | # 3 | # Copyright (C) 2025 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | import argparse 7 | import asyncio 8 | import json 9 | import time 10 | from io import BytesIO 11 | from pathlib import Path 12 | from zipfile import ZipFile 13 | 14 | import httpx 15 | 16 | 17 | async def stream_file(client, url, filename, semaphore): 18 | if Path(filename).exists(): 19 | print(f"Skipping already downloaded {filename}") 20 | return 21 | 22 | async with semaphore: 23 | start_time = time.time() 24 | total_bytes = 0 25 | async with client.stream("GET", url) as stream: 26 | with Path(filename).open("wb") as file: 27 | async for data in stream.aiter_bytes(): 28 | file.write(data) 29 | total_bytes += len(data) 30 | end_time = time.time() 31 | download_time = end_time - start_time 32 | total_bytes /= 1024 * 1024 33 | 34 | speed_mbps = total_bytes / download_time if download_time > 0 else 0 35 | print(f"Downloaded {url} - {total_bytes:.2f} MB in {download_time:.2f}s ({speed_mbps:.2f} MB/s)") 36 | 37 | 38 | async def download_single_image(client, url, filename): 39 | image = await client.get(url) 40 | with Path(filename).open("wb") as im: 41 | im.write(image.content) 42 | 43 | 44 | async def download_images(data_dir): 45 | async with httpx.AsyncClient(timeout=20.0) as client: 46 | COCO128_URL = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip" 47 | archive = await client.get(COCO128_URL, follow_redirects=True) 48 | with ZipFile(BytesIO(archive.content)) as zfile: 49 | zfile.extractall(data_dir) 50 | 51 | image_downloads = [ 52 | ( 53 | "https://storage.geti.intel.com/geti_predict/test/images/BloodImage_00007.jpg", 54 | data_dir / "BloodImage_00007.jpg", 55 | ), 56 | ("https://storage.geti.intel.com/geti_predict/test/images/cards.png", data_dir / "cards.png"), 57 | ] 58 | 59 | await asyncio.gather(*[download_single_image(client, url, filename) for url, filename in image_downloads]) 60 | 61 | 62 | async def main(): 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument( 65 | "-d", 66 | "--data_dir", 67 | type=Path, 68 | required=True, 69 | help="Directory to store downloaded models and datasets", 70 | ) 71 | parser.add_argument( 72 | "-j", 73 | "--json_path", 74 | type=Path, 75 | required=True, 76 | help="Path to the JSON file with model information", 77 | ) 78 | parser.add_argument( 79 | "-l", 80 | "--legacy", 81 | action="store_true", 82 | help="Download models using legacy directory structure (used in public_scope.json", 83 | ) 84 | args = parser.parse_args() 85 | 86 | with args.json_path.open("r") as f: 87 | models_data = json.load(f) 88 | 89 | base_path = "https://storage.geti.intel.com/geti_predict/test/" 90 | semaphore = asyncio.Semaphore(10) 91 | args.data_dir.mkdir(parents=True, exist_ok=True) 92 | async with httpx.AsyncClient(timeout=60.0) as client: 93 | tasks = [] 94 | 95 | model_names = [] 96 | for model_data in models_data: 97 | model_names.append(model_data["name"]) 98 | if args.legacy and "encoder" in model_data: 99 | model_names.append(model_data["encoder"]) 100 | if args.legacy and "extra_model" in model_data: 101 | model_names.append(model_data["extra_model"]) 102 | 103 | for model_name in model_names: 104 | download_url = base_path + 
model_name 105 | if args.legacy: 106 | if model_name.endswith(".onnx"): 107 | download_url = base_path + model_name.replace(".", "/model.") 108 | else: 109 | download_url = base_path + model_name.replace(".", "/openvino.") 110 | save_path = args.data_dir / model_name 111 | save_path.parent.mkdir(parents=True, exist_ok=True) 112 | 113 | tasks.append(stream_file(client, download_url, save_path, semaphore)) 114 | 115 | if model_name.endswith(".xml"): 116 | tasks.append( 117 | stream_file(client, download_url.replace(".xml", ".bin"), save_path.with_suffix(".bin"), semaphore), 118 | ) 119 | 120 | tasks.append(download_images(args.data_dir)) 121 | 122 | print(f"Starting download of {len(tasks)} files with max 10 concurrent downloads...") 123 | await asyncio.gather(*tasks) 124 | print(f"All {len(tasks)} files downloaded successfully!") 125 | 126 | 127 | if __name__ == "__main__": 128 | asyncio.run(main()) 129 | -------------------------------------------------------------------------------- /tools/model_converter/README.md: -------------------------------------------------------------------------------- 1 | # Model Converter Tool 2 | 3 | A command-line utility to download PyTorch models and convert them to OpenVINO format. 4 | 5 | ## Overview 6 | 7 | This tool reads a JSON configuration file containing model specifications, downloads PyTorch weights from URLs, loads the models, and exports them to OpenVINO Intermediate Representation (IR) format. 8 | 9 | ## Features 10 | 11 | - **Automatic Download**: Downloads model weights from HTTP/HTTPS URLs with caching support 12 | - **Dynamic Model Loading**: Dynamically imports and instantiates model classes from Python paths 13 | - **Metadata Embedding**: Embeds custom metadata into OpenVINO models 14 | - **Input/Output Naming**: Configurable input and output tensor names 15 | - **Batch Processing**: Process multiple models from a single configuration file 16 | - **Selective Conversion**: Convert specific models using the `--model` flag 17 | 18 | ## Installation 19 | 20 | ### Prerequisites 21 | 22 | ```bash 23 | # Required packages 24 | uv pip install torch torchvision openvino 25 | 26 | ``` 27 | 28 | ## Usage 29 | 30 | ### Basic Usage 31 | 32 | ```bash 33 | uv run python model_converter.py config.json -o ./output_models 34 | ``` 35 | 36 | ### Command-Line Options 37 | 38 | ```text 39 | positional arguments: 40 | config Path to JSON configuration file 41 | 42 | options: 43 | -h, --help Show help message and exit 44 | -o OUTPUT, --output OUTPUT 45 | Output directory for converted models (default: ./converted_models) 46 | -c CACHE, --cache CACHE 47 | Cache directory for downloaded weights (default: ~/.cache/torch/hub/checkpoints) 48 | --model MODEL Process only the specified model (by model_short_name) 49 | --list List all models in the configuration file and exit 50 | -v, --verbose Enable verbose logging 51 | ``` 52 | 53 | ### Examples 54 | 55 | **List all models in configuration:** 56 | 57 | ```bash 58 | uv run python model_converter.py example_config.json --list 59 | ``` 60 | 61 | **Convert all models:** 62 | 63 | ```bash 64 | uv run python model_converter.py example_config.json -o ./converted_models 65 | ``` 66 | 67 | **Convert a specific model:** 68 | 69 | ```bash 70 | uv run python model_converter.py example_config.json -o ./converted_models --model resnet50 71 | ``` 72 | 73 | **Use custom cache directory:** 74 | 75 | ```bash 76 | uv run python model_converter.py example_config.json -o ./output -c ./my_cache 77 | ``` 78 | 79 | **Enable verbose 
logging:** 80 | 81 | ```bash 82 | uv run python model_converter.py example_config.json -o ./output -v 83 | ``` 84 | 85 | ## Configuration File Format 86 | 87 | The configuration file is a JSON file with the following structure: 88 | 89 | ```json 90 | { 91 | "models": [ 92 | { 93 | "model_short_name": "resnet50", 94 | "model_class_name": "torchvision.models.resnet.resnet50", 95 | "model_full_name": "ResNet-50", 96 | "description": "ResNet-50 image classification model", 97 | "weights_url": "https://download.pytorch.org/models/resnet50-0676ba61.pth", 98 | "input_shape": [1, 3, 224, 224], 99 | "input_names": ["images"], 100 | "output_names": ["output"], 101 | "model_params": null, 102 | "model_type": "Classification" 103 | } 104 | ] 105 | } 106 | ``` 107 | 108 | **Important**: The `model_type` field enables automatic model detection when using [Intel's model_api](https://github.com/openvinotoolkit/model_api). When specified, this metadata is embedded in the OpenVINO IR, allowing `Model.create_model()` to automatically select the correct model wrapper class. 109 | 110 | Common `model_type` values: 111 | 112 | - `"Classification"` - Image classification models 113 | - `"DetectionModel"` - Object detection models 114 | - `"YOLOX"` - YOLOX detection models 115 | - `"SegmentationModel"` - Segmentation models 116 | 117 | ### Configuration Fields 118 | 119 | #### Required Fields 120 | 121 | - **`model_short_name`** (string): Short identifier for the model (used for output filename) 122 | - **`model_class_name`** (string): Full Python path to the model class (e.g., `torchvision.models.resnet.resnet50`) 123 | - **`weights_url`** (string): URL to download the PyTorch weights (.pth file) 124 | 125 | #### Optional Fields 126 | 127 | - **`model_full_name`** (string): Full descriptive name of the model 128 | - **`description`** (string): Description of the model 129 | - **`input_shape`** (array of integers): Input tensor shape (default: `[1, 3, 224, 224]`) 130 | - **`input_names`** (array of strings): Names for input tensors (default: `["input"]`) 131 | - **`output_names`** (array of strings): Names for output tensors (default: auto-generated) 132 | - **`model_params`** (object): Parameters to pass to model constructor (default: `null`) 133 | - **`model_type`** (string): Model type for model_api auto-detection (e.g., `"Classification"`, `"DetectionModel"`, `"YOLOX"`, etc.) 134 | -------------------------------------------------------------------------------- /src/model_api/visualizer/scene/scene.py: -------------------------------------------------------------------------------- 1 | """Scene object.""" 2 | 3 | # Copyright (C) 2024 Intel Corporation 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from __future__ import annotations 7 | 8 | from typing import TYPE_CHECKING, cast 9 | 10 | import numpy as np 11 | from PIL import Image 12 | 13 | from model_api.visualizer.primitive import BoundingBox, Keypoint, Label, Overlay, Polygon, Primitive 14 | 15 | if TYPE_CHECKING: 16 | from pathlib import Path 17 | 18 | from model_api.visualizer.layout import Layout 19 | 20 | 21 | class Scene: 22 | """Scene object. 23 | 24 | Used by the visualizer to render. 
25 | """ 26 | 27 | def __init__( 28 | self, 29 | base: Image, 30 | bounding_box: BoundingBox | list[BoundingBox] | None = None, 31 | label: Label | list[Label] | None = None, 32 | overlay: Overlay | list[Overlay] | np.ndarray | None = None, 33 | polygon: Polygon | list[Polygon] | None = None, 34 | keypoints: Keypoint | list[Keypoint] | np.ndarray | None = None, 35 | layout: Layout | None = None, 36 | ) -> None: 37 | self.base = base 38 | self.overlay = self._to_overlay(overlay) 39 | self.bounding_box = self._to_bounding_box(bounding_box) 40 | self.label = self._to_label(label) 41 | self.polygon = self._to_polygon(polygon) 42 | self.keypoints = self._to_keypoints(keypoints) 43 | self.layout = layout 44 | 45 | def show(self) -> None: 46 | self.render().show() 47 | 48 | def save(self, path: Path) -> None: 49 | self.render().save(path) 50 | 51 | def render(self) -> Image: 52 | if self.layout is None: 53 | return self.default_layout(self) 54 | return self.layout(self) 55 | 56 | def has_primitives(self, primitive: type[Primitive]) -> bool: 57 | if primitive == Overlay: 58 | return bool(self.overlay) 59 | if primitive == BoundingBox: 60 | return bool(self.bounding_box) 61 | if primitive == Label: 62 | return bool(self.label) 63 | if primitive == Polygon: 64 | return bool(self.polygon) 65 | if primitive == Keypoint: 66 | return bool(self.keypoints) 67 | return False 68 | 69 | def get_primitives(self, primitive: type[Primitive]) -> list[Primitive]: 70 | """Get primitives of the given type. 71 | 72 | Args: 73 | primitive (type[Primitive]): The type of primitive to get. 74 | 75 | Example: 76 | >>> scene = Scene(base=Image.new("RGB", (100, 100)), overlay=[Overlay(Image.new("RGB", (100, 100)))]) 77 | >>> scene.get_primitives(Overlay) 78 | [Overlay(image=Image.new("RGB", (100, 100)))] 79 | 80 | Returns: 81 | list[Primitive]: The primitives of the given type or an empty list if no primitives are found. 82 | """ 83 | primitives: list[Primitive] | None = None 84 | # cast is needed as mypy does not know that the primitives are a subclass of Primitive. 
85 | if primitive == Overlay: 86 | primitives = cast("list[Primitive]", self.overlay) 87 | elif primitive == BoundingBox: 88 | primitives = cast("list[Primitive]", self.bounding_box) 89 | elif primitive == Label: 90 | primitives = cast("list[Primitive]", self.label) 91 | elif primitive == Polygon: 92 | primitives = cast("list[Primitive]", self.polygon) 93 | elif primitive == Keypoint: 94 | primitives = cast("list[Primitive]", self.keypoints) 95 | else: 96 | msg = f"Primitive {primitive} not found" 97 | raise ValueError(msg) 98 | return primitives or [] 99 | 100 | @property 101 | def default_layout(self) -> Layout: 102 | """Default layout for the media.""" 103 | msg = "Default layout not implemented" 104 | raise NotImplementedError(msg) 105 | 106 | def _to_overlay(self, overlay: Overlay | list[Overlay] | np.ndarray | None) -> list[Overlay] | None: 107 | if isinstance(overlay, np.ndarray): 108 | image = Image.fromarray(overlay) 109 | return [Overlay(image)] 110 | if isinstance(overlay, Overlay): 111 | return [overlay] 112 | return overlay 113 | 114 | def _to_bounding_box(self, bounding_box: BoundingBox | list[BoundingBox] | None) -> list[BoundingBox] | None: 115 | if isinstance(bounding_box, BoundingBox): 116 | return [bounding_box] 117 | return bounding_box 118 | 119 | def _to_label(self, label: Label | list[Label] | None) -> list[Label] | None: 120 | if isinstance(label, Label): 121 | return [label] 122 | return label 123 | 124 | def _to_polygon(self, polygon: Polygon | list[Polygon] | None) -> list[Polygon] | None: 125 | if isinstance(polygon, Polygon): 126 | return [polygon] 127 | return polygon 128 | 129 | def _to_keypoints(self, keypoints: Keypoint | list[Keypoint] | np.ndarray | None) -> list[Keypoint] | None: 130 | if isinstance(keypoints, Keypoint): 131 | return [keypoints] 132 | if isinstance(keypoints, np.ndarray): 133 | return [Keypoint(keypoints)] 134 | return keypoints 135 | -------------------------------------------------------------------------------- /src/model_api/metrics/performance.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2025 Intel Corporation 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | import logging 7 | 8 | from .time_stat import TimeStat 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class PerformanceMetrics: 14 | """ 15 | A class to represent performance metrics for a model. 16 | """ 17 | 18 | def __init__(self): 19 | """ 20 | Initializes performance metrics. 21 | """ 22 | self.load_time = TimeStat() 23 | self.preprocess_time = TimeStat() 24 | self.inference_time = TimeStat() 25 | self.postprocess_time = TimeStat() 26 | self.total_time = TimeStat() 27 | 28 | def __add__(self, other): 29 | """ 30 | Adds two PerformanceMetrics objects. 31 | """ 32 | if not isinstance(other, PerformanceMetrics): 33 | return NotImplemented 34 | 35 | new_metrics = PerformanceMetrics() 36 | new_metrics.load_time = self.load_time + other.load_time 37 | new_metrics.preprocess_time = self.preprocess_time + other.preprocess_time 38 | new_metrics.inference_time = self.inference_time + other.inference_time 39 | new_metrics.postprocess_time = self.postprocess_time + other.postprocess_time 40 | return new_metrics 41 | 42 | def reset(self) -> None: 43 | """ 44 | Resets performance metrics to the initial state. 
45 | """ 46 | self.preprocess_time.reset() 47 | self.inference_time.reset() 48 | self.postprocess_time.reset() 49 | self.total_time.reset() 50 | 51 | def get_load_time(self) -> TimeStat: 52 | """ 53 | Returns the load time statistics. 54 | 55 | Returns: 56 | TimeStat: Load time statistics object. 57 | """ 58 | return self.load_time 59 | 60 | def get_preprocess_time(self) -> TimeStat: 61 | """ 62 | Returns the preprocessing time statistics. 63 | 64 | Returns: 65 | TimeStat: Preprocessing time statistics object. 66 | """ 67 | return self.preprocess_time 68 | 69 | def get_inference_time(self) -> TimeStat: 70 | """ 71 | Returns the inference time statistics. 72 | 73 | Returns: 74 | TimeStat: Inference time statistics object. 75 | """ 76 | return self.inference_time 77 | 78 | def get_postprocess_time(self) -> TimeStat: 79 | """ 80 | Returns the postprocessing time statistics. 81 | 82 | Returns: 83 | TimeStat: Postprocessing time statistics object. 84 | """ 85 | return self.postprocess_time 86 | 87 | def get_total_frames(self) -> int: 88 | """ 89 | Returns the total number of frames processed. 90 | 91 | Returns: 92 | int: Total number of frames processed. 93 | """ 94 | return len(self.total_time.durations) 95 | 96 | def get_fps(self) -> float: 97 | """ 98 | Returns the Frames Per Second (FPS) statistics. 99 | 100 | Returns: 101 | float: Frames Per Second. 102 | """ 103 | return self.get_total_frames() / sum(self.total_time.durations) if sum(self.total_time.durations) > 0 else 0.0 104 | 105 | def get_total_time_min(self) -> float: 106 | """ 107 | Returns the minimum total time for processing a frame. 108 | 109 | Returns: 110 | float: Minimum total time in seconds. 111 | """ 112 | return min(self.total_time.durations) if self.total_time.durations else 0.0 113 | 114 | def get_total_time_max(self) -> float: 115 | """ 116 | Returns the maximum total time for processing a frame. 117 | 118 | Returns: 119 | float: Maximum total time in seconds. 120 | """ 121 | return max(self.total_time.durations) if self.total_time.durations else 0.0 122 | 123 | def log_metrics(self) -> None: 124 | """ 125 | Logs all performance metrics using the logging module. 126 | """ 127 | # Create the metrics report as a multi-line string 128 | report_lines = [ 129 | "", 130 | "=" * 60, 131 | "🚀 PERFORMANCE METRICS REPORT 🚀".center(60), 132 | "=" * 60, 133 | "", 134 | "📊 Model Loading:", 135 | f" Load Time: {self.load_time.mean():.3f}s", 136 | "", 137 | "⚙️ Processing Times (mean ± std):", 138 | f" Preprocess: {self.preprocess_time.mean():.3f}s ± {self.preprocess_time.stddev():.3f}s", 139 | f" Inference: {self.inference_time.mean():.3f}s ± {self.inference_time.stddev():.3f}s", 140 | f" Postprocess: {self.postprocess_time.mean():.3f}s ± {self.postprocess_time.stddev():.3f}s", 141 | "", 142 | "📈 Total Time Statistics:", 143 | f" Mean: {self.total_time.mean():.3f}s ± {self.total_time.stddev():.3f}s", 144 | f" Min: {self.get_total_time_min():.3f}s", 145 | f" Max: {self.get_total_time_max():.3f}s", 146 | "", 147 | "🎯 Performance Summary:", 148 | f" Total Frames: {self.get_total_frames():,}", 149 | f" FPS: {self.get_fps():.2f}", 150 | "", 151 | "=" * 60, 152 | "", 153 | ] 154 | 155 | # Log the entire report as a single info message 156 | logger.info("\n".join(report_lines)) 157 | --------------------------------------------------------------------------------