├── requirements ├── 3d.txt ├── opencv.txt ├── depth_estimation.txt ├── torch.txt ├── base.txt ├── test.txt ├── embedding_estimation.txt └── preparation.txt ├── src └── nicr_scene_analysis_datasets │ ├── utils │ ├── __init__.py │ ├── misc.py │ ├── testing.py │ ├── img.py │ ├── io.py │ └── _colormaps.py │ ├── datasets │ ├── __init__.py │ ├── nyuv2 │ │ ├── __init__.py │ │ ├── splits.mat │ │ ├── class13Mapping.mat │ │ ├── classMapping40.mat │ │ └── README.md │ ├── scannet │ │ ├── __init__.py │ │ ├── scannetv2_test.txt │ │ ├── README.md │ │ └── scannetv2_val.txt │ ├── sunrgbd │ │ ├── __init__.py │ │ ├── legacy_emsanet_version │ │ │ ├── __init__.py │ │ │ ├── nyu_weak_box_3d_mapping.json │ │ │ └── nyu_additional_class_mapping.json │ │ ├── nyu_weak_box_3d_mapping.json │ │ ├── nyu_additional_class_mapping.json │ │ └── README.md │ ├── cityscapes │ │ ├── __init__.py │ │ ├── README.md │ │ └── cityscapes.py │ ├── hypersim │ │ ├── __init__.py │ │ └── README.md │ ├── scenenetrgbd │ │ ├── __init__.py │ │ ├── README.md │ │ ├── scenenetrgbd.py │ │ ├── scenenet.proto │ │ └── dataset.py │ ├── ade20k │ │ ├── __init__.py │ │ ├── README.md │ │ └── _class_mappings.py │ └── coco │ │ ├── __init__.py │ │ ├── README.md │ │ └── dataset.py │ ├── scripts │ ├── __init__.py │ ├── prepare_dataset.py │ └── common.py │ ├── mira │ ├── __init__.py │ └── utils.py │ ├── dataset_base │ ├── _meta.py │ ├── _rgbd_dataset.py │ ├── _rgb_dataset.py │ ├── __init__.py │ ├── _config.py │ ├── _annotation.py │ ├── _class_weighting.py │ └── _concat_dataset.py │ ├── auxiliary_data │ ├── __init__.py │ ├── embedding_estimation │ │ ├── __init__.py │ │ └── _base.py │ ├── depth_estimation │ │ ├── __init__.py │ │ └── _base.py │ ├── _config.py │ └── _base.py │ ├── d2 │ ├── __init__.py │ └── _auto_init.py │ ├── version.py │ ├── __init__.py │ └── pytorch.py ├── .coveragerc ├── .vscode ├── settings.json └── launch.json ├── tests ├── conftest.py ├── test_scenenetrgbd.py ├── test_embedding_estimation.py ├── test_cityscapes.py ├── test_coco.py ├── test_d2.py ├── test_depth_estimation.py ├── test_ade20k.py ├── test_nyuv2.py ├── test_concat.py ├── test_sunrgbd.py ├── test_hypersim.py └── test_scannet.py ├── .gitignore ├── pyproject.toml └── .gitlab-ci.yml /requirements/3d.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | plyfile 3 | -------------------------------------------------------------------------------- /requirements/opencv.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | -------------------------------------------------------------------------------- /requirements/depth_estimation.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.45.0 2 | -------------------------------------------------------------------------------- /requirements/torch.txt: -------------------------------------------------------------------------------- 1 | torch>=2.3.1 # DepthAnythingV2 requires torch.nn.RMSNorm 2 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | cityscapesScripts==1.5.0 2 | numpy 3 | pillow 4 | scipy 5 | tqdm>=4.42.0 6 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | panopticapi @ 
git+https://github.com/cocodataset/panopticapi.git 2 | pytest>=3.0.2 3 | -------------------------------------------------------------------------------- /requirements/embedding_estimation.txt: -------------------------------------------------------------------------------- 1 | loralib 2 | alpha-clip @ git+https://github.com/SunzeY/AlphaCLIP.git 3 | gdown 4 | -------------------------------------------------------------------------------- /requirements/preparation.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | numba 3 | pandas 4 | panopticapi @ git+https://github.com/cocodataset/panopticapi.git 5 | protobuf 6 | termcolor 7 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/nyuv2/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scannet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/cityscapes/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/hypersim/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Marius Engelhardt 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scenenetrgbd/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/ade20k/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/coco/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/nyuv2/splits.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/nicr-scene-analysis-datasets/HEAD/src/nicr_scene_analysis_datasets/datasets/nyuv2/splits.mat -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/legacy_emsanet_version/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/nyuv2/class13Mapping.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/nicr-scene-analysis-datasets/HEAD/src/nicr_scene_analysis_datasets/datasets/nyuv2/class13Mapping.mat -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/nyuv2/classMapping40.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/nicr-scene-analysis-datasets/HEAD/src/nicr_scene_analysis_datasets/datasets/nyuv2/classMapping40.mat -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit= 3 | */prepare_dataset.py 4 | */datasets/scannet/SensorData.py 5 | */datasets/scenenetrgbd/scenenet_pb2.py 6 | */datasets/sunrgbd/match_nyuv2_instances.py 7 | */datasets/sunrgbd/prepare_instances.py 8 | */mira/* 9 | */scripts/* 10 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/mira/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from ._hypersim_reader import HypersimReaderBase # noqa: F401 6 | from ._scannet_reader import ScanNetReaderBase # noqa: F401 7 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_meta.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | from dataclasses import dataclass 6 | 7 | 8 | @dataclass(frozen=True) 9 | class DepthStats: 10 | min: float 11 | max: float 12 | mean: float 13 | std: float 14 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/nyu_weak_box_3d_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "books": ["bookshelf"], 3 | "shower curtain": ["curtain"], 4 | 5 | "counter": ["sink"], 6 | "shelves": ["bookshelf", "cabinet"], 7 | "floor mat": ["otherprop"], 8 | "ceiling": ["otherstructure"], 9 | "paper": ["box"], 10 | "bag": ["bag"] 11 | } -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/legacy_emsanet_version/nyu_weak_box_3d_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "books": ["bookshelf"], 3 | "shower curtain": ["curtain"], 4 | 5 | "counter": ["sink"], 6 | "shelves": ["bookshelf", "cabinet"], 7 | "floor mat": ["otherprop"], 8 | "ceiling": ["otherstructure"], 9 | "paper": ["box"], 10 | "bag": ["bag"] 11 | } -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | from ._config import DatasetConfigWithAuxiliary 6 | from ._config import build_dataset_config_with_auxiliary 7 | from ._dataset import _AuxiliaryDataset 8 | from ._dataset import wrap_dataset_with_auxiliary_data 9 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/d2/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | 6 | from . import _auto_init # noqa: F401 7 | from .utils import NICRChainedDatasetMapper # noqa: F401 8 | from .utils import NICRSceneAnalysisDatasetMapper # noqa: F401 9 | from .utils import register_dataset_to_d2 # noqa: F401 10 | from .utils import set_dataset_path # noqa: F401 11 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from functools import lru_cache 6 | from functools import partialmethod 7 | 8 | 9 | @lru_cache() 10 | def partial_class(cls, *args, **kwargs): 11 | # modified version of: https://stackoverflow.com/a/38911383 12 | if args or kwargs: 13 | 14 | class PartialClass(cls): 15 | __init__ = partialmethod(cls.__init__, *args, **kwargs) 16 | 17 | return PartialClass 18 | else: 19 | return cls 20 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/testing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import os 6 | 7 | from .. 
import KNOWN_DATASETS 8 | from ..version import get_version 9 | 10 | 11 | DATASET_BASEPATH = os.environ.get( 12 | 'NICR_SA_DATASET_BASEPATH', 13 | os.path.join('/datasets_nas/nicr_scene_analysis_datasets/', 14 | 'version_{}{}{}'.format(*get_version(with_suffix=False))) 15 | ) 16 | 17 | DATASET_PATH_DICT = { 18 | key: os.path.join(DATASET_BASEPATH, key) 19 | for key in KNOWN_DATASETS 20 | } 21 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | ".", 4 | "-vvs", 5 | ], 6 | "python.testing.unittestEnabled": false, 7 | "python.testing.pytestEnabled": true, 8 | "python.linting.pycodestyleEnabled": true, 9 | "python.linting.enabled": true, 10 | // disable annoying top-level source code modification indication 11 | "gitlens.codeLens.authors.enabled": false, 12 | "gitlens.codeLens.recentChange.enabled": false, 13 | "files.trimTrailingWhitespace": true, 14 | "[markdown]": { 15 | "files.trimTrailingWhitespace": false 16 | }, 17 | "cSpell.words": [ 18 | "codeauthor" 19 | ], 20 | } -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_rgbd_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. codeauthor:: Soehnke Fischedick 5 | """ 6 | from typing import Tuple 7 | 8 | from ._depth_dataset import DepthDataset 9 | from ._rgb_dataset import RGBDataset 10 | 11 | 12 | class RGBDDataset(RGBDataset, DepthDataset): 13 | def __init__( 14 | self, 15 | depth_mode: str = 'raw', 16 | sample_keys: Tuple[str] = ('rgb', 'depth', 'semantic'), 17 | use_cache: bool = False, 18 | **kwargs 19 | ) -> None: 20 | super().__init__( 21 | depth_mode=depth_mode, 22 | sample_keys=sample_keys, 23 | use_cache=use_cache, 24 | **kwargs 25 | ) 26 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import os 6 | import shutil 7 | 8 | import pytest 9 | 10 | 11 | def pytest_addoption(parser): 12 | parser.addoption('--keep-files', action='store_true') 13 | 14 | 15 | @pytest.fixture(scope='session') 16 | def keep_files(request): 17 | return request.config.getoption('--keep-files') 18 | 19 | 20 | @pytest.fixture(scope='session') 21 | def tmp_path(tmpdir_factory, keep_files): 22 | # see: https://docs.pytest.org/en/6.2.x/reference.html#tmpdir-factory 23 | # use '--basetemp' to change default path 24 | # -> BE AWARE <- --basetemp is cleared on start !!! 25 | 26 | path = tmpdir_factory.mktemp('nicr_scene_analysis_datasets') 27 | print(f"\nWriting temporary files to '{path}'") 28 | if keep_files: 29 | print("Files are kept and require to be deleted manually!") 30 | 31 | yield path 32 | 33 | # teardown (delete if it was created) 34 | if os.path.exists(path) and not keep_files: 35 | shutil.rmtree(path) 36 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_rgb_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. 
codeauthor:: Soehnke Fischedick 5 | """ 6 | from typing import Tuple 7 | 8 | import abc 9 | 10 | import numpy as np 11 | 12 | from ._annotation import IntrinsicCameraParametersNormalized 13 | from ._base_dataset import DatasetBase 14 | 15 | 16 | class RGBDataset(DatasetBase): 17 | def __init__( 18 | self, 19 | sample_keys: Tuple[str] = ('rgb', 'semantic'), 20 | use_cache: bool = False, 21 | **kwargs 22 | ) -> None: 23 | super().__init__( 24 | sample_keys=sample_keys, 25 | use_cache=use_cache, 26 | **kwargs 27 | ) 28 | 29 | @abc.abstractmethod 30 | def _load_rgb(self, idx) -> np.ndarray: 31 | pass 32 | 33 | def _load_rgb_intrinsics(self, idx) -> IntrinsicCameraParametersNormalized: 34 | # so far, only few datasets support intrinsics, thus, we define a 35 | # default here 36 | raise NotImplementedError() 37 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/scripts/prepare_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import argparse as ap 6 | import importlib 7 | 8 | from .. import KNOWN_DATASETS 9 | 10 | 11 | def main(): 12 | # parse args 13 | parser = ap.ArgumentParser( 14 | formatter_class=ap.ArgumentDefaultsHelpFormatter, 15 | description="Prepare a dataset for scene analysis." 16 | ) 17 | subparsers = parser.add_subparsers( 18 | help='Dataset to prepare.', 19 | dest='dataset', 20 | required=True 21 | ) 22 | for dataset in KNOWN_DATASETS: 23 | subparsers.add_parser(dataset, add_help=False) # redirect help 24 | 25 | parsed_args, remaining_args = parser.parse_known_args() 26 | 27 | # import dataset module 28 | dataset_module = importlib.import_module( 29 | name=f'..datasets.{parsed_args.dataset}.prepare_dataset', 30 | package=__package__ 31 | ) 32 | 33 | # run prepare function 34 | dataset_module.main(remaining_args) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import os 6 | import subprocess 7 | import warnings 8 | 9 | _VERSION_MAJOR = 0 10 | _VERSION_MINOR = 8 11 | _VERSION_MICRO = 3 12 | 13 | 14 | def get_version(with_suffix=False): # pragma no cover 15 | if with_suffix: 16 | try: 17 | suffix = subprocess.check_output( 18 | ['git', 'describe', '--always', '--dirty'], 19 | cwd=os.path.abspath(os.path.dirname(__file__)) 20 | ) 21 | suffix = suffix.decode().strip() 22 | # replace - with . to be PEP440 compliant, 23 | # e.g., d2c4396-dirty -> d2c4396.dirty 24 | suffix = suffix.replace('-', '.') 25 | except Exception: 26 | warnings.warn("Cannot determine version suffix using git.") 27 | suffix = '' 28 | 29 | return _VERSION_MAJOR, _VERSION_MINOR, _VERSION_MICRO, suffix 30 | 31 | else: 32 | return _VERSION_MAJOR, _VERSION_MINOR, _VERSION_MICRO 33 | 34 | 35 | __version__ = '{}.{}.{}'.format(*get_version(with_suffix=False)) 36 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/embedding_estimation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Soehnke Fischedick 4 | """ 5 | from typing import Union 6 | 7 | from ...utils.misc import partial_class 8 | from .alpha_clip import AlphaCLIPEmbeddingEstimator 9 | 10 | 11 | _EMBEDDING_ESTIMATORS = { 12 | # Alpha-CLIP 13 | AlphaCLIPEmbeddingEstimator.NAME: AlphaCLIPEmbeddingEstimator, 14 | } 15 | # add all variants of each base class as well 16 | for cls in list(_EMBEDDING_ESTIMATORS.values()): 17 | if issubclass(cls, AlphaCLIPEmbeddingEstimator): 18 | for model_name in cls.MODEL_LOOKUP_DICT.keys(): 19 | n = f"{cls.NAME}__{model_name}" 20 | _EMBEDDING_ESTIMATORS[n] = partial_class(cls, model_name=model_name) 21 | 22 | KNOWN_EMBEDDING_ESTIMATORS = tuple(sorted(_EMBEDDING_ESTIMATORS.keys())) 23 | 24 | EMBEDDING_ESTIMATOR_TYPE = Union[ 25 | AlphaCLIPEmbeddingEstimator, 26 | ] 27 | 28 | 29 | def get_embedding_estimator_class(name: str) -> EMBEDDING_ESTIMATOR_TYPE: 30 | # force lowercase 31 | name = name.lower() 32 | 33 | cls = _EMBEDDING_ESTIMATORS.get(name, None) 34 | if cls is None: 35 | raise ValueError(f"Unknown embedding estimator: '{name}'") 36 | 37 | return cls 38 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from ._annotation import ExtrinsicCameraParametersNormalized # noqa: F401 6 | from ._annotation import IntrinsicCameraParametersNormalized # noqa: F401 7 | from ._annotation import MetaDict # noqa: F401 8 | from ._annotation import OrientationDict # noqa: F401 9 | from ._annotation import PanopticEmbeddingDict # noqa: F401 10 | from ._annotation import SampleIdentifier # noqa: F401 11 | from ._annotation import SceneLabel # noqa: F401 12 | from ._annotation import SceneLabelList # noqa: F401 13 | from ._annotation import SemanticLabel # noqa: F401 14 | from ._annotation import SemanticLabelList # noqa: F401 15 | 16 | from ._class_weighting import KNOWN_CLASS_WEIGHTINGS # noqa: F401 17 | from ._class_weighting import compute_class_weights # noqa: F401 18 | 19 | from ._config import build_dataset_config # noqa: F401 20 | from ._config import DatasetConfig # noqa: F401 21 | 22 | from ._meta import DepthStats # noqa: F401 23 | 24 | from ._base_dataset import DatasetBase # noqa: F401 25 | from ._concat_dataset import ConcatDataset # noqa: F401 26 | from ._depth_dataset import DepthDataset # noqa: F401 27 | from ._rgb_dataset import RGBDataset # noqa: F401 28 | from ._rgbd_dataset import RGBDDataset # noqa: F401 29 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/depth_estimation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | from typing import Union 6 | 7 | from ...utils.misc import partial_class 8 | from .hugging_face import DepthAnythingV2DepthEstimator 9 | from .hugging_face import DinoV2DPTDepthEstimator 10 | from .hugging_face import ZoeDepthDepthEstimator 11 | from .hugging_face import _HuggingFaceDepthEstimator 12 | 13 | 14 | _DEPTH_ESTIMATORS = { 15 | # DepthAnything V2 16 | DepthAnythingV2DepthEstimator.NAME: DepthAnythingV2DepthEstimator, 17 | # ZoeDepth 18 | ZoeDepthDepthEstimator.NAME: ZoeDepthDepthEstimator, 19 | # Dino V2 with Dense Prediction Transformer (DPT) head for depth estimation 20 | DinoV2DPTDepthEstimator.NAME: DinoV2DPTDepthEstimator 21 | } 22 | # add all variants of each base class as well 23 | for cls in list(_DEPTH_ESTIMATORS.values()): 24 | if issubclass(cls, _HuggingFaceDepthEstimator): 25 | for model_name in cls.MODEL_LOOKUP_DICT.keys(): 26 | n = f"{cls.NAME}__{model_name}" 27 | _DEPTH_ESTIMATORS[n] = partial_class(cls, model_name=model_name) 28 | 29 | KNOWN_DEPTH_ESTIMATORS = tuple(sorted(_DEPTH_ESTIMATORS.keys())) 30 | 31 | DEPTH_ESTIMATOR_TYPE = Union[ 32 | DepthAnythingV2DepthEstimator, 33 | ZoeDepthDepthEstimator, 34 | DinoV2DPTDepthEstimator 35 | ] 36 | 37 | 38 | def get_depth_estimator_class(name: str) -> DEPTH_ESTIMATOR_TYPE: 39 | # force lowercase 40 | name = name.lower() 41 | 42 | cls = _DEPTH_ESTIMATORS.get(name, None) 43 | if cls is None: 44 | raise ValueError(f"Unknown depth estimator: '{name}'") 45 | 46 | return cls 47 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/embedding_estimation/_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | from typing import Union 6 | 7 | import numpy as np 8 | import torch 9 | 10 | from .._base import AuxiliaryDataEstimatorBase 11 | 12 | 13 | UINT16_MAX = np.iinfo('uint16').max 14 | 15 | 16 | class EmbeddingEstimatorBase(AuxiliaryDataEstimatorBase): 17 | NAME: str 18 | 19 | def predict( 20 | self, 21 | rgb_img: Union[torch.Tensor, np.ndarray], 22 | mask_img: Union[torch.Tensor, np.ndarray], 23 | ) -> Union[torch.Tensor, np.ndarray]: 24 | # store input type and original shape for later postprocessing 25 | rgb_is_numpy = isinstance(rgb_img, np.ndarray) 26 | rgb_h, rgb_w = self._get_height_width(rgb_img) 27 | # Ensure that mask only has 0 and 1 values 28 | assert np.all(np.isin(mask_img, [0, 1])) 29 | 30 | mask_h, mask_w = self._get_height_width(mask_img) 31 | assert rgb_h == mask_h and rgb_w == mask_w, \ 32 | f"Input image and mask must have the same shape. " \ 33 | f"Got '{rgb_h}x{rgb_w}' and '{mask_h}x{mask_w}'." 
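        # Illustrative caller-side sketch (hedged; it mirrors
        # tests/test_embedding_estimation.py, and the estimator name is just
        # one entry of KNOWN_EMBEDDING_ESTIMATORS, not a recommendation):
        #
        #   estimator_cls = get_embedding_estimator_class('alpha_clip__l14-336-grit-20m')
        #   estimator = estimator_cls(device='cpu', auto_set_up=True)
        #   mask = np.ones_like(rgb, dtype=np.uint8)[:, :, 0][:, :, None]  # full-image binary mask (H, W, 1)
        #   embedding = estimator.predict(rgb, mask)  # -> array of shape (1, embedding_dim)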
34 | 35 | # prepare the input to have the correct shape 36 | rgb_img = self.prepare_input(rgb_img) 37 | mask_img = self.prepare_input(mask_img) 38 | 39 | # apply estimator 40 | rgb_img = rgb_img.to(self._device).to(torch.float32) 41 | mask_img = mask_img.to(self._device).to(torch.float32) 42 | 43 | predicted_embeddings = self._estimator_predict(rgb_img, mask_img).cpu() 44 | 45 | # convert to numpy 2d array if input was numpy 46 | if rgb_is_numpy: 47 | predicted_embeddings = predicted_embeddings.numpy() 48 | 49 | return predicted_embeddings 50 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scannet/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/_config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Soehnke Fischedick 4 | """ 5 | import dataclasses 6 | 7 | from ..dataset_base import DatasetConfig 8 | 9 | 10 | @dataclasses.dataclass(frozen=True) 11 | class DatasetConfigWithAuxiliary(DatasetConfig): 12 | semantic_text_embeddings: list = None 13 | scene_text_embeddings: list = None 14 | mean_embedding_per_semantic_class: dict = None 15 | mean_image_embedding_per_semantic_class: dict = None 16 | 17 | 18 | def build_dataset_config_with_auxiliary( 19 | original_config: DatasetConfig, 20 | semantic_text_embeddings: list, 21 | scene_text_embeddings: list, 22 | mean_embedding_per_semantic_class: dict, 23 | mean_image_embedding_per_semantic_class: dict 24 | ) -> DatasetConfigWithAuxiliary: 25 | """ 26 | Creates a new DatasetConfigWithAuxiliary instance by copying attributes 27 | from the original config and adding auxiliary fields. 28 | """ 29 | # Create a new instance of DatasetConfigWithAuxiliary 30 | # Note: We didn't just use dataclasses.asdict(original_config) as it would 31 | # also convert its members to a dict, which is not what we want. 32 | new_config = DatasetConfigWithAuxiliary( 33 | semantic_label_list=original_config.semantic_label_list, 34 | semantic_label_list_without_void=original_config.semantic_label_list_without_void, 35 | scene_label_list=original_config.scene_label_list, 36 | scene_label_list_without_void=original_config.scene_label_list_without_void, 37 | depth_stats=original_config.depth_stats, 38 | semantic_text_embeddings=semantic_text_embeddings, 39 | scene_text_embeddings=scene_text_embeddings, 40 | mean_embedding_per_semantic_class=mean_embedding_per_semantic_class, 41 | mean_image_embedding_per_semantic_class=mean_image_embedding_per_semantic_class 42 | ) 43 | return new_config 44 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | from typing import Type, Union 6 | 7 | 8 | from .utils.imports import install_nicr_scene_analysis_datasets_dependency_import_hooks 9 | 10 | install_nicr_scene_analysis_datasets_dependency_import_hooks() 11 | 12 | 13 | from .auxiliary_data import wrap_dataset_with_auxiliary_data 14 | from .dataset_base import KNOWN_CLASS_WEIGHTINGS 15 | from .dataset_base import ConcatDataset 16 | from .datasets.ade20k.dataset import ADE20K 17 | from .datasets.cityscapes.dataset import Cityscapes 18 | from .datasets.coco.dataset import COCO 19 | from .datasets.hypersim.dataset import Hypersim 20 | from .datasets.nyuv2.dataset import NYUv2 21 | from .datasets.scannet.dataset import ScanNet 22 | from .datasets.scenenetrgbd.dataset import SceneNetRGBD 23 | from .datasets.sunrgbd.dataset import SUNRGBD 24 | 25 | 26 | _DATASETS = { 27 | 'ade20k': ADE20K, 28 | 'cityscapes': Cityscapes, 29 | 'coco': COCO, 30 | 'hypersim': Hypersim, 31 | 'nyuv2': NYUv2, 32 | 'scannet': ScanNet, 33 | 'scenenetrgbd': SceneNetRGBD, 34 | 'sunrgbd': SUNRGBD, 35 | } 36 | KNOWN_DATASETS = tuple(_DATASETS.keys()) 37 | 38 | DatasetType = Union[ 39 | ADE20K, 40 | Cityscapes, 41 | COCO, 42 | Hypersim, 43 | NYUv2, 44 | ScanNet, 45 | SceneNetRGBD, 46 | SUNRGBD, 47 | ConcatDataset 48 | ] 49 | 50 | 51 | def get_dataset_class(name: str, with_auxiliary_data: bool = False) -> Type[DatasetType]: 52 | name = name.lower() 53 | if name not in KNOWN_DATASETS: 54 | raise ValueError(f"Unknown dataset: '{name}'") 55 | original_dataset_class = _DATASETS[name] 56 | if with_auxiliary_data: 57 | current_dataset_class = \ 58 | wrap_dataset_with_auxiliary_data(original_dataset_class) 59 | else: 60 | current_dataset_class = original_dataset_class 61 | 62 | return current_dataset_class 63 | 64 | 65 | from .version import __version__ 66 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/depth_estimation/_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. codeauthor:: Soehnke Fischedick 5 | """ 6 | from typing import Union 7 | 8 | 9 | import numpy as np 10 | import torch 11 | 12 | from .._base import AuxiliaryDataEstimatorBase 13 | 14 | 15 | UINT16_MAX = np.iinfo('uint16').max 16 | 17 | 18 | class DepthEstimatorBase(AuxiliaryDataEstimatorBase): 19 | NAME: str 20 | 21 | def predict( 22 | self, 23 | rgb_img: Union[torch.Tensor, np.ndarray], 24 | ) -> Union[torch.Tensor, np.ndarray]: 25 | # store input type and original shape for later postprocessing 26 | is_numpy = isinstance(rgb_img, np.ndarray) 27 | h, w = self._get_height_width(rgb_img) 28 | 29 | # prepare the input to have the correct shape 30 | rgb_img = self.prepare_input(rgb_img) 31 | 32 | # apply estimator 33 | rgb_img = rgb_img.to(self._device).to(torch.float32) 34 | 35 | predicted_depth = self._estimator_predict(rgb_img).cpu() 36 | 37 | # resize to original shape 38 | predicted_depth = self._resize_image( 39 | predicted_depth[:, None, ...], # (B, H, W) -> (B, C, H, W) 40 | height=h, width=w, mode='nearest' 41 | ) 42 | 43 | # convert to numpy 2d array if input was numpy 44 | if is_numpy: 45 | predicted_depth = predicted_depth.numpy()[0, 0] 46 | n_above_max = (predicted_depth > UINT16_MAX).sum() 47 | if n_above_max > 0: 48 | print( 49 | f"Warning: Detected {n_above_max} values above " 50 | f"{UINT16_MAX} in predicted depth." 
51 | ) 52 | predicted_depth = np.clip(predicted_depth, 0, UINT16_MAX) 53 | predicted_depth = np.asarray(predicted_depth, dtype='uint16') 54 | 55 | assert 2 == predicted_depth.ndim 56 | 57 | return predicted_depth 58 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/cityscapes/README.md: -------------------------------------------------------------------------------- 1 | # Cityscapes dataset 2 | 3 | The Cityscapes dataset contains a diverse set of stereo video sequences recorded in street scenes from 50 different cities, with high quality pixel-level annotations of 5 000 frames in addition to a larger set of 20 000 weakly annotated frames. 4 | The dataset is thus an order of magnitude larger than similar previous attempts. Details on [annotated classes](https://www.cityscapes-dataset.com/dataset-overview/#class-definitions) and [examples of our annotations](https://www.cityscapes-dataset.com/examples/#dense-pixel-annotations) are available at this webpage. 5 | 6 | For more details, see: [Cityscapes Dataset](https://www.cityscapes-dataset.com/) and [Cityscapes Dataset at GitHub](https://github.com/mcordts/cityscapesScripts). 7 | 8 | ## Prepare dataset 9 | 10 | 1. Download and unzip dataset files: 11 | Use `csDownload` or download the files mentioned below manually from: [Cityscapes Dataset Downloads](https://www.cityscapes-dataset.com/downloads/) 12 | 13 | ```bash 14 | CITYSCAPES_DOWNLOAD_DIR="/path/where/to/store/cityscapes_downloads" 15 | 16 | # using cityscapesScripts 17 | # use "csDownload -l" to list available packages 18 | 19 | # labels (semantic, instance) 20 | csDownload gtFine_trainvaltest.zip -d $CITYSCAPES_DOWNLOAD_DIR # -> 241MB 21 | # rgb images 22 | csDownload leftImg8bit_trainvaltest.zip -d $CITYSCAPES_DOWNLOAD_DIR # -> 11GB 23 | # disparity images (only upon request) 24 | csDownload disparity_trainvaltest.zip -d $CITYSCAPES_DOWNLOAD_DIR # -> 3.5GB 25 | # intrinsic and extrinsic camera parameter to calculate depth 26 | csDownload camera_trainvaltest.zip -d $CITYSCAPES_DOWNLOAD_DIR # -> 2MB 27 | 28 | # unzip files 29 | find $CITYSCAPES_DOWNLOAD_DIR -name '*.zip' -exec unzip -o {} -d $CITYSCAPES_DOWNLOAD_DIR \; 30 | ``` 31 | 32 | 2. Convert dataset: 33 | ```bash 34 | # general usage 35 | nicr_sa_prepare_dataset cityscapes \ 36 | /path/where/to/store/cityscapes \ 37 | $CITYSCAPES_DOWNLOAD_DIR 38 | ``` 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # Environments 82 | .env 83 | .venv 84 | env/ 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | .spyproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # mkdocs documentation 96 | /site 97 | 98 | # mypy 99 | .mypy_cache/ 100 | 101 | # PyCharm 102 | .idea 103 | 104 | # MacOS 105 | .DS_Store 106 | 107 | # Binaries 108 | .npz 109 | .npy 110 | .h5 111 | .hdf5 112 | core 113 | 114 | inference_time*.pdf 115 | inference_time*.json 116 | onnx_models/*.onnx 117 | *.onnx 118 | onnx_models/*.trt 119 | *.pth 120 | *.tar 121 | *.pickle 122 | datasets/* 123 | !datasets/.gitkeep 124 | trained_models/* 125 | !trained_models/.gitkeep 126 | todo.txt 127 | *.pdf 128 | results 129 | *.svg 130 | .pytest_cache 131 | .vscode 132 | 133 | stuff/hypersim_instance_semantic_overlaps/hypersim_instances -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/d2/_auto_init.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | """ 5 | from ..pytorch import ADE20K 6 | from ..pytorch import COCO 7 | from ..pytorch import Cityscapes 8 | from ..pytorch import Hypersim 9 | from ..pytorch import NYUv2 10 | from ..pytorch import ScanNet 11 | from ..pytorch import SceneNetRGBD 12 | from ..pytorch import SUNRGBD 13 | from .utils import register_dataset_to_d2 14 | 15 | # Automatically register all datasets with some default keys so that they 16 | # are available through Detectron2's DatasetCatalog. 17 | # Note that they are just registered so that the stats can be access. 18 | # For using the dataset, the 'set_dataset_path' function should be called first. 19 | # Moreover, we currently do not load the 'depth' sample key for any dataset. 20 | # If your interested in another sample key, remove the dataset and call 21 | # 'register_dataset_to_d2' yourself. 
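# A minimal sketch of that manual route (hedged: 'nyuv2_rgbd' is a made-up
# name prefix, and set_dataset_path is assumed to take the dataset base path;
# both helpers are re-exported via nicr_scene_analysis_datasets.d2):
#
#   from nicr_scene_analysis_datasets.d2 import register_dataset_to_d2
#   from nicr_scene_analysis_datasets.d2 import set_dataset_path
#   from nicr_scene_analysis_datasets.pytorch import NYUv2
#
#   set_dataset_path('/path/to/prepared/datasets')
#   register_dataset_to_d2(
#       name_prefix='nyuv2_rgbd',
#       dataset_class=NYUv2,
#       sample_keys=('identifier', 'rgb', 'depth', 'semantic', 'instance')
#   )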
22 | register_dataset_to_d2( 23 | name_prefix='ade20k', 24 | dataset_class=ADE20K, 25 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 26 | ) 27 | register_dataset_to_d2( 28 | name_prefix='cityscapes', 29 | dataset_class=Cityscapes, 30 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 31 | ) 32 | register_dataset_to_d2( 33 | name_prefix='coco', 34 | dataset_class=COCO, 35 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 36 | ) 37 | register_dataset_to_d2( 38 | name_prefix='hypersim', 39 | dataset_class=Hypersim, 40 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 41 | ) 42 | register_dataset_to_d2( 43 | name_prefix='nyuv2', 44 | dataset_class=NYUv2, 45 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 46 | ) 47 | register_dataset_to_d2( 48 | name_prefix='scannet', 49 | dataset_class=ScanNet, 50 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 51 | ) 52 | register_dataset_to_d2( 53 | name_prefix='scenenetrgbd', 54 | dataset_class=SceneNetRGBD, 55 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 56 | ) 57 | register_dataset_to_d2( 58 | name_prefix='sunrgbd', 59 | dataset_class=SUNRGBD, 60 | sample_keys=('identifier', 'rgb', 'semantic', 'instance') 61 | ) 62 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | .. codeauthor:: Daniel Seichter 5 | """ 6 | import dataclasses 7 | from typing import Union 8 | 9 | from ._annotation import SceneLabelList 10 | from ._annotation import SemanticLabelList 11 | from ._meta import DepthStats 12 | 13 | 14 | @dataclasses.dataclass(frozen=True) 15 | class DatasetConfig: 16 | semantic_label_list: SemanticLabelList 17 | semantic_label_list_without_void: SemanticLabelList 18 | scene_label_list: SceneLabelList 19 | scene_label_list_without_void: SceneLabelList 20 | depth_stats: Union[DepthStats, None] 21 | 22 | 23 | def build_dataset_config( 24 | semantic_label_list: SemanticLabelList, 25 | scene_label_list: Union[SceneLabelList, None] = None, 26 | depth_stats: Union[DepthStats, None] = None 27 | ) -> DatasetConfig: 28 | """ 29 | Builds a dataset config from a semantic and scene label list and known 30 | depth stats. 31 | 32 | Notes 33 | ----- 34 | The function assumes that the first element in the semantic label list has 35 | the void label. 
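
    Example (an illustrative sketch of how the void entry is split off; the
    'wall' label and the depth values below are made-up placeholders)::

        labels = SemanticLabelList((
            SemanticLabel('void', False, False, (0, 0, 0)),
            SemanticLabel('wall', False, False, (120, 120, 120)),
        ))
        config = build_dataset_config(
            semantic_label_list=labels,
            depth_stats=DepthStats(min=300.0, max=10000.0, mean=3000.0, std=1500.0)
        )
        # config.semantic_label_list_without_void then contains only 'wall'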
36 | """ 37 | scene_label_list = scene_label_list or SceneLabelList(()) 38 | 39 | # build semantic label list without void 40 | semantic_label_list_without_void = SemanticLabelList(()) 41 | for idx, label in enumerate(semantic_label_list): 42 | # skip void 43 | if idx == 0: 44 | # we always have 0 as void 45 | continue 46 | semantic_label_list_without_void.add_label(label) 47 | 48 | # build scene label list without void 49 | scene_label_list_without_void = SceneLabelList(()) 50 | for label in scene_label_list: 51 | # skip void 52 | if 'void' == label.class_name.lower(): 53 | # indoor domestic class labels contain a void class 54 | continue 55 | scene_label_list_without_void.add_label(label) 56 | 57 | # create dataset config 58 | config = DatasetConfig( 59 | semantic_label_list, 60 | semantic_label_list_without_void, 61 | scene_label_list, 62 | scene_label_list_without_void, 63 | depth_stats 64 | ) 65 | 66 | return config 67 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/coco/README.md: -------------------------------------------------------------------------------- 1 | # COCO dataset 2 | 3 | COCO is a large-scale object detection, segmentation, and captioning dataset. 4 | It contains over 200.000 labeled images with 80 object and 91 stuff categories 5 | for panoptic segmentation. 6 | 7 | For more details, see: [COCO dataset](https://cocodataset.org/#home) 8 | 9 | ## Prepare dataset 10 | 1. Convert the dataset 11 | ```bash 12 | # general usage 13 | nicr_sa_prepare_dataset coco \ 14 | /path/where/to/store/coco/ 15 | ``` 16 | 17 | 2. (Optional) Generate auxiliary data 18 | > **Note**: To use auxiliary data generation, the package must be installed with the `withauxiliarydata` option: 19 | > ```bash 20 | > pip install -e .[withauxiliarydata] 21 | > ``` 22 | 23 | ```bash 24 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 25 | nicr_sa_generate_auxiliary_data \ 26 | --dataset coco \ 27 | --dataset-path /path/to/already/prepared/coco/dataset \ 28 | --auxiliary-data depth image-embedding panoptic-embedding \ 29 | --embedding-estimator-device cuda \ 30 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 31 | --depth-estimator-device cuda \ 32 | --depth-estimators depthanything_v2__indoor_large \ 33 | --cache-models 34 | ``` 35 | 36 | With arguments: 37 | - `--dataset-path`: 38 | Path to the prepared COCO dataset. 39 | - `--auxiliary-data`: 40 | Types of auxiliary data to generate: 41 | - `depth`: Generates synthetic depth images from RGB. 42 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 43 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 44 | - `--depth-estimator-device`: 45 | Device to use for depth estimation (`cpu` or `cuda`). 46 | - `--depth-estimators`: 47 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 48 | - `--embedding-estimator-device`: 49 | Device to use for embedding estimation (`cpu` or `cuda`). 50 | - `--embedding-estimators`: 51 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 52 | - `--cache-models`: 53 | Cache models locally to avoid reloading them in future runs. 54 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/mira/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | import cv2 6 | 7 | from PythonImageWrapper import Img 8 | from PythonImageWrapper import Img8U1 9 | 10 | 11 | def to_mira_img(img, rgb2bgr=False): 12 | if 3 == img.ndim: 13 | h, w, n = img.shape 14 | elif 2 == img.ndim: 15 | h, w = img.shape 16 | n = 1 17 | else: 18 | raise ValueError(f"Unknown shape: {img.shape}") 19 | 20 | if rgb2bgr: 21 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 22 | 23 | if img.dtype == 'uint8': 24 | t = '8U' 25 | elif img.dtype == 'uint16': 26 | t = '16U' 27 | elif img.dtype == 'float32': 28 | t = '32F' 29 | else: 30 | raise ValueError(f"Unknown dtype: {img.dtype}") 31 | cv_type = getattr(cv2, f'CV_{t}C{n}') 32 | 33 | img_mira = Img(w, h, cv_type, n) 34 | img_mira.setMat(img) 35 | 36 | return img_mira 37 | 38 | 39 | def to_mira_img8u1(img): 40 | assert img.dtype == 'uint8' 41 | assert img.ndim == 2 42 | 43 | h, w = img.shape 44 | img_mira = Img8U1(w, h) 45 | img_mira.setMat(img) 46 | 47 | return img_mira 48 | 49 | 50 | def parse_list(comma_sep_str, cast_to=str): 51 | if cast_to is bool: 52 | cast_to = lambda x: x.lower() in ['true', '1'] 53 | 54 | return [cast_to(e.strip()) 55 | for e in comma_sep_str.strip().split(',') 56 | if e.strip()] 57 | 58 | 59 | class AutoGetterSetter: 60 | def __getattr__(self, name): 61 | """Generic getter and setter methods for reflection""" 62 | if name.startswith(('_rget', '_rset')): 63 | member = name[5:] 64 | if member not in self.__dict__: 65 | raise AttributeError( 66 | "{} has no attribute '{}'".format(self, name) 67 | ) 68 | 69 | if name.startswith('_rset'): 70 | # make setter 71 | def _cb_set(value): 72 | setattr(self, member, value) 73 | return _cb_set 74 | elif name.startswith('_rget'): 75 | # make getter 76 | def _cb_get(): 77 | return getattr(self, member) 78 | return _cb_get 79 | else: 80 | return super().__getattr__(name) 81 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/scripts/common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | 9 | from .. 
import get_dataset_class 10 | from ..utils import img as img_utils 11 | 12 | 13 | DATASET_COLORMAPS = { 14 | 'auto_n': {}, 15 | 'ade20k': {}, 16 | 'cityscapes_19': {'semantic_n_classes': 19}, 17 | 'cityscapes_33': {'semantic_n_classes': 33}, 18 | 'coco': {}, 19 | 'hypersim': {}, 20 | 'nyuv2_13': {'semantic_n_classes': 13}, 21 | 'nyuv2_40': {'semantic_n_classes': 40}, 22 | 'nyuv2_894': {'semantic_n_classes': 894}, 23 | 'scannet_20': {'semantic_n_classes': 20}, 24 | 'scannet_40': {'semantic_n_classes': 40}, 25 | 'scannet_200': {'semantic_n_classes': 200}, 26 | 'scannet_549': {'semantic_n_classes': 549}, 27 | 'scenenetrgbd': {}, 28 | 'sunrgbd': {}, 29 | 'visual_distinct': {} 30 | } 31 | 32 | AVAILABLE_COLORMAPS = tuple(DATASET_COLORMAPS.keys()) 33 | 34 | 35 | def get_colormap( 36 | name: str, 37 | n: Optional[int] = 256, 38 | return_names: bool = False 39 | ) -> Union[np.ndarray, Tuple[List[str], np.ndarray]]: 40 | if 'auto_n' == name: 41 | # generate color map with n colors 42 | colors = np.array( 43 | img_utils.get_colormap(n) 44 | ) 45 | names = [f'{i}' for i in range(n)] 46 | elif 'visual_distinct' == name: 47 | # use visually distinct colors (useful for visualizing instances) 48 | colors = np.array( 49 | img_utils.get_visual_distinct_colormap(with_void=True) 50 | ) 51 | names = [f'{i}' for i in range(colors.shape[0])] 52 | else: 53 | # use colors from dataset 54 | dataset_name = name.split('_')[0] 55 | dataset = get_dataset_class(dataset_name)( 56 | disable_prints=True, 57 | **DATASET_COLORMAPS[name] 58 | ) 59 | # with void class 60 | colors = dataset.config.semantic_label_list.colors_array 61 | names = dataset.config.semantic_label_list.class_names 62 | 63 | if not return_names: 64 | return colors 65 | 66 | return colors, names 67 | 68 | 69 | def print_section(section_name: str, section_content: str = ''): 70 | print(f"===== {section_name.upper()} =====") 71 | if section_content: 72 | print(section_content+"\n") 73 | -------------------------------------------------------------------------------- /tests/test_scenenetrgbd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for SceneNet RGB-D dataset 4 | 5 | .. 
codeauthor:: Daniel Seichter 6 | """ 7 | import pytest 8 | 9 | from nicr_scene_analysis_datasets import SceneNetRGBD 10 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 11 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 12 | 13 | N_CLASSES_WITH_VOID = 13 + 1 14 | N_SAMPLES = {'train': 50595, 'valid': 6000} 15 | N_SCENE_CLASSES = 5 16 | 17 | 18 | @pytest.mark.parametrize('split', ('train', 'valid')) 19 | def test_dataset(split): 20 | dataset = SceneNetRGBD( 21 | dataset_path=DATASET_PATH_DICT['scenenetrgbd'], 22 | split=split, 23 | sample_keys=SceneNetRGBD.get_available_sample_keys(split), 24 | depth_mode='refined', 25 | ) 26 | 27 | assert dataset.depth_mode == 'refined' 28 | assert dataset.split == split 29 | 30 | assert len(dataset) == N_SAMPLES[split] 31 | 32 | assert dataset.semantic_n_classes == N_CLASSES_WITH_VOID 33 | assert dataset.semantic_n_classes_without_void == N_CLASSES_WITH_VOID - 1 34 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 35 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 36 | 37 | assert len(dataset.scene_class_names) == N_SCENE_CLASSES 38 | 39 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 40 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 41 | 42 | assert len(dataset.cameras) == 1 43 | 44 | assert isinstance(dataset.depth_min, float) 45 | assert isinstance(dataset.depth_max, float) 46 | assert isinstance(dataset.depth_mean, float) 47 | assert isinstance(dataset.depth_std, float) 48 | assert isinstance(dataset.depth_stats, dict) 49 | 50 | # test first 10 samples sample 51 | for i, sample in enumerate(dataset): 52 | assert isinstance(sample, dict) 53 | assert isinstance(sample['identifier'], SampleIdentifier) 54 | # inputs: rgb and depth 55 | assert sample['rgb'].ndim == 3 56 | assert sample['depth'].ndim == 2 57 | # semantic 58 | assert sample['semantic'].ndim == 2 59 | # instance 60 | assert sample['instance'].ndim == 2 61 | # scene 62 | assert isinstance(sample['scene'], int) 63 | 64 | if i >= 9: 65 | break 66 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/nyuv2/README.md: -------------------------------------------------------------------------------- 1 | # NYUv2 dataset 2 | 3 | The NYU-Depth V2 dataset is comprised of video sequences from a variety of indoor scenes as recorded by both the RGB and Depth cameras from the Microsoft Kinect. 4 | It contains 1449 densely labeled pairs of aligned RGB and depth images. 5 | 6 | For more details, see: [NYU Depth Dataset V2](https://cs.nyu.edu/~fergus/datasets/nyu_depth_v2.html) 7 | 8 | > As of Nov 2022, [precomputed normals](https://cs.nyu.edu/~deigen/dnl/normals_gt.tgz) are not publicly available any longer. 9 | We are trying to reach the authors. 10 | Normal extraction is optional for now. 11 | 12 | ## Prepare dataset 13 | 14 | 1. Download and convert the dataset to the desired format: 15 | 16 | ```bash 17 | # general usage 18 | nicr_sa_prepare_dataset nyuv2 \ 19 | /path/where/to/store/nyuv2 20 | ``` 21 | 22 | 2. 
(Optional) Generate auxiliary data 23 | > **Note**: To use auxiliary data generation, the package must be installed with the `withauxiliarydata` option: 24 | > ```bash 25 | > pip install -e .[withauxiliarydata] 26 | > ``` 27 | 28 | ```bash 29 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 30 | nicr_sa_generate_auxiliary_data \ 31 | --dataset nyuv2 \ 32 | --dataset-path /path/to/already/prepared/nyuv2/dataset\ 33 | --auxiliary-data depth image-embedding panoptic-embedding \ 34 | --embedding-estimator-device cuda \ 35 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 36 | --depth-estimator-device cuda \ 37 | --depth-estimators depthanything_v2__indoor_large \ 38 | --cache-models 39 | ``` 40 | With arguments: 41 | - `--dataset-path`: 42 | Path to the prepared NYUv2 dataset. 43 | - `--auxiliary-data`: 44 | Types of auxiliary data to generate: 45 | - `depth`: Generates synthetic depth images from RGB. 46 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 47 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 48 | - `--depth-estimator-device`: 49 | Device to use for depth estimation (`cpu` or `cuda`). 50 | - `--depth-estimators`: 51 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 52 | - `--embedding-estimator-device`: 53 | Device to use for embedding estimation (`cpu` or `cuda`). 54 | - `--embedding-estimators`: 55 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 56 | - `--cache-models`: 57 | Cache models locally to avoid reloading them in future runs. 58 | -------------------------------------------------------------------------------- /tests/test_embedding_estimation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for embedding estimation 4 | 5 | .. 
codeauthor:: Soehnke Fischedick 6 | """ 7 | import os 8 | import shutil 9 | 10 | import cv2 11 | import numpy as np 12 | import pytest 13 | 14 | from nicr_scene_analysis_datasets.auxiliary_data.embedding_estimation import get_embedding_estimator_class 15 | from nicr_scene_analysis_datasets.utils.io import download_file 16 | 17 | 18 | EXAMPLE_IMAGE = 'https://dl.fbaipublicfiles.com/dinov2/images/example.jpg' 19 | 20 | # true: always use the same default transformers path and, thus, speed up 21 | # consecutive test runs 22 | USE_DEFAULT_CACHE_PATH = True 23 | 24 | 25 | def _get_example_img(tmp_path): 26 | fn = 'example.jpg' 27 | fp = os.path.join(tmp_path, fn) 28 | if not os.path.exists(fp): 29 | if EXAMPLE_IMAGE.startswith('http'): 30 | download_file(EXAMPLE_IMAGE, fp) 31 | else: 32 | shutil.copy(EXAMPLE_IMAGE, fp) 33 | 34 | img = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 35 | assert img is not None 36 | assert img.ndim == 3 37 | 38 | return img 39 | 40 | 41 | @pytest.mark.parametrize( 42 | 'estimator__model', ( 43 | 'alpha_clip__b16-grit-1m', 44 | 'alpha_clip__l14-grit-1m', 45 | 'alpha_clip__l14-336-grit-1m', 46 | 'alpha_clip__b16-grit-20m', 47 | 'alpha_clip__l14-grit-20m', 48 | 'alpha_clip__l14-336-grit-20m', 49 | 'alpha_clip__b16-combined', 50 | 'alpha_clip__l14-combined', 51 | ) 52 | ) 53 | def test_embedding_estimator(estimator__model, tmp_path): 54 | # Get example image 55 | img = _get_example_img(tmp_path) 56 | 57 | # Initialize the embedding estimator 58 | Estimator = get_embedding_estimator_class(estimator__model) 59 | estimator = Estimator( 60 | device='cpu', 61 | auto_set_up=True, 62 | cache_basepath=tmp_path if not USE_DEFAULT_CACHE_PATH else None, 63 | ) 64 | 65 | # Generate mask for the whole image, same size as input 66 | mask = np.ones_like(img, dtype=np.uint8) 67 | # The mask should only have one channel 68 | mask = mask[:, :, 0][:, :, None] 69 | 70 | # Get embeddings 71 | embeddings = estimator.predict(img, mask) 72 | 73 | # Basic assertions 74 | assert embeddings is not None 75 | assert isinstance(embeddings, np.ndarray) 76 | 77 | # Embedding should have batch dimension and embedding dimension 78 | assert embeddings.ndim == 2 79 | 80 | # We only have on input, so the batch dimension should be 1 81 | assert embeddings.shape[0] == 1 82 | -------------------------------------------------------------------------------- /tests/test_cityscapes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for Cityscapes dataset 4 | 5 | .. 
codeauthor:: Daniel Seichter 6 | """ 7 | import pytest 8 | 9 | from nicr_scene_analysis_datasets import Cityscapes 10 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 11 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 12 | 13 | N_SAMPLES = {'train': 2975, 'valid': 500, 'test': 1525} 14 | 15 | 16 | @pytest.mark.parametrize('split', ('train', 'valid', 'test')) 17 | @pytest.mark.parametrize('semantic_n_classes', (19, 33)) 18 | @pytest.mark.parametrize('disparity_instead_of_depth', (False, True)) 19 | def test_dataset(split, 20 | semantic_n_classes, 21 | disparity_instead_of_depth): 22 | dataset = Cityscapes( 23 | dataset_path=DATASET_PATH_DICT['cityscapes'], 24 | split=split, 25 | sample_keys=Cityscapes.get_available_sample_keys(split), 26 | depth_mode='raw', 27 | disparity_instead_of_depth=disparity_instead_of_depth, 28 | semantic_n_classes=semantic_n_classes 29 | ) 30 | 31 | assert dataset.depth_mode == 'raw' 32 | assert dataset.split == split 33 | 34 | assert len(dataset) == N_SAMPLES[split] 35 | 36 | assert dataset.semantic_n_classes == semantic_n_classes + 1 37 | assert dataset.semantic_n_classes_without_void == semantic_n_classes 38 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 39 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 40 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 41 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 42 | 43 | assert len(dataset.cameras) == 1 44 | 45 | assert isinstance(dataset.depth_min, float) 46 | assert isinstance(dataset.depth_max, float) 47 | assert isinstance(dataset.depth_mean, float) 48 | assert isinstance(dataset.depth_std, float) 49 | assert isinstance(dataset.depth_stats, dict) 50 | 51 | # test first 10 samples 52 | for i, sample in enumerate(dataset): 53 | assert isinstance(sample, dict) 54 | assert isinstance(sample['identifier'], SampleIdentifier) 55 | # inputs: rgb and depth 56 | assert sample['rgb'].ndim == 3 57 | assert sample['depth'].ndim == 2 58 | 59 | if 'test' != split: 60 | # note that there are annotation files for test but they are empty 61 | # semantic 62 | assert sample['semantic'].ndim == 2 63 | # instance 64 | assert sample['instance'].ndim == 2 65 | 66 | if i >= 9: 67 | break 68 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/cityscapes/cityscapes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. codeauthor:: Soehnke Fischedick 5 | .. 
codeauthor:: Leonard Rabes 6 | """ 7 | from cityscapesscripts.helpers.labels import labels 8 | 9 | from ...dataset_base import DepthStats 10 | from ...dataset_base import SemanticLabel 11 | from ...dataset_base import SemanticLabelList 12 | 13 | 14 | class CityscapesMeta: 15 | SPLITS = ('train', 'valid', 'test') 16 | 17 | _DATA_SAMPLE_KEYS = ('identifier', 'meta', 'rgb', 'depth') 18 | _ANNOTATION_SAMPLE_KEYS = ('semantic', 'instance') 19 | SPLIT_SAMPLE_KEYS = { 20 | SPLITS[0]: _DATA_SAMPLE_KEYS+_ANNOTATION_SAMPLE_KEYS, 21 | SPLITS[1]: _DATA_SAMPLE_KEYS+_ANNOTATION_SAMPLE_KEYS, 22 | SPLITS[2]: _DATA_SAMPLE_KEYS, 23 | } 24 | 25 | # calculated over the whole train split 26 | # see: my_dataset.depth_compute_stats() for calculation 27 | TRAIN_SPLIT_DEPTH_STATS = DepthStats( 28 | min=3.7578125, 29 | max=300.0, # see _load_depth() in dataset.py 30 | mean=31.715617493177906, 31 | std=38.70280704877372, 32 | ) 33 | TRAIN_SPLIT_DEPTH_STATS_DISPARITY = DepthStats( 34 | min=1.0, 35 | max=32257.0, 36 | mean=9069.706336834102, 37 | std=7178.335960071306 38 | ) 39 | 40 | DEPTH_MODES = ('raw',) 41 | 42 | CAMERAS = ('camera1',) # just a dummy camera name 43 | 44 | # number of semantic classes without void/unlabeled and 45 | # license plate (class 34) 46 | SEMANTIC_N_CLASSES = (19, 33) 47 | 48 | SEMANTIC_LABEL_LIST_REDUCED = SemanticLabelList(( 49 | SemanticLabel('void', False, False, (0, 0, 0)), 50 | )) 51 | SEMANTIC_LABEL_LIST_FULL = SemanticLabelList(( 52 | SemanticLabel('void', False, False, (0, 0, 0)), 53 | )) 54 | 55 | SEMANTIC_CLASS_MAPPING_REDUCED = { 56 | c: labels[c].trainId+1 if not labels[c].ignoreInEval else 0 57 | for c in range(1+33) 58 | } 59 | 60 | for idx, label in enumerate(labels): 61 | semantic_label = SemanticLabel( 62 | class_name=label.name, 63 | is_thing=label.hasInstances, 64 | use_orientations=False, 65 | color=label.color 66 | ) 67 | 68 | if not label.ignoreInEval: 69 | SEMANTIC_LABEL_LIST_REDUCED.add_label(semantic_label) 70 | # 1+33 classes (0: unlabeled), ignore license plate 71 | if idx < 33: 72 | SEMANTIC_LABEL_LIST_FULL.add_label(semantic_label) 73 | 74 | # DEPTH_DIR = 'depth' # refined depth does not exist 75 | DEPTH_RAW_DIR = 'depth_raw' 76 | DISPARITY_RAW_DIR = 'disparity_raw' 77 | RGB_DIR = 'rgb' 78 | SEMANTIC_FULL_DIR = 'semantic_33' 79 | SEMANTIC_REDUCED_DIR = 'semantic_19' 80 | INSTANCE_DIR = 'instance' 81 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "create hypersim ply", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "module": "nicr_scene_analysis_datasets.scripts.create_labeled_point_clouds", 9 | "console": "integratedTerminal", 10 | "args": [ 11 | "hypersim", 12 | "/datasets_nas/nicr_scene_analysis_datasets/version_052/hypersim", 13 | "./test", 14 | "--split", "train", 15 | "--voxel-size", "0.05", 16 | "--max-depth", "20", 17 | "--write-scannet-label", 18 | ], 19 | "env": {}, 20 | }, 21 | { 22 | "name": "create hypersim", 23 | "type": "debugpy", 24 | "request": "launch", 25 | "module": "nicr_scene_analysis_datasets.datasets.hypersim.prepare_dataset", 26 | "console": "integratedTerminal", 27 | "args": [ 28 | "/datasets_nas/nicr_scene_analysis_datasets/version_052/hypersim", 29 | "/datasets_nas/segmentation/hypersim/apple-hypersim", 30 | "--additional-subsamples", "2", "5", "10", "20", 31 | "--n-processes", "16", 32 | ], 33 | "env": { 34 | 
//"VERSION": "052" 35 | }, 36 | }, 37 | { 38 | "name": "create scennetrgbd", 39 | "type": "debugpy", 40 | "request": "launch", 41 | "module": "nicr_scene_analysis_datasets.datasets.scenenetrgbd.prepare_dataset", 42 | "console": "integratedTerminal", 43 | "args": [ 44 | "/datasets_nas/nicr_scene_analysis_datasets/version_test/scenenetrgbd", 45 | "/datasets_nas/segmentation/SceneNetRGBD", 46 | "--n-random-views-to-include-train", "3", 47 | "--n-random-views-to-include-valid", "6", 48 | "--force-at-least-n-classes-in-view", "4", 49 | ], 50 | }, 51 | { 52 | "name": "create sunrgbd v060", 53 | "type": "debugpy", 54 | "request": "launch", 55 | "module": "nicr_scene_analysis_datasets.datasets.sunrgbd.prepare_dataset", 56 | "console": "integratedTerminal", 57 | "args": [ 58 | "/local/datasets/sunrgbd_test", 59 | "--toolbox-filepath", "/local/datasets/raw/sunrgbd/SUNRGBDtoolbox.zip", 60 | "--data-filepath", "/local/datasets/raw/sunrgbd/SUNRGBD.zip", 61 | "--box-filepath", "/local/datasets/raw/sunrgbd/SUNRGBDMeta3DBB_v2.mat", 62 | "--create-instances", 63 | "--copy-instances-from-nyuv2", 64 | "--nyuv2-path", "/datasets_nas/nicr_scene_analysis_datasets/version_060/nyuv2" 65 | ], 66 | }, 67 | ] 68 | } -------------------------------------------------------------------------------- /tests/test_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for COCO dataset 4 | 5 | .. codeauthor:: Soehnke Fischedick 6 | .. codeauthor:: Daniel Seichter 7 | """ 8 | import pytest 9 | 10 | from nicr_scene_analysis_datasets import COCO 11 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 12 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 13 | 14 | N_SAMPLES = {'train': 118287, 'valid': 5000} 15 | N_CLASSES_WITH_VOID = 133 + 1 16 | N_CAMERAS = {'train': 2477, 'valid': 603} 17 | 18 | 19 | @pytest.mark.parametrize('split', ('train', 'valid')) 20 | def test_dataset(split): 21 | dataset = COCO( 22 | dataset_path=DATASET_PATH_DICT['coco'], 23 | split=split, 24 | sample_keys=COCO.get_available_sample_keys(split) 25 | ) 26 | assert dataset.split == split 27 | 28 | assert len(dataset) == N_SAMPLES[split] 29 | 30 | assert dataset.semantic_n_classes == N_CLASSES_WITH_VOID 31 | assert dataset.semantic_n_classes_without_void == N_CLASSES_WITH_VOID - 1 32 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 33 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 34 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 35 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 36 | 37 | # test first 10 samples sample 38 | for i, sample in enumerate(dataset): 39 | assert isinstance(sample, dict) 40 | assert isinstance(sample['identifier'], SampleIdentifier) 41 | # inputs: rgb 42 | assert sample['rgb'].ndim == 3 43 | # semantic 44 | assert sample['semantic'].ndim == 2 45 | # instance 46 | assert sample['instance'].ndim == 2 47 | 48 | if i >= 9: 49 | break 50 | 51 | # test camera filtering 52 | assert len(dataset.cameras) == N_CAMERAS[split], len(dataset.cameras) 53 | for camera in dataset.cameras[::10]: # test only every 10th camera 54 | with dataset.filter_camera(camera): 55 | # get shape of first sample 56 | h, w, _ = dataset[0]['rgb'].shape 57 | 58 | assert f'{w}x{h}' == camera 59 | 60 | 61 | @pytest.mark.parametrize('split', ('train', 'valid')) 62 | def 
test_filter_camera(split): 63 | # just some random cameras and counts that we know 64 | sample_cameras = { 65 | 'train': {'480x640': 8411, '426x640': 1660}, 66 | 'valid': {'640x480': 1061, '480x640': 336, '500x335': 9} 67 | } 68 | 69 | cameras = tuple(sample_cameras[split].keys()) 70 | n_samples = tuple(sample_cameras[split].values()) 71 | 72 | # create dataset with specified cameras 73 | dataset = COCO( 74 | dataset_path=DATASET_PATH_DICT['coco'], 75 | split=split, 76 | sample_keys=COCO.get_available_sample_keys(split), 77 | cameras=cameras 78 | ) 79 | 80 | assert dataset.cameras == cameras 81 | assert len(dataset) == sum(n_samples) 82 | 83 | # test filtering 84 | dataset.filter_camera(cameras[0]) 85 | assert dataset.camera == cameras[0] 86 | assert len(dataset) == n_samples[0] 87 | 88 | # reset filtering 89 | dataset.filter_camera(None) 90 | assert dataset.camera is None 91 | assert len(dataset) == sum(n_samples) 92 | -------------------------------------------------------------------------------- /tests/test_d2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some common dataset tests for the d2 interface 4 | 5 | .. codeauthor:: Soehnke Fischedick 6 | """ 7 | import pytest 8 | 9 | from detectron2.data import DatasetCatalog 10 | from detectron2.data import MetadataCatalog 11 | 12 | # The import registers the datasets to d2 13 | from nicr_scene_analysis_datasets import d2 as nicr_d2 14 | from nicr_scene_analysis_datasets import KNOWN_DATASETS 15 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 16 | 17 | 18 | @pytest.mark.parametrize('dataset_name', KNOWN_DATASETS) 19 | @pytest.mark.parametrize('dataset_split', ('test', 'valid', 'train')) 20 | def test_d2_dataset(dataset_name, dataset_split): 21 | invalid_names = set({ 22 | 'coco_test', 23 | 'nyuv2_valid', 24 | 'scenenetrgbd_test', 25 | 'sunrgbd_valid', 26 | 'ade20k_test_panoptic_2017', 27 | }) 28 | # Get the path of the dataset 29 | dataset_path = DATASET_PATH_DICT[dataset_name] 30 | # Set the path for the dataset, so that d2 can use it 31 | nicr_d2.set_dataset_path(dataset_path) 32 | # Get the correct name for using the dataset from the DatasetCatalog 33 | dataset_name_d2 = f'{dataset_name}_{dataset_split}' 34 | # Get the correct split for the ade20k dataset 35 | if dataset_name == 'ade20k': 36 | dataset_name_d2 = f'{dataset_name}_{dataset_split}_panoptic_2017' 37 | 38 | if dataset_name_d2 in invalid_names: 39 | return 40 | dataset = DatasetCatalog.get(dataset_name_d2) 41 | assert MetadataCatalog.get(dataset_name_d2).dataset_config 42 | 43 | for i, sample in enumerate(dataset): 44 | assert isinstance(sample, dict) 45 | assert 'identifier' in sample 46 | assert 'rgb' in sample 47 | assert 'semantic' in sample or 'semantic' not in dataset.get_available_sample_keys(dataset_split) 48 | assert 'instance' in sample or 'instance' not in dataset.get_available_sample_keys(dataset_split) 49 | 50 | if i >= 9: 51 | break 52 | 53 | 54 | @pytest.mark.parametrize('dataset_name', KNOWN_DATASETS) 55 | def test_d2_helper_functions(dataset_name): 56 | 57 | class DummyMapper: 58 | def __call__(self, data): 59 | data['test'] = True 60 | return data 61 | 62 | valid_datasets_for_test = set({ 63 | 'nyuv2', 64 | 'hypersim', 65 | 'sunrgbd' 66 | }) 67 | if dataset_name not in valid_datasets_for_test: 68 | return 69 | 70 | # Get the path of the dataset 71 | dataset_path = DATASET_PATH_DICT[dataset_name] 72 | # Set the path for the dataset, so that d2 can use it 73 | 
nicr_d2.set_dataset_path(dataset_path) 74 | # Get the correct name for using the dataset from the DatasetCatalog 75 | dataset_name_d2 = f'{dataset_name}_test' 76 | dataset = DatasetCatalog.get(dataset_name_d2) 77 | dataset_config = MetadataCatalog.get(dataset_name_d2).dataset_config 78 | 79 | data_mapper = nicr_d2.NICRSceneAnalysisDatasetMapper(dataset_config) 80 | dummy_mapper = DummyMapper() 81 | chained_mapper = nicr_d2.NICRChainedDatasetMapper( 82 | [data_mapper, dummy_mapper] 83 | ) 84 | 85 | for i, data in enumerate(dataset): 86 | mapped_data = chained_mapper(data) 87 | assert 'test' in mapped_data 88 | assert mapped_data['test'] 89 | if i >= 9: 90 | break 91 | -------------------------------------------------------------------------------- /tests/test_depth_estimation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for depth estimation 4 | 5 | .. codeauthor:: Daniel Seichter 6 | """ 7 | import os 8 | import shutil 9 | 10 | import cv2 11 | import pytest 12 | 13 | from nicr_scene_analysis_datasets.auxiliary_data.depth_estimation import get_depth_estimator_class 14 | from nicr_scene_analysis_datasets.utils.io import download_file 15 | from nicr_scene_analysis_datasets.scripts import viewer_depth 16 | 17 | 18 | # EXAMPLE_IMAGE = 'http://images.cocodataset.org/val2017/000000039769.jpg' 19 | EXAMPLE_IMAGE = 'https://dl.fbaipublicfiles.com/dinov2/images/example.jpg' 20 | # EXAMPLE_IMAGE = '/local/dase6070/datasets/ade20k/tmp/ADEChallengeData2016/images/training/ADE_train_00006921.jpg' # 2100x2100 image 21 | SHOW_RESULTS = False 22 | 23 | # true: always use the same default transformers path and, thus, speed up 24 | # consecutive test runs 25 | USE_DEFAULT_CACHE_PATH = True 26 | 27 | 28 | def _get_example_img(tmp_path): 29 | fn = 'example.jpg' 30 | fp = os.path.join(tmp_path, fn) 31 | if not os.path.exists(fp): 32 | if EXAMPLE_IMAGE.startswith('http'): 33 | download_file(EXAMPLE_IMAGE, fp) 34 | else: 35 | shutil.copy(EXAMPLE_IMAGE, fp) 36 | 37 | img = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 38 | assert img is not None 39 | assert img.ndim == 3 40 | 41 | return img 42 | 43 | 44 | def _show_result(img, tmp_path): 45 | if not SHOW_RESULTS: 46 | return 47 | 48 | # dump file and show 49 | output_path = os.path.join(tmp_path, 'prediction') 50 | os.makedirs(output_path, exist_ok=True) 51 | cv2.imwrite(os.path.join(output_path, 'example.png'), img) 52 | args = [ 53 | output_path, 54 | '--color-path', str(tmp_path), 55 | '--color-alpha', '0.9', 56 | ] 57 | viewer_depth.main(args) 58 | 59 | 60 | @pytest.mark.parametrize( 61 | 'estimator__model', ( 62 | 'depthanything_v2__indoor_small', 63 | 'depthanything_v2__indoor_base', 64 | 'depthanything_v2__indoor_large', 65 | 'depthanything_v2__outdoor_small', 66 | 'depthanything_v2__outdoor_base', 67 | 'depthanything_v2__outdoor_large', 68 | 'zoedepth__indoor', 69 | 'zoedepth__outdoor', 70 | 'zoedepth__indoor_outdoor', 71 | 'dino_v2_dpt__indoor_small', 72 | 'dino_v2_dpt__indoor_base', 73 | 'dino_v2_dpt__indoor_large', 74 | 'dino_v2_dpt__indoor_giant', 75 | 'dino_v2_dpt__outdoor_small', 76 | 'dino_v2_dpt__outdoor_base', 77 | 'dino_v2_dpt__outdoor_large', 78 | 'dino_v2_dpt__outdoor_giant', 79 | ) 80 | ) 81 | @pytest.mark.parametrize('max_pixels', (1920 * 1080, None)) 82 | def test_depth_estimator(estimator__model, max_pixels, tmp_path): 83 | # get image 84 | img = _get_example_img(tmp_path) 85 | 86 | # get model 87 | Estimator = 
get_depth_estimator_class(estimator__model) 88 | estimator = Estimator( 89 | device='cpu', 90 | max_pixels=max_pixels, 91 | auto_set_up=True, 92 | cache_basepath=tmp_path if not USE_DEFAULT_CACHE_PATH else None, 93 | ) 94 | 95 | # predict 96 | depth = estimator.predict(img) 97 | 98 | # perform some basic tests 99 | assert depth.ndim == 2 100 | assert depth.shape == img.shape[:2] 101 | 102 | # optional: show result 103 | _show_result(depth, tmp_path) 104 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scannet/README.md: -------------------------------------------------------------------------------- 1 | # ScanNet dataset 2 | 3 | ScanNet is an RGB-D video dataset containing 2.5 million views in more than 1500 scans, annotated with 3D camera poses, surface reconstructions, and instance-level semantic segmentations. 4 | For more details, see: [ScanNet v2](http://www.scan-net.org/) 5 | 6 | Note: 3D meshes and surface reconstructions are not included in the preparation of the dataset. 7 | 8 | 9 | ## Prepare dataset 10 | 1. Download the Dataset: 11 | 12 | To be able to download the dataset, fill out the [ScanNet Terms of Use](http://kaldir.vc.in.tum.de/scannet/ScanNet_TOS.pdf) and send it to scannet@googlegroups.com. Once your request is approved, you will receive a `download-scannet.py` script. 13 | 14 | Execute it with: 15 | ```bash 16 | # general usage 17 | python download-scannet.py -o /path/where/to/download/ScanNet 18 | ``` 19 | 20 | 2. Convert dataset: 21 | 22 | ```bash 23 | # general usage (note that one process might use more than 3GB RAM) 24 | nicr_sa_prepare_dataset scannet \ 25 | /path/where/to/download/ScanNet \ 26 | /path/where/to/convert/ScanNet \ 27 | [--n-processes N] \ 28 | [--subsample N0] \ 29 | [--additional-subsamples N1 N2] \ 30 | [--label-map-file /path/to/scannet-labels.combined.tsv] 31 | ``` 32 | With arguments: 33 | - `--n-processes`: 34 | The number of worker processes to spawn. 35 | - `--subsample`: 36 | The subsample that is exported to the output folder. 37 | - `--additional-subsamples`: 38 | For additional subsampled versions of the dataset. 39 | - `--label-map-file`: 40 | Path to scannet-labels.combined.tsv; if not specified, it is assumed to be located 41 | in the source directory. 42 | 43 | 44 | 3. (Optional) Generate auxiliary data: 45 | > **Note**: To use auxiliary data generation, the package must be installed with the `withauxiliarydata` option: 46 | > ```bash 47 | > pip install -e .[withauxiliarydata] 48 | > ``` 49 | 50 | ```bash 51 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 52 | nicr_sa_generate_auxiliary_data \ 53 | --dataset scannet \ 54 | --dataset-path /path/to/already/prepared/ScanNet/dataset \ 55 | --auxiliary-data depth image-embedding panoptic-embedding \ 56 | --embedding-estimator-device cuda \ 57 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 58 | --depth-estimator-device cuda \ 59 | --depth-estimators depthanything_v2__indoor_large \ 60 | --cache-models 61 | ``` 62 | With arguments: 63 | - `--dataset-path`: 64 | Path to the prepared ScanNet dataset. 65 | - `--auxiliary-data`: 66 | Types of auxiliary data to generate: 67 | - `depth`: Generates synthetic depth images from RGB. 68 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 69 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 
70 | - `--depth-estimator-device`: 71 | Device to use for depth estimation (`cpu` or `cuda`). 72 | - `--depth-estimators`: 73 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 74 | - `--embedding-estimator-device`: 75 | Device to use for embedding estimation (`cpu` or `cuda`). 76 | - `--embedding-estimators`: 77 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 78 | - `--cache-models`: 79 | Cache models locally to avoid reloading them in future runs. -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scenenetrgbd/README.md: -------------------------------------------------------------------------------- 1 | # SceneNet RGB-D dataset 2 | 3 | SceneNet RGB-D expands the previous work of SceneNet to enable large scale photorealistic rendering of indoor scene trajectories. It provides pixel-perfect ground truth for scene understanding problems such as semantic segmentation, instance segmentation, and object detection, and also for geometric computer vision problems such as optical flow, depth estimation, camera pose estimation, and 3D reconstruction. Random sampling permits virtually unlimited scene configurations, and here we provide a set of 5M rendered RGB-D images from over 15K trajectories in synthetic layouts with random but physically simulated object poses. Each layout also has random lighting, camera trajectories, and textures. The scale of this dataset is well suited for pre-training data-driven computer vision techniques from scratch with RGB-D inputs, which previously has been limited by relatively small labelled datasets in NYUv2 and SUN RGB-D. It also provides a basis for investigating 3D scene labelling tasks by providing perfect camera poses and depth data as proxy for a SLAM system. 4 | 5 | For more details, see: [SceneNet RGB-D](https://robotvault.bitbucket.io/scenenet-rgbd.html) and [pySceneNetRGBD](https://github.com/jmccormac/pySceneNetRGBD). 6 | 7 | ## Prepare dataset 8 | 1. Download and untar dataset files: 9 | ```bash 10 | # see: https://robotvault.bitbucket.io/scenenet-rgbd.html 11 | 12 | SCENENETRGBD_DOWNLOAD_DIR="/path/where/to/store/scenenetrgbd_dowloads" 13 | 14 | # train 15 | wget https://www.doc.ic.ac.uk/~bjm113/scenenet_data/SceneNet-train.tar.gz -P ${SCENENETRGBD_DOWNLOAD_DIR} # -> 263GB 16 | wget https://www.doc.ic.ac.uk/~bjm113/scenenet_data/train_protobufs.tar.gz -P ${SCENENETRGBD_DOWNLOAD_DIR} # -> 323MB 17 | 18 | # valid 19 | wget http://www.doc.ic.ac.uk/~bjm113/scenenet_data/SceneNetRGBD-val.tar.gz -P ${SCENENETRGBD_DOWNLOAD_DIR} # -> 15GB 20 | wget http://www.doc.ic.ac.uk/~bjm113/scenenet_data/scenenet_rgbd_val.pb -P ${SCENENETRGBD_DOWNLOAD_DIR} # -> 31MB 21 | 22 | # untar files 23 | find ${SCENENETRGBD_DOWNLOAD_DIR} -name '*.tar.gz' -exec tar xfvz {} \; 24 | 25 | # move train protobuf files 26 | mv ${SCENENETRGBD_DOWNLOAD_DIR}/train_protobufs/* ${SCENENETRGBD_DOWNLOAD_DIR} 27 | rm -rf ${SCENENETRGBD_DOWNLOAD_DIR}/train_protobufs 28 | ``` 29 | 30 | 2. Build protobuf python source file: 31 | ``` 32 | protoc --python_out=./ scenenet.proto 33 | ``` 34 | 35 | 3. 
Convert dataset: 36 | ```bash 37 | # general usage 38 | 39 | # full dataset: 40 | # - train: 16x1000 + 1x865 trajectories with 300 views per trajectory -> 5,059,500 samples 41 | # - valid: 1x1000 trajectories with 300 views per trajectory -> 300,000 samples 42 | nicr_sa_prepare_dataset scenenetrgbd \ 43 | /path/where/to/store/scenenetrgbd \ 44 | ${SCENENETRGBD_DOWNLOAD_DIR} 45 | 46 | # subsampled dataset 47 | # -> randomly pick 3 views from each trajectory for training 48 | # -> randomly pick 6 views from each trajectory for validation 49 | # -> pick only views with >= 4 different classes 50 | # - train: 16x1000 + 1x865 trajectories with 3 views per trajectory -> 50,595 samples 51 | # - valid: 1x1000 trajectories with 6 views per trajectory -> 6,000 samples 52 | nicr_sa_prepare_dataset scenenetrgbd \ 53 | /path/where/to/store/scenenetrgbd \ 54 | ${SCENENETRGBD_DOWNLOAD_DIR} \ 55 | --n-random-views-to-include-train 3 \ 56 | --n-random-views-to-include-valid 6 \ 57 | --force-at-least-n-classes-in-view 4 58 | ``` 59 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/nyu_additional_class_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "night_stand": 32, 3 | "tv_stand": 39, 4 | "vacuum_cleaner": 40, 5 | "coffee_table": 39, 6 | "piano_bench": 39, 7 | "garbage_bin": 39, 8 | "shower_curtain": 28, 9 | "paper_towel_dispenser": 40, 10 | "toilet_paper": 40, 11 | "water_dispenser": 40, 12 | "toaster_oven": 40, 13 | "fire_extinguisher": 40, 14 | "shopping_cart": 40, 15 | "tissue_box": 40, 16 | "wine_glass": 40, 17 | "door_knob": 40, 18 | "door_lock": 40, 19 | "display_case": 39, 20 | "plant_pot": 40, 21 | "bunk_bed": 39, 22 | "water_heater": 40, 23 | "air_conditioner": 38, 24 | "water_fountain": 38, 25 | "frying_pan": 40, 26 | "mouse_pad": 40, 27 | "pen_stand": 40, 28 | "flower_pot": 40, 29 | "washing_machine": 39, 30 | "projector_screen": 38, 31 | "lazy_susan": 40, 32 | "remote_control": 40, 33 | "shoe_rack": 40, 34 | "flower_box": 40, 35 | "hole_puncher": 40, 36 | "baby_chair": 39, 37 | "hair_brush": 40, 38 | "cordless_phone": 40, 39 | "bean_bag": 39, 40 | "paper_towel": 40, 41 | "fax_machine": 40, 42 | "plastic_box": 40, 43 | "hand_sanitizer": 40, 44 | "music_stand": 39, 45 | "dish_rack": 40, 46 | "ping_pong_table": 39, 47 | "pool_table": 39, 48 | "foosball_table": 39, 49 | "drying_rack": 39, 50 | "glass_container": 40, 51 | "paper_cutter": 40, 52 | "fire_alarm": 40, 53 | "plastic_rack": 40, 54 | "plastic_tub": 40, 55 | "toy_plane": 40, 56 | "display_board": 39, 57 | "flower_basket": 40, 58 | "toy_car": 40, 59 | "show_piece": 40, 60 | "dvd_player": 40, 61 | "tea_pot": 40, 62 | "plastic_bowl": 40, 63 | "toy_house": 40, 64 | "back_pack": 40, 65 | "stack_of_chairs": 39, 66 | "flower_vase": 40, 67 | "plants": 40, 68 | "towel_bar": 38, 69 | "suits_case": 40, 70 | "plastic_container": 40, 71 | "shoes": 40, 72 | "flowers": 40, 73 | "bed_sheet": 40, 74 | "dresser_mirror": 3, 75 | "sofa_chair": 5, 76 | "tv": 25, 77 | "shelf": 3, 78 | "endtable": 7, 79 | "fridge": 24, 80 | "recycle_bin": 39, 81 | "bathroom_vanity": 3, 82 | "painting": 11, 83 | "island": 38, 84 | "kitchen_counter": 12, 85 | "kitchen_cabinet": 3, 86 | "rack": 15, 87 | "cubby": 15, 88 | "cupboard": 15, 89 | "tripod": 40, 90 | "scanner": 40, 91 | "poster": 11, 92 | "information_board": 40, 93 | "dining_table": 7, 94 | "bulletin_board": 39, 95 | "coffee_maker": 40, 96 | "file_cabinet": 3, 97 | "decor": 40, 98 | 
"locker": 39, 99 | "hanging_cabinet": 3, 100 | "kitchen": 39, 101 | "portrait": 11, 102 | "organizer": 40, 103 | "switch": 38, 104 | "mug": 40, 105 | "cpu": 40, 106 | "soap_dispenser": 40, 107 | "thermos": 40, 108 | "microwave_oven": 40, 109 | "electric_fan": 40, 110 | "paper_bag": 40, 111 | "rice_cooker": 40, 112 | "magazine_rack": 15, 113 | "armoire": 15, 114 | "podium": 39, 115 | "grab_bar": 39, 116 | "toilet_paper_dispenser": 40, 117 | "urinal": 33, 118 | "basin": 34, 119 | "stuffed_toy": 40, 120 | "cartoon": 40, 121 | "plastic_bottle": 40, 122 | "plastic_bag": 40, 123 | "computer_keyboard": 40, 124 | "water_bottle": 40, 125 | "kettle": 40, 126 | "desktop": 40, 127 | "tissue_paper": 40, 128 | "food_tray": 40, 129 | "end_table": 7, 130 | "battery": 40, 131 | "helmet": 40, 132 | "saucer_chair": 5, 133 | "fume_hood": 38, 134 | "water_jug": 40, 135 | "frige": 24, 136 | "packet": 40, 137 | "child_chair": 5 138 | } -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/legacy_emsanet_version/nyu_additional_class_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "night_stand": 32, 3 | "tv_stand": 39, 4 | "vacuum_cleaner": 40, 5 | "coffee_table": 39, 6 | "piano_bench": 39, 7 | "garbage_bin": 39, 8 | "shower_curtain": 28, 9 | "paper_towel_dispenser": 40, 10 | "toilet_paper": 40, 11 | "water_dispenser": 40, 12 | "toaster_oven": 40, 13 | "fire_extinguisher": 40, 14 | "shopping_cart": 40, 15 | "tissue_box": 40, 16 | "wine_glass": 40, 17 | "door_knob": 40, 18 | "door_lock": 40, 19 | "display_case": 39, 20 | "plant_pot": 40, 21 | "bunk_bed": 39, 22 | "water_heater": 40, 23 | "air_conditioner": 38, 24 | "water_fountain": 38, 25 | "frying_pan": 40, 26 | "mouse_pad": 40, 27 | "pen_stand": 40, 28 | "flower_pot": 40, 29 | "washing_machine": 39, 30 | "projector_screen": 38, 31 | "lazy_susan": 40, 32 | "remote_control": 40, 33 | "shoe_rack": 40, 34 | "flower_box": 40, 35 | "hole_puncher": 40, 36 | "baby_chair": 39, 37 | "hair_brush": 40, 38 | "cordless_phone": 40, 39 | "bean_bag": 39, 40 | "paper_towel": 40, 41 | "fax_machine": 40, 42 | "plastic_box": 40, 43 | "hand_sanitizer": 40, 44 | "music_stand": 39, 45 | "dish_rack": 40, 46 | "ping_pong_table": 39, 47 | "pool_table": 39, 48 | "foosball_table": 39, 49 | "drying_rack": 39, 50 | "glass_container": 40, 51 | "paper_cutter": 40, 52 | "fire_alarm": 40, 53 | "plastic_rack": 40, 54 | "plastic_tub": 40, 55 | "toy_plane": 40, 56 | "display_board": 39, 57 | "flower_basket": 40, 58 | "toy_car": 40, 59 | "show_piece": 40, 60 | "dvd_player": 40, 61 | "tea_pot": 40, 62 | "plastic_bowl": 40, 63 | "toy_house": 40, 64 | "back_pack": 40, 65 | "stack_of_chairs": 39, 66 | "flower_vase": 40, 67 | "plants": 40, 68 | "towel_bar": 38, 69 | "suits_case": 40, 70 | "plastic_container": 40, 71 | "shoes": 40, 72 | "flowers": 40, 73 | "bed_sheet": 40, 74 | "dresser_mirror": 3, 75 | "sofa_chair": 5, 76 | "tv": 25, 77 | "shelf": 3, 78 | "endtable": 7, 79 | "fridge": 24, 80 | "recycle_bin": 39, 81 | "bathroom_vanity": 3, 82 | "painting": 11, 83 | "island": 38, 84 | "kitchen_counter": 12, 85 | "kitchen_cabinet": 3, 86 | "rack": 15, 87 | "cubby": 15, 88 | "cupboard": 15, 89 | "tripod": 40, 90 | "scanner": 40, 91 | "poster": 11, 92 | "information_board": 40, 93 | "dining_table": 7, 94 | "bulletin_board": 39, 95 | "coffee_maker": 40, 96 | "file_cabinet": 3, 97 | "decor": 40, 98 | "locker": 39, 99 | "hanging_cabinet": 3, 100 | "kitchen": 39, 101 | "portrait": 
11, 102 | "organizer": 40, 103 | "switch": 38, 104 | "mug": 40, 105 | "cpu": 40, 106 | "soap_dispenser": 40, 107 | "thermos": 40, 108 | "microwave_oven": 40, 109 | "electric_fan": 40, 110 | "paper_bag": 40, 111 | "rice_cooker": 40, 112 | "magazine_rack": 15, 113 | "armoire": 15, 114 | "podium": 39, 115 | "grab_bar": 39, 116 | "toilet_paper_dispenser": 40, 117 | "urinal": 33, 118 | "basin": 34, 119 | "stuffed_toy": 40, 120 | "cartoon": 40, 121 | "plastic_bottle": 40, 122 | "plastic_bag": 40, 123 | "computer_keyboard": 40, 124 | "water_bottle": 40, 125 | "kettle": 40, 126 | "desktop": 40, 127 | "tissue_paper": 40, 128 | "food_tray": 40, 129 | "end_table": 7, 130 | "battery": 40, 131 | "helmet": 40, 132 | "saucer_chair": 5, 133 | "fume_hood": 38, 134 | "water_jug": 40, 135 | "frige": 24, 136 | "packet": 40, 137 | "child_chair": 5 138 | } -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/sunrgbd/README.md: -------------------------------------------------------------------------------- 1 | # SUNRGB-D Dataset 2 | 3 | The SUNRGB-D dataset is comprised of images from four different cameras, i.e., 4 | Intel Realsense, Asus Xtion, and Microsoft Kinect v1 and v2. 5 | It contains all images from NYUv2, manually selected images from Berkeley 6 | B3DO and SUN3D, as well as newly shot images. 7 | 8 | It contains 10,335 densely labeled pairs of aligned RGB and depth images. 9 | 10 | For more details, see: [SUNRGB-D dataset](https://rgbd.cs.princeton.edu/) 11 | 12 | We further extracted dense 2d instance annotations from annotated 3d boxes to 13 | enable panoptic segmentation on SUNRGB-D. Over time, we created two versions 14 | of additional instance annotations: 15 | - 'emsanet': this initial version was created for training EMSANet (efficient 16 | panoptic segmentation) - see IJCNN 2022 paper - and was also used for 17 | EMSAFormer (efficient panoptic segmentation) - see IJCNN 2023 paper 18 | - 'panopticndt': this revised version was created along with the work for 19 | PanopticNDT (panoptic mapping) - see IROS 2023 paper, it refines large parts 20 | of the instance extraction (see changelog for v0.6.0 of this package). 21 | 22 | 23 | ## Prepare dataset 24 | 1. Download and convert the dataset to the desired format: 25 | 26 | ```bash 27 | # general usage (latest PanopticNDT version) 28 | nicr_sa_prepare_dataset sunrgbd \ 29 | /path/where/to/store/sunrgbd \ 30 | --create-instances \ 31 | --copy-instances-from-nyuv2 \ 32 | --nyuv2-path /path/to/already/prepared/nyuv2/ 33 | 34 | # general usage (EMSANet version - use this version to reproduce results 35 | # reported in EMSANet or EMSAFormer paper) 36 | nicr_sa_prepare_dataset sunrgbd \ 37 | /path/where/to/store/sunrgbd \ 38 | --create-instances \ 39 | --instances-version emsanet \ 40 | --copy-instances-from-nyuv2 \ 41 | --nyuv2-path /path/to/already/prepared/nyuv2/ 42 | ``` 43 | > Note: NYUv2 matching requires the NYUv2 dataset to be prepared first. 44 | 45 | With arguments: 46 | - `--create-instances`: 47 | Whether instances should be created by matching 3D boxes with point clouds. 48 | - `--instances-version`: 49 | Version of instance annotations to extract, see notes above. 50 | - `--copy-instances-from-nyuv2`: 51 | Whether instances and orientations should be copied from the (already prepared!) 52 | NYUv2 dataset. 53 | - `--nyuv2-path /path/to/datasets/nyuv2`: 54 | Path to the (already prepared!) NYUv2 dataset when using 55 | `--copy-instances-from-nyuv2`. 56 | 57 | 2. 
(Optional) Generate auxiliary data 58 | ```bash 59 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 60 | nicr_sa_generate_auxiliary_data \ 61 | --dataset sunrgbd \ 62 | --dataset-path /path/to/already/prepared/sunrgbd/dataset \ 63 | --auxiliary-data depth image-embedding panoptic-embedding \ 64 | --embedding-estimator-device cuda \ 65 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 66 | --depth-estimator-device cuda \ 67 | --depth-estimators depthanything_v2__indoor_large \ 68 | --cache-models 69 | ``` 70 | 71 | With arguments: 72 | - `--dataset-path`: 73 | Path to the prepared SUNRGB-D dataset. 74 | - `--auxiliary-data`: 75 | Types of auxiliary data to generate: 76 | - `depth`: Generates synthetic depth images from RGB. 77 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 78 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 79 | - `--depth-estimator-device`: 80 | Device to use for depth estimation (`cpu` or `cuda`). 81 | - `--depth-estimators`: 82 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 83 | - `--embedding-estimator-device`: 84 | Device to use for embedding estimation (`cpu` or `cuda`). 85 | - `--embedding-estimators`: 86 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 87 | - `--cache-models`: 88 | Cache models locally to avoid reloading them in future runs. 89 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_annotation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | .. 
codeauthor:: Daniel Seichter 5 | """ 6 | from dataclasses import dataclass 7 | from typing import Tuple, Union 8 | 9 | import numpy as np 10 | 11 | 12 | class ExtrinsicCameraParametersNormalized(dict): 13 | """Enables a custom pytorch collate function ignore this dict.""" 14 | pass 15 | 16 | 17 | class IntrinsicCameraParametersNormalized(dict): 18 | """Enables a custom pytorch collate function ignore this dict.""" 19 | pass 20 | 21 | 22 | class MetaDict(dict): 23 | """Enables a custom pytorch collate function ignore the meta data.""" 24 | pass 25 | 26 | 27 | class OrientationDict(dict): 28 | """Enables a custom pytorch collate function ignore the orientations.""" 29 | pass 30 | 31 | 32 | class PanopticEmbeddingDict(dict): 33 | """Enables a custom pytorch collate function ignore the embeddings.""" 34 | pass 35 | 36 | 37 | class SampleIdentifier(tuple): 38 | """Enables a custom pytorch collate function ignore the identifier.""" 39 | pass 40 | 41 | 42 | @dataclass(frozen=True) 43 | class _LabelBase: 44 | class_name: str 45 | 46 | 47 | @dataclass(frozen=True) 48 | class SemanticLabel(_LabelBase): 49 | is_thing: Union[bool, None] 50 | use_orientations: Union[bool, None] 51 | color: Tuple[int] 52 | 53 | 54 | @dataclass(frozen=True) 55 | class SceneLabel(_LabelBase): 56 | # maybe add color for scene labels 57 | pass 58 | 59 | 60 | class _LabelListBase: 61 | def __init__( 62 | self, 63 | label_list: Tuple[_LabelBase] = () 64 | ) -> None: 65 | self.label_list = list(label_list) 66 | # a copy of a the class names list for faster name to idx lookup 67 | self._class_names = () 68 | self._update_internal_lists() 69 | # for iterator 70 | self._idx = 0 71 | 72 | def __len__(self): 73 | return len(self.label_list) 74 | 75 | def __getitem__(self, idx): 76 | return self.label_list[idx] 77 | 78 | def __iter__(self): 79 | return self 80 | 81 | def __next__(self): 82 | try: 83 | el = self[self._idx] 84 | self._idx += 1 85 | return el 86 | except IndexError: 87 | self._idx = 0 88 | raise StopIteration # done iterating 89 | 90 | def add_label(self, label: _LabelBase): 91 | self.label_list.append(label) 92 | self._update_internal_lists() 93 | 94 | def _update_internal_lists(self): 95 | self._class_names = tuple(item.class_name for item in self.label_list) 96 | 97 | def _name_to_idx(self, name: str) -> int: 98 | return self._class_names.index(name) 99 | 100 | def index(self, value: Union[_LabelBase, str]) -> int: 101 | if isinstance(value, _LabelBase): 102 | return self.label_list.index(value) 103 | else: 104 | return self._name_to_idx(value) 105 | 106 | def __contains__(self, value: Union[_LabelBase, str]) -> bool: 107 | if isinstance(value, _LabelBase): 108 | return value in self.label_list 109 | else: 110 | return value in self._class_names 111 | 112 | @property 113 | def class_names(self) -> Tuple[str]: 114 | return self._class_names 115 | 116 | 117 | class SemanticLabelList(_LabelListBase): 118 | @property 119 | def colors(self) -> Tuple[Tuple[int]]: 120 | return tuple(item.color for item in self.label_list) 121 | 122 | @property 123 | def colors_array(self) -> np.ndarray: 124 | return np.array(self.colors, dtype=np.uint8) 125 | 126 | @property 127 | def classes_is_thing(self) -> Tuple[bool]: 128 | return tuple(item.is_thing for item in self.label_list) 129 | 130 | @property 131 | def classes_use_orientations(self) -> Tuple[bool]: 132 | return [item.use_orientations for item in self.label_list] 133 | 134 | 135 | class SceneLabelList(_LabelListBase): 136 | pass 137 | 
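For reference, a minimal usage sketch of the `SemanticLabel`/`SemanticLabelList` API defined above; the label names and colors here are illustrative only and do not correspond to a real dataset:

```python
from nicr_scene_analysis_datasets.dataset_base import SemanticLabel, SemanticLabelList

# build a small label list; entries are illustrative, not from a real dataset
label_list = SemanticLabelList((
    SemanticLabel('void', False, False, (0, 0, 0)),
))
label_list.add_label(SemanticLabel('chair', True, False, (148, 0, 240)))

# lookups work with label objects as well as plain class names
assert 'chair' in label_list
assert label_list.index('chair') == 1
assert label_list.class_names == ('void', 'chair')

# per-class properties
colors = label_list.colors_array        # np.ndarray of shape (2, 3), dtype uint8
is_thing = label_list.classes_is_thing  # (False, True)
```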
-------------------------------------------------------------------------------- /tests/test_ade20k.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for ADE20k dataset 4 | 5 | .. codeauthor:: Soehnke Fischedick 6 | """ 7 | 8 | import numpy as np 9 | import pytest 10 | 11 | from nicr_scene_analysis_datasets import ADE20K 12 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 13 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 14 | 15 | # Constants based on ADE20K dataset details 16 | SPLITS = ADE20K.SPLITS 17 | # SEMANTIC_N_CLASSES = (150, 3688) 18 | CAMERAS = ('683x512', '674x512') 19 | 20 | 21 | @pytest.mark.parametrize('split', SPLITS) 22 | def test_dataset_initialization(split): 23 | # Initialize dataset 24 | dataset = ADE20K( 25 | dataset_path=DATASET_PATH_DICT['ade20k'], 26 | split=split, 27 | sample_keys=ADE20K.get_available_sample_keys(split), 28 | ) 29 | 30 | # Check basic properties 31 | assert dataset.split == split 32 | # +1 because of void class 33 | assert dataset.semantic_n_classes == 151 34 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 35 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 36 | 37 | # Check config based on semantic_n_classes 38 | assert dataset.config.semantic_label_list == ADE20K.SEMANTIC_LABEL_LIST_CHALLENGE_150 39 | 40 | # Check sample keys 41 | available_keys = ADE20K.get_available_sample_keys(split) 42 | for key in available_keys: 43 | assert key in dataset.sample_keys 44 | 45 | # Test loading first few samples 46 | for i in range(min(10, len(dataset))): 47 | sample = dataset[i] 48 | assert isinstance(sample, dict) 49 | assert isinstance(sample['identifier'], SampleIdentifier) 50 | 51 | if 'rgb' in sample: 52 | assert sample['rgb'].ndim == 3 53 | assert sample['rgb'].shape[2] == 3 # RGB channels 54 | 55 | if 'semantic' in sample: 56 | assert sample['semantic'].ndim == 2 57 | assert sample['semantic'].dtype == np.uint8 58 | 59 | if 'instance' in sample: 60 | assert sample['instance'].ndim == 2 61 | assert sample['instance'].dtype == np.uint16 62 | 63 | if 'scene' in sample: 64 | assert isinstance(sample['scene'], int) 65 | # +1 because of void class 66 | assert 0 <= sample['scene'] < len(dataset.scene_class_names) + 1 67 | 68 | 69 | @pytest.mark.parametrize('split', SPLITS) 70 | def test_scene_class_loading(split): 71 | if 'scene' not in ADE20K.get_available_sample_keys(split): 72 | pytest.skip(f"Split {split} does not contain scene labels") 73 | 74 | dataset = ADE20K( 75 | dataset_path=DATASET_PATH_DICT['ade20k'], 76 | split=split, 77 | sample_keys=('scene',) 78 | ) 79 | 80 | for i in range(min(10, len(dataset))): 81 | scene_class = dataset[i]['scene'] 82 | assert isinstance(scene_class, int) 83 | assert 0 <= scene_class < len(dataset.scene_class_names) 84 | 85 | 86 | @pytest.mark.parametrize('split', SPLITS) 87 | def test_camera_filtering(split): 88 | # Test with first camera (assuming multiple exist) 89 | test_cameras = (CAMERAS[0],) 90 | dataset = ADE20K( 91 | dataset_path=DATASET_PATH_DICT['ade20k'], 92 | split=split, 93 | cameras=test_cameras, 94 | sample_keys=ADE20K.get_available_sample_keys(split) 95 | ) 96 | 97 | # Check all samples are from specified cameras 98 | for i in range(min(10, len(dataset))): 99 | identifier = dataset[i]['identifier'] 100 | assert identifier[0] in test_cameras 101 | 102 | # Test filtering after initialization 103 | if 
len(dataset.cameras) > 1: 104 | dataset.filter_camera(CAMERAS[1]) 105 | assert len(dataset) <= len(dataset._filenames_per_camera[CAMERAS[1]]) 106 | for i in range(min(10, len(dataset))): 107 | assert dataset[i]['identifier'][0] == CAMERAS[1] 108 | 109 | 110 | def test_debug_mode(): 111 | # Test dataset without dataset_path 112 | dataset = ADE20K(dataset_path=None) 113 | assert len(dataset) == 0 114 | assert dataset.cameras == ADE20K.CAMERAS # Single dummy camera 115 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/pytorch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from typing import Callable, Type, Union 6 | 7 | from torch.utils.data import Dataset 8 | 9 | from .auxiliary_data import wrap_dataset_with_auxiliary_data 10 | from .dataset_base import KNOWN_CLASS_WEIGHTINGS # noqa: F401 11 | from .dataset_base._base_dataset import DatasetBase 12 | 13 | 14 | class _PytorchDatasetWrapper(DatasetBase, Dataset): 15 | def __init__(self, *args, **kwargs) -> None: 16 | super().__init__(*args, **kwargs) 17 | self._preprocessor = None 18 | 19 | @property 20 | def transform(self) -> Union[Callable, None]: 21 | # just to be compatible with VisionDataset from torchvision 22 | return self.preprocessor 23 | 24 | @transform.setter 25 | def transform(self, value: Union[Callable, None]): 26 | # just to be compatible with VisionDataset from torchvision 27 | self.preprocessor = value 28 | 29 | @property 30 | def preprocessor(self) -> Union[Callable, None]: 31 | return self._preprocessor 32 | 33 | @preprocessor.setter 34 | def preprocessor(self, value: Union[Callable, None]): 35 | self._preprocessor = value 36 | 37 | def __getitem__(self, idx): 38 | sample = super().__getitem__(idx) 39 | 40 | # apply preprocessing 41 | if self._preprocessor is not None: 42 | sample = self._preprocessor(sample) 43 | 44 | return sample 45 | 46 | 47 | from . import ADE20K as _ADE20K 48 | from . import COCO as _COCO 49 | from . import Cityscapes as _Cityscapes 50 | from . import Hypersim as _Hypersim 51 | from . import NYUv2 as _NYUv2 52 | from . import ScanNet as _ScanNet 53 | from . import SceneNetRGBD as _SceneNetRGBD 54 | from . 
import SUNRGBD as _SUNRGBD 55 | 56 | 57 | class Cityscapes(_Cityscapes, _PytorchDatasetWrapper): 58 | pass 59 | 60 | 61 | class COCO(_COCO, _PytorchDatasetWrapper): 62 | pass 63 | 64 | 65 | class Hypersim(_Hypersim, _PytorchDatasetWrapper): 66 | pass 67 | 68 | 69 | class NYUv2(_NYUv2, _PytorchDatasetWrapper): 70 | pass 71 | 72 | 73 | class ScanNet(_ScanNet, _PytorchDatasetWrapper): 74 | pass 75 | 76 | 77 | class SceneNetRGBD(_SceneNetRGBD, _PytorchDatasetWrapper): 78 | pass 79 | 80 | 81 | class SUNRGBD(_SUNRGBD, _PytorchDatasetWrapper): 82 | pass 83 | 84 | 85 | class ADE20K(_ADE20K, _PytorchDatasetWrapper): 86 | pass 87 | 88 | 89 | from .dataset_base import ConcatDataset as _ConcatDataset 90 | 91 | 92 | class _PytorchConcatDatasetWrapper: 93 | def __init__(self, *args, **kwargs) -> None: 94 | super().__init__(*args, **kwargs) 95 | self._preprocessor = None 96 | 97 | @property 98 | def transform(self) -> Union[Callable, None]: 99 | # just to be compatible with VisionDataset from torchvision 100 | return self.preprocessor 101 | 102 | @transform.setter 103 | def transform(self, value: Union[Callable, None]): 104 | # just to be compatible with VisionDataset from torchvision 105 | self.preprocessor = value 106 | 107 | @property 108 | def preprocessor(self) -> Union[Callable, None]: 109 | return self._preprocessor 110 | 111 | @preprocessor.setter 112 | def preprocessor(self, value: Union[Callable, None]): 113 | self._preprocessor = value 114 | # apply preprocessor to all datasets 115 | for ds in self._datasets: 116 | ds.preprocessor = value 117 | 118 | 119 | class ConcatDataset(_ConcatDataset, _PytorchConcatDatasetWrapper): 120 | pass 121 | 122 | 123 | _DATASETS = { 124 | 'ade20k': ADE20K, 125 | 'cityscapes': Cityscapes, 126 | 'coco': COCO, 127 | 'hypersim': Hypersim, 128 | 'nyuv2': NYUv2, 129 | 'scannet': ScanNet, 130 | 'scenenetrgbd': SceneNetRGBD, 131 | 'sunrgbd': SUNRGBD, 132 | } 133 | KNOWN_DATASETS = tuple(_DATASETS.keys()) 134 | 135 | DatasetType = Union[ 136 | ADE20K, 137 | Cityscapes, 138 | COCO, 139 | Hypersim, 140 | NYUv2, 141 | ScanNet, 142 | SceneNetRGBD, 143 | SUNRGBD, 144 | ConcatDataset 145 | ] 146 | 147 | 148 | def get_dataset_class(name: str, with_auxiliary_data: bool = False) -> Type[DatasetType]: 149 | name = name.lower() 150 | if name not in KNOWN_DATASETS: 151 | raise ValueError(f"Unknown dataset: '{name}'") 152 | 153 | original_dataset = _DATASETS[name] 154 | if with_auxiliary_data: 155 | current_dataset = wrap_dataset_with_auxiliary_data(original_dataset) 156 | else: 157 | current_dataset = original_dataset 158 | 159 | return current_dataset 160 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scenenetrgbd/scenenetrgbd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. 
codeauthor:: Söhnke Fischedick 5 | """ 6 | from ...dataset_base import DepthStats 7 | from ...dataset_base import SceneLabel 8 | from ...dataset_base import SceneLabelList 9 | from ...dataset_base import SemanticLabel 10 | from ...dataset_base import SemanticLabelList 11 | 12 | 13 | class SceneNetRGBDMeta: 14 | SPLITS = ('train', 'valid') 15 | SPLIT_FILELIST_FILENAMES = {SPLITS[0]: 'train.txt', SPLITS[1]: 'valid.txt'} 16 | 17 | _DATA_SAMPLE_KEYS = ('identifier', 'meta', 'rgb', 'depth') 18 | _ANNOTATION_SAMPLE_KEYS = ('semantic', 'instance', 'scene') 19 | SPLIT_SAMPLE_KEYS = { 20 | SPLITS[0]: _DATA_SAMPLE_KEYS+_ANNOTATION_SAMPLE_KEYS, 21 | SPLITS[1]: _DATA_SAMPLE_KEYS+_ANNOTATION_SAMPLE_KEYS, 22 | } 23 | 24 | # calculated using a subsampled dataset (see prepare_dataset.py): 25 | # --n_random_views_to_include_train 3 26 | # --n_random_views_to_include_valid 6 27 | # --force_at_least_n_classes_in_view 4 28 | # see: my_dataset.depth_compute_stats() for calculation 29 | TRAIN_SPLIT_DEPTH_STATS = DepthStats( 30 | min=0.0, 31 | max=20076.0, 32 | mean=4006.9281155769777, 33 | std=2459.7763971709933, 34 | ) 35 | 36 | DEPTH_MODES = ('refined',) 37 | 38 | CAMERAS = ('camera1',) # just a dummy camera name 39 | 40 | DEPTH_DIR = 'depth' 41 | RGB_DIR = 'rgb' 42 | SEMANTIC_13_DIR = 'semantic_13' 43 | INSTANCES_DIR = 'instance' 44 | SCENE_CLASS_DIR = 'scene' 45 | 46 | # number of classes without void (NYUv2 classes) 47 | SEMANTIC_N_CLASSES = 13 48 | # there are no orientations, thus, it is set to None 49 | SEMANTIC_LABEL_LIST = SemanticLabelList(( 50 | # class_name, is_thing, use orientations, color 51 | SemanticLabel('void', False, None, (0, 0, 0)), 52 | SemanticLabel('bed', True, None, (0, 0, 255)), 53 | SemanticLabel('books', True, None, (232, 88, 47)), 54 | SemanticLabel('ceiling', False, None, (0, 217, 0)), 55 | SemanticLabel('chair', True, None, (148, 0, 240)), 56 | SemanticLabel('floor', False, None, (222, 241, 23)), 57 | SemanticLabel('furniture', True, None, (255, 205, 205)), 58 | SemanticLabel('objects', True, None, (0, 223, 228)), 59 | SemanticLabel('picture', True, None, (106, 135, 204)), 60 | SemanticLabel('sofa', True, None, (116, 28, 41)), 61 | SemanticLabel('table', True, None, (240, 35, 235)), 62 | SemanticLabel('tv', True, None, (0, 166, 156)), 63 | SemanticLabel('wall', False, None, (249, 139, 0)), 64 | SemanticLabel('window', True, None, (225, 228, 194)), 65 | )) 66 | 67 | # original scene labels 68 | SCENE_LABEL_LIST = SceneLabelList(( 69 | SceneLabel('bathroom'), 70 | SceneLabel('bedroom'), 71 | SceneLabel('kitchen'), 72 | SceneLabel('living_room'), 73 | SceneLabel('office') 74 | )) 75 | 76 | # scene labels for indoor domestic environments 77 | # mapping dict with new labels as keys and tuple of old labels as values 78 | SCENE_LABEL_MAPPING_INDOOR_DOMESTIC = { 79 | SceneLabel('void'): ( 80 | ), 81 | SceneLabel('bathroom'): ( 82 | SceneLabel('bathroom'), 83 | ), 84 | SceneLabel('bedroom'): ( 85 | SceneLabel('bedroom'), 86 | ), 87 | SceneLabel('dining room'): ( 88 | ), 89 | SceneLabel('discussion room'): ( 90 | ), 91 | SceneLabel('hallway'): ( 92 | ), 93 | SceneLabel('kitchen'): ( 94 | SceneLabel('kitchen'), 95 | ), 96 | SceneLabel('living room'): ( 97 | SceneLabel('living_room'), 98 | ), 99 | SceneLabel('office'): ( 100 | SceneLabel('office'), 101 | ), 102 | SceneLabel('other indoor'): ( 103 | ), 104 | SceneLabel('stairs'): ( 105 | ) 106 | } 107 | 108 | SCENE_LABEL_LIST_INDOOR_DOMESTIC = SceneLabelList( 109 | tuple(SCENE_LABEL_MAPPING_INDOOR_DOMESTIC.keys()) 110 | ) 111 | # create 
index mapping 112 | SCENE_LABEL_IDX_TO_SCENE_LABEL_INDOOR_DOMESTIC_IDX = {} 113 | for new_label, old_labels in SCENE_LABEL_MAPPING_INDOOR_DOMESTIC.items(): 114 | for old_label in old_labels: 115 | old_idx = SCENE_LABEL_LIST.index(old_label) 116 | new_idx = SCENE_LABEL_LIST_INDOOR_DOMESTIC.index(new_label) 117 | SCENE_LABEL_IDX_TO_SCENE_LABEL_INDOOR_DOMESTIC_IDX[old_idx] = new_idx 118 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/auxiliary_data/_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. codeauthor:: Soehnke Fischedick 5 | """ 6 | from typing import Optional, Tuple, Union 7 | 8 | import abc 9 | import os 10 | 11 | import numpy as np 12 | import torch 13 | 14 | DEFAULT_CACHE_BASEPATH = os.getenv( 15 | "DEFAULT_CACHE_BASEPATH", 16 | "~/.cache/nicr_scene_analysis_datasets/auxiliary_data" 17 | ) 18 | 19 | 20 | class AuxiliaryDataEstimatorBase(abc.ABC): 21 | NAME: str 22 | 23 | def __init__( 24 | self, 25 | device: Union[str, torch.device] = 'cpu', 26 | max_pixels: Optional[int] = None, 27 | input_interpolation: str = 'bilinear', 28 | auto_set_up: bool = True, 29 | cache_basepath: Optional[str] = None, # None -> DEFAULT_CACHE_BASEPATH 30 | ) -> None: 31 | self._device = device 32 | self._max_pixels = max_pixels 33 | self._input_interpolation = input_interpolation 34 | 35 | # cache path 36 | self._cache_basepath = cache_basepath 37 | if self._cache_basepath is None: 38 | self._cache_basepath = os.path.expanduser(DEFAULT_CACHE_BASEPATH) 39 | 40 | self._cache_path = os.path.join(self._cache_basepath, self.NAME) 41 | os.makedirs(self._cache_path, exist_ok=True) 42 | 43 | if auto_set_up: 44 | self.set_up_estimator(self._device) 45 | 46 | @property 47 | def cache_path(self) -> str: 48 | return self._cache_path 49 | 50 | @abc.abstractmethod 51 | def set_up_estimator( 52 | self, 53 | device: Union[str, torch.device] = 'cpu' 54 | ) -> None: 55 | pass 56 | 57 | @staticmethod 58 | def _get_height_width( 59 | img: Union[torch.Tensor, np.ndarray] 60 | ) -> Tuple[int, int]: 61 | if 2 == img.ndim: 62 | # assume single channel: (H, W) 63 | return img.shape[0], img.shape[1] 64 | elif 3 == img.ndim: 65 | if isinstance(img, np.ndarray): 66 | # assume channels last: (H, W, C) 67 | return img.shape[0], img.shape[1] 68 | else: 69 | # assume channels first: (C, H, W) 70 | return img.shape[1], img.shape[2] 71 | elif 4 == img.ndim: 72 | # assume channels first with batch axis (B, C, H, W) 73 | return img.shape[2], img.shape[3] 74 | 75 | @staticmethod 76 | def _resize_image( 77 | img: torch.Tensor, 78 | height: int, 79 | width: int, 80 | mode: str = 'nearest' 81 | ) -> torch.Tensor: 82 | 83 | if AuxiliaryDataEstimatorBase._get_height_width(img) == (height, width): 84 | # nothing to do 85 | return img 86 | 87 | # resize 88 | return torch.nn.functional.interpolate( 89 | img, size=(height, width), mode=mode 90 | ) 91 | 92 | def prepare_input( 93 | self, 94 | image: Union[torch.Tensor, np.ndarray], 95 | ) -> torch.Tensor: 96 | # check input 97 | assert image.ndim in (3, 4) 98 | 99 | # store input type and original shape for later postprocessing 100 | is_numpy = isinstance(image, np.ndarray) 101 | h, w = self._get_height_width(image) 102 | 103 | # ensure torch tensor with channels first 104 | if is_numpy: 105 | # assume image is channels last, i.e., (H, W, C) 106 | assert image.ndim == 3 and \ 107 | (image.shape[-1] == 3 or 
image.shape[-1] == 1) 108 | image = torch.from_numpy(image) 109 | image = image.permute(2, 0, 1) # (H, W, C) -> (C, H, W) 110 | 111 | # ensure (B, C, H, W) 112 | if 3 == image.ndim: 113 | image = image[None, ...] 114 | 115 | if self._max_pixels is not None: 116 | # resize image to have at most max_pixels while keeping the aspect 117 | # ratio (uniform scale factor sqrt(max_pixels / n_pixels)) 118 | n_pixels = h * w 119 | if n_pixels > self._max_pixels: 120 | image = self._resize_image( 121 | image, 122 | height=int(np.round(h * np.sqrt(self._max_pixels / n_pixels))), 123 | width=int(np.round(w * np.sqrt(self._max_pixels / n_pixels))), 124 | mode=self._input_interpolation 125 | ) 126 | 127 | return image 128 | 129 | @abc.abstractmethod 130 | def predict( 131 | self, 132 | rgb_img: Union[torch.Tensor, np.ndarray], 133 | ) -> Union[torch.Tensor, np.ndarray]: 134 | pass 135 | 136 | @abc.abstractmethod 137 | def _estimator_predict(self, rgb_image: torch.Tensor) -> torch.Tensor: 138 | """(B,C,H,W) -> (B,H,W)""" 139 | pass 140 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_class_weighting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import warnings 6 | 7 | import numpy as np 8 | from tqdm import tqdm 9 | from tqdm.contrib.concurrent import thread_map 10 | 11 | 12 | KNOWN_CLASS_WEIGHTINGS = ( 13 | 'median-frequency', # median frequency balancing 14 | 'logarithmic', # logarithmic weighting with: 1 / ln(c+p_class) 15 | 'linear', # 1 - p_class 16 | 'none' # no weighting (ones for all classes) 17 | ) 18 | 19 | 20 | def compute_class_weights( 21 | dataset, 22 | sample_key, 23 | n_classes, 24 | ignore_first_class: bool = True, # ignore void class 25 | weight_mode: str = 'median-frequency', 26 | c: float = 1.02, 27 | n_threads: int = 1, 28 | debug: bool = False, 29 | verbose: bool = True 30 | ) -> np.ndarray: 31 | assert weight_mode in KNOWN_CLASS_WEIGHTINGS 32 | 33 | if verbose: 34 | print_ = print 35 | else: 36 | def print_(*args, **kwargs): 37 | pass 38 | 39 | if debug: 40 | warnings.warn( 41 | "Weight mode 'none' is forced as debug mode is enabled, i.e., " 42 | "ones are used as class weights." 43 | ) 44 | weight_mode = 'none' 45 | 46 | print_(f"Computing '{weight_mode}' class weights for '{sample_key}' ...") 47 | 48 | if 'none' == weight_mode: 49 | # equal weights for all classes -> disables class weighting 50 | if ignore_first_class: 51 | return np.ones(n_classes-1) 52 | else: 53 | return np.ones(n_classes) 54 | 55 | def count_helper(sample_idx): 56 | data = dataset.load(sample_key, sample_idx) 57 | h, w = data.shape 58 | n_pixels_per_class_sample = np.bincount( 59 | data.flatten(), 60 | minlength=n_classes 61 | ) 62 | 63 | # for median frequency, we need the pixel sum of the images where 64 | # the specific class is present. (it only matters if the class is 65 | # present in the image and not how many pixels it occupies.) 
66 | class_in_image = n_pixels_per_class_sample > 0 67 | n_image_pixels_with_class_sample = class_in_image * h * w 68 | 69 | return n_pixels_per_class_sample, n_image_pixels_with_class_sample 70 | 71 | n_pixels_per_class = np.zeros(n_classes, dtype=np.int64) 72 | n_image_pixels_with_class = np.zeros(n_classes, dtype=np.int64) 73 | 74 | if n_threads == 1: 75 | for i in tqdm(range(len(dataset)), 76 | total=len(dataset), 77 | disable=not verbose): 78 | # process current image at index i 79 | cur_n_pixels_per_class, cur_n_image_pixels_with_class = \ 80 | count_helper(i) 81 | 82 | # update stats 83 | n_pixels_per_class += cur_n_pixels_per_class 84 | n_image_pixels_with_class += cur_n_image_pixels_with_class 85 | else: 86 | # process images using multiple threads 87 | res = thread_map(count_helper, range(len(dataset)), 88 | total=len(dataset), 89 | max_workers=n_threads, 90 | disable=not verbose) 91 | # update stats 92 | for cur_n_pixels_per_class, cur_n_image_pixels_with_class in res: 93 | n_pixels_per_class += cur_n_pixels_per_class 94 | n_image_pixels_with_class += cur_n_image_pixels_with_class 95 | 96 | # remove first class (void) 97 | if ignore_first_class: 98 | n_pixels_per_class = n_pixels_per_class[1:] 99 | n_image_pixels_with_class = n_image_pixels_with_class[1:] 100 | 101 | if weight_mode == 'linear': 102 | probabilities = n_pixels_per_class / np.sum(n_pixels_per_class) 103 | class_weights = 1 - probabilities 104 | 105 | elif weight_mode == 'median-frequency': 106 | frequency = n_pixels_per_class / n_image_pixels_with_class 107 | class_weights = np.nanmedian(frequency) / frequency 108 | 109 | elif weight_mode == 'logarithmic': 110 | probabilities = n_pixels_per_class / np.sum(n_pixels_per_class) 111 | class_weights = 1 / np.log(c + probabilities) 112 | 113 | nan_indices = np.argwhere(np.isnan(class_weights)) 114 | if len(nan_indices) != 0: 115 | print_(f"class_weights:\n{class_weights}") 116 | warnings.warn( 117 | f"Class weights contain NaNs at positions: {nan_indices}, " 118 | "setting NaNs to 0." 119 | ) 120 | print_(f"n_pixels_per_class:\n{n_pixels_per_class}") 121 | print_(f"n_image_pixels_with_class:\n{n_image_pixels_with_class}") 122 | class_weights[nan_indices] = 0 123 | print_(f"fixed class_weights:\n{class_weights}") 124 | 125 | return class_weights 126 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/img.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | import numpy as np 6 | from PIL import Image 7 | 8 | 9 | from ._colormaps import COLORMAP_VISUALLY_DISTINCT_VOID_PLUS_256 10 | from ._colormaps import COLORMAP_VISUALLY_DISTINCT_256 11 | 12 | 13 | def dimshuffle(input_img, from_axes, to_axes): 14 | # check axes parameter 15 | if from_axes.find('0') == -1 or from_axes.find('1') == -1: 16 | raise ValueError("`from_axes` must contain both axis0 ('0') and" 17 | "axis 1 ('1')") 18 | if to_axes.find('0') == -1 or to_axes.find('1') == -1: 19 | raise ValueError("`to_axes` must contain both axis0 ('0') and" 20 | "axis 1 ('1')") 21 | if len(from_axes) != len(input_img.shape): 22 | raise ValueError("Number of axis given by `from_axes` does not match " 23 | "the number of axis in `input_img`") 24 | 25 | # handle special cases for channel axis 26 | to_axes_c = to_axes.find('c') 27 | from_axes_c = from_axes.find('c') 28 | # remove channel axis (only grayscale image) 29 | if to_axes_c == -1 and from_axes_c >= 0: 30 | if input_img.shape[from_axes_c] != 1: 31 | raise ValueError('Cannot remove channel axis because size is not ' 32 | 'equal to 1') 33 | input_img = input_img.squeeze(axis=from_axes_c) 34 | from_axes = from_axes.replace('c', '') 35 | 36 | # handle special cases for batch axis 37 | to_axes_b = to_axes.find('b') 38 | from_axes_b = from_axes.find('b') 39 | # remove batch axis 40 | if to_axes_b == -1 and from_axes_b >= 0: 41 | if input_img.shape[from_axes_b] != 1: 42 | raise ValueError('Cannot remove batch axis because size is not ' 43 | 'equal to 1') 44 | input_img = input_img.squeeze(axis=from_axes_b) 45 | from_axes = from_axes.replace('b', '') 46 | 47 | # add new batch axis (in front) 48 | if to_axes_b >= 0 and from_axes_b == -1: 49 | input_img = input_img[np.newaxis] 50 | from_axes = 'b' + from_axes 51 | 52 | # add new channel axis (in front) 53 | if to_axes_c >= 0 and from_axes_c == -1: 54 | input_img = input_img[np.newaxis] 55 | from_axes = 'c' + from_axes 56 | 57 | return np.transpose(input_img, [from_axes.find(a) for a in to_axes]) 58 | 59 | 60 | def get_colormap(n): 61 | def bitget(byteval, idx): 62 | return (byteval & (1 << idx)) != 0 63 | 64 | cmap = np.zeros((n, 3), dtype='uint8') 65 | for i in range(n): 66 | r = g = b = 0 67 | c = i 68 | for j in range(8): 69 | r = r | (bitget(c, 0) << 7-j) 70 | g = g | (bitget(c, 1) << 7-j) 71 | b = b | (bitget(c, 2) << 7-j) 72 | c = c >> 3 73 | 74 | cmap[i] = np.array([r, g, b]) 75 | 76 | return cmap 77 | 78 | 79 | def get_visual_distinct_colormap(with_void: bool = True) -> np.ndarray: 80 | # useful for visualizing instances 81 | if with_void: 82 | return COLORMAP_VISUALLY_DISTINCT_VOID_PLUS_256 83 | else: 84 | return COLORMAP_VISUALLY_DISTINCT_256 85 | 86 | 87 | def save_indexed_png(filepath, label, colormap): 88 | # note that OpenCV is not able to handle indexed pngs correctly. 89 | img = Image.fromarray(np.asarray(label, dtype='uint8')) 90 | img.putpalette(list(np.asarray(colormap, dtype='uint8').flatten())) 91 | img.save(filepath, 'PNG') 92 | 93 | 94 | def blend_images(img1, img2, alpha=0.5): 95 | """ 96 | Function to alpha composite two images. The output image is calculated 97 | by img_out = ( 1 - ( alpha*( img2 > 0 ) ) )*img1 + alpha*img2. 98 | 99 | Parameters 100 | ---------- 101 | img1 : {numpy.ndarray, list, tuple} 102 | The first image with axes '01' or '01c' and of dtype 'uintX' or 103 | 'floatX'. (background image). 
104 | img2 : {numpy.ndarray, list, tuple} 105 | The second image with axes '01' or '01c' and of dtype 'uintX' or 106 | 'floatX' (foreground image). 107 | alpha : {float} 108 | The alpha value to use: 0.0 <= alpha <= 1.0. 109 | 110 | Returns 111 | ------- 112 | img_out : numpy.ndarray 113 | The resulting image. 114 | 115 | """ 116 | # ensure that img is a numpy object 117 | img1 = np.asanyarray(img1) 118 | img2 = np.asanyarray(img2) 119 | assert img1.dtype == img2.dtype 120 | assert img1.ndim == img2.ndim 121 | 122 | # alpha composite images 123 | if img2.ndim == 3: 124 | mask = np.any(img2 > 0, axis=2) 125 | else: 126 | mask = img2 > 0 127 | 128 | result = img1.copy() 129 | result[mask, ...] = \ 130 | ((1-alpha)*img1[mask, ...] + alpha*img2[mask, ...]).astype(img1.dtype) 131 | 132 | return result 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # build system ----------------------------------------------------------------- 2 | [build-system] 3 | build-backend = "setuptools.build_meta" 4 | requires = [ 5 | "setuptools>=61.0", 6 | "wheel", 7 | ] 8 | 9 | # project metadata ------------------------------------------------------------- 10 | [project] 11 | name = "nicr-scene-analysis-datasets" 12 | description = "Package to prepare and use common datasets for scene analysis." 13 | authors = [ 14 | { name = "Daniel Seichter", email = "daniel.seichter@tu-ilmenau.de" }, 15 | { name = "Soehnke Fischedick", email = "soehnke.fischedick@tu-ilmenau.de" }, 16 | ] 17 | license = { file = "LICENSE" } 18 | readme = "README.md" 19 | requires-python = ">=3.8" 20 | dynamic = [ "version", "dependencies", "optional-dependencies" ] 21 | 22 | [project.urls] 23 | Homepage = "https://www.tu-ilmenau.de/neurob" 24 | Repository = "https://github.com/TUI-NICR/nicr-scene-analysis-datasets" 25 | 26 | # package configuration ------------------------------------------------------- 27 | [tool.setuptools.packages.find] 28 | where = ["src"] 29 | 30 | # additional package data 31 | [tool.setuptools.package-data] 32 | nicr_scene_analysis_datasets = [ 33 | # ADE20K 34 | "datasets/ade20k/README.md", 35 | # Cityscapes 36 | "datasets/cityscapes/README.md", 37 | # COCO 38 | "datasets/coco/README.md", 39 | # Hypersim 40 | "datasets/hypersim/README.md", 41 | # NYUv2 42 | "datasets/nyuv2/README.md", 43 | "datasets/nyuv2/class13Mapping.mat", 44 | "datasets/nyuv2/classMapping40.mat", 45 | "datasets/nyuv2/splits.mat", 46 | "datasets/nyuv2/manual_orientations_test.json", 47 | "datasets/nyuv2/manual_orientations_train.json", 48 | # SceneNet RGB-D 49 | "datasets/scenenetrgbd/README.md", 50 | "datasets/scenenetrgbd/scenenet.proto", 51 | # SUN RGB-D 52 | "datasets/sunrgbd/README.md", 53 | "datasets/sunrgbd/nyu_additional_class_mapping.json", 54 | "datasets/sunrgbd/nyu_weak_box_3d_mapping.json", 55 | # ScanNet 56 | "datasets/scannet/README.md", 57 | "datasets/scannet/scannetv2_train.txt", 58 | "datasets/scannet/scannetv2_val.txt", 59 | "datasets/scannet/scannetv2_test.txt", 60 | ] 61 | 62 | # version and dependencies 63 | [tool.setuptools.dynamic] 64 | version = { attr = "nicr_scene_analysis_datasets.version.__version__" } 65 | dependencies = { file = [ 66 | "requirements/base.txt" 67 | ] } 68 | optional-dependencies.withpreparation = { file = [ 69 | "requirements/preparation.txt" 70 | ] } 71 | optional-dependencies.withopencv = { file = [ 72 | "requirements/opencv.txt" 73 | ] } 74 | 
optional-dependencies.withtorch = { file = [ 75 | "requirements/torch.txt" 76 | ] } 77 | optional-dependencies.with3d = { file = [ 78 | "requirements/3d.txt" 79 | ] } 80 | optional-dependencies.withauxiliarydata = { file = [ 81 | "requirements/torch.txt", 82 | "requirements/depth_estimation.txt", 83 | "requirements/embedding_estimation.txt", 84 | ] } 85 | optional-dependencies.withdepthestimation = { file = [ 86 | "requirements/torch.txt", 87 | "requirements/depth_estimation.txt", 88 | ] } 89 | optional-dependencies.withembeddingestimation = { file = [ 90 | "requirements/torch.txt", 91 | "requirements/embedding_estimation.txt" 92 | ] } 93 | optional-dependencies.test = { file = [ 94 | "requirements/torch.txt", 95 | "requirements/depth_estimation.txt", 96 | "requirements/embedding_estimation.txt", 97 | "requirements/test.txt", 98 | ] } 99 | 100 | # entry points 101 | [project.scripts] 102 | nicr_sa_prepare_dataset = "nicr_scene_analysis_datasets.scripts.prepare_dataset:main" 103 | nicr_sa_prepare_labeled_point_clouds = "nicr_scene_analysis_datasets.scripts.prepare_labeled_point_clouds:main" 104 | nicr_sa_depth_viewer = "nicr_scene_analysis_datasets.scripts.viewer_depth:main" 105 | nicr_sa_semantic_instance_viewer = "nicr_scene_analysis_datasets.scripts.viewer_semantic_instance:main" 106 | nicr_sa_labeled_pc_viewer = "nicr_scene_analysis_datasets.scripts.viewer_labeled_point_cloud:main" 107 | nicr_sa_generate_auxiliary_data = "nicr_scene_analysis_datasets.scripts.generate_auxiliary_data:main" 108 | 109 | # linting ---------------------------------------------------------------------- 110 | [tool.ruff] 111 | exclude = [ 112 | # we are not the authors of these files 113 | "src/*scannet/scannet200_constants.py", 114 | "src/*scannet/SensorData.py", 115 | # stuff 116 | "stuff/*", 117 | ] 118 | 119 | [tool.ruff.lint] 120 | ignore = [ 121 | # E501 line too long (82 > 79 characters) 122 | "E501", 123 | # E402 module level import not at top of file 124 | "E402", 125 | # E731 do not assign a lambda expression, use a def 126 | "E731", 127 | # [not implemented in ruff] line breaks W503 vs. W504 128 | # "W504" 129 | ] 130 | 131 | [tool.ruff.lint.per-file-ignores] 132 | "__init__.py" = [ 133 | # allow unused imports in __init__.py files 134 | "F401" 135 | ] 136 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scenenetrgbd/scenenet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package scenenet; 4 | 5 | message SceneLayout { 6 | enum LayoutType { 7 | BATHROOM = 1; 8 | BEDROOM = 2; 9 | KITCHEN = 3; 10 | LIVING_ROOM = 4; 11 | OFFICE = 5; 12 | } 13 | optional LayoutType layout_type = 1; 14 | // This is the name of the SceneNet model used for the layout 15 | optional string model = 2; 16 | } 17 | 18 | message LightInfo { 19 | enum LightType { 20 | SPHERE = 1; 21 | PARALLELOGRAM = 2; 22 | } 23 | optional LightType light_type = 1; 24 | // Light intensity 25 | optional Power light_output = 2; 26 | // This is the center for sphere type lights. 
And corner for others 27 | optional Position position = 3; 28 | // This is only for SPHERE lights 29 | optional float radius = 4; 30 | // This is only for PARALLELOGRAM lights 31 | optional Position v1 = 5; 32 | optional Position v2 = 6; 33 | } 34 | 35 | message RandomObjectInfo { 36 | optional string shapenet_hash = 1; 37 | optional float height_meters = 2; 38 | message Transformation { 39 | // The 3x4 matrix is as follows: 40 | // rotation_mat11 rotation_mat12 rotation_mat13 translation_x 41 | // rotation_mat21 rotation_mat22 rotation_mat23 translation_y 42 | // rotation_mat31 rotation_mat32 rotation_mat33 translation_y 43 | optional float translation_x = 1; 44 | optional float translation_y = 2; 45 | optional float translation_z = 3; 46 | optional float rotation_mat11 = 4; 47 | optional float rotation_mat12 = 5; 48 | optional float rotation_mat13 = 6; 49 | optional float rotation_mat21 = 7; 50 | optional float rotation_mat22 = 8; 51 | optional float rotation_mat23 = 9; 52 | optional float rotation_mat31 = 10; 53 | optional float rotation_mat32 = 11; 54 | optional float rotation_mat33 = 12; 55 | } 56 | // The transformation gives the transformation applies to an object, about 57 | // the center of the base plane of its axis-aligned bounding box. 58 | optional Transformation object_pose = 3; 59 | } 60 | 61 | message Instance { 62 | optional int32 instance_id = 1; 63 | optional string semantic_wordnet_id = 2; 64 | optional string semantic_english = 3; 65 | enum InstanceType { 66 | // This is the instance type when no object is present, e.g. because of 67 | // looking out a window into nothingness 68 | BACKGROUND = 1; 69 | // This is an object that is hard coded into the layout and does not 70 | // move. This type does not have a transformation or shapenet hash 71 | LAYOUT_OBJECT = 2; 72 | // This is a randomly positioned light source 73 | LIGHT_OBJECT = 3; 74 | // This means the object is a randomly positioned shapenet object. The 75 | // object has a transformation and scale parameter in the object_info 76 | // variable. 77 | RANDOM_OBJECT = 4; 78 | } 79 | optional InstanceType instance_type = 4; 80 | // This information is only filled in for the respective type 81 | optional LightInfo light_info = 5; 82 | optional RandomObjectInfo object_info = 6; 83 | } 84 | 85 | message Power { 86 | optional float r = 1; 87 | optional float g = 2; 88 | optional float b = 3; 89 | } 90 | 91 | message Position { 92 | optional float x = 1; 93 | optional float y = 2; 94 | optional float z = 3; 95 | } 96 | 97 | message Pose { 98 | // The position of these two points define the camera view. The y vector is 99 | // defined as [0,1,0]. For an example of how to calculate the camera view 100 | // coordinate system, see the python codebase. 101 | optional Position camera = 1; 102 | optional Position lookat = 2; 103 | optional float timestamp = 3; 104 | } 105 | 106 | message View { 107 | // These increment by the number of skip frames, i.e. 0,25,50...7475. 
108 | optional int32 frame_num = 1; 109 | // The photo is rendered by integrating uniformly sampled 110 | // exposures between the following two poses 111 | optional Pose shutter_open = 2; 112 | optional Pose shutter_close = 3; 113 | } 114 | 115 | message Trajectory { 116 | optional SceneLayout layout = 1; 117 | // The first instances[0] is always the 'background' and 118 | // undefined class when for example looking out windows 119 | repeated Instance instances = 2; 120 | // These are ordered sequentially for a trajectory 121 | repeated View views = 3; 122 | // This stores the path from the root data directory to the trajectory data 123 | // folder. If the trajectories are stored as: 124 | // /path/i/extracted/{val/train}/0/123/photo/0.jpg 125 | // then this path will be '0/123' designating the trajectories folder 126 | optional string render_path = 4; 127 | } 128 | 129 | message Trajectories { 130 | // This is the root list which stores all of the available trajectories 131 | repeated Trajectory trajectories = 1; 132 | } 133 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/dataset_base/_concat_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from typing import Any, Tuple, Union 6 | 7 | from collections import OrderedDict 8 | import warnings 9 | 10 | from ._base_dataset import DatasetBase 11 | 12 | 13 | class ConcatDataset: 14 | def __init__( 15 | self, 16 | main_dataset: DatasetBase, 17 | *additional_datasets: DatasetBase 18 | ) -> None: 19 | self._main_dataset = main_dataset 20 | self._additional_datasets = additional_datasets 21 | self._datasets = (main_dataset,) + additional_datasets 22 | self._active_datasets = (main_dataset,) + additional_datasets 23 | 24 | # catch common misconfiguration 25 | for ds in self._datasets: 26 | if hasattr(ds, 'depth_force_mm') and not ds.depth_force_mm: 27 | # actually SUNRGB-D 28 | warnings.warn( 29 | f"Detected '{ds.__class__.__name__}' dataset with " 30 | "deviating depth scale, consider setting " 31 | "`depth_force_mm` to 'True'." 
32 | ) 33 | 34 | # extract information from main dataset 35 | self._sample_keys = main_dataset.sample_keys 36 | # ensure that all additional datasets provide the sample keys of the 37 | # main dataset 38 | for ds in self._additional_datasets: 39 | assert all(sk in ds.sample_keys for sk in self._sample_keys) 40 | 41 | # handle cameras (create ordered union of all cameras) 42 | # note, we use dicts instead of sets to preserve the order (sets use a 43 | # random seed while hashing and, thus, do not guarantee insertion order) 44 | assert all(ds.camera is None for ds in self._datasets) 45 | cameras = [] 46 | for ds in self._datasets: 47 | cameras.extend(ds.cameras) 48 | # note as of python 3.7, dicts guarantee insertion order, so we might 49 | # use dict in future 50 | self._cameras = tuple(OrderedDict.fromkeys(cameras).keys()) 51 | 52 | self._camera = None 53 | 54 | def filter_camera(self, camera: Union[None, str]): 55 | assert camera is None or camera in self.cameras 56 | 57 | # apply filter to all datasets 58 | # note, not all datasets may support given camera, filter them using 59 | # active_datasets 60 | active_datasets = [] 61 | for ds in self._datasets: 62 | if camera is None or camera in ds.cameras: 63 | ds.filter_camera(camera) 64 | active_datasets.append(ds) 65 | 66 | self._active_datasets = tuple(active_datasets) 67 | self._camera = camera 68 | 69 | return self 70 | 71 | def __enter__(self): 72 | # handles context stuff, e.g., with dataset.filter_camera('xy') as ds 73 | return self 74 | 75 | def __exit__(self, *exc: Any): 76 | # handles context stuff, e.g., with dataset.filter_camera('xy') as ds 77 | # reset camera filter 78 | self.filter_camera(None) 79 | 80 | def __len__(self) -> int: 81 | return sum(len(ds) for ds in self._active_datasets) 82 | 83 | @property 84 | def datasets(self) -> Tuple[DatasetBase]: 85 | return self._active_datasets 86 | 87 | def _determine_dataset_and_idx(self, idx: int) -> Tuple[DatasetBase, int]: 88 | length = len(self) 89 | 90 | # ensure that idx is in valid range 91 | if not (-length <= idx < length): 92 | raise IndexError(f"Index {idx} out of range (length: {length}).") 93 | 94 | # handle negative indices 95 | if idx < 0: 96 | idx += length 97 | 98 | # note that the lengths may change if filter_dataset is called outside 99 | for ds in self._active_datasets: 100 | if idx < len(ds): 101 | return ds, idx 102 | idx -= len(ds) 103 | 104 | def load(self, sample_key: str, idx: int) -> Any: 105 | ds, ds_idx = self._determine_dataset_and_idx(idx) 106 | return ds._sample_key_loaders.get(sample_key.lower())(ds_idx) 107 | 108 | def __getitem__(self, idx: int): 109 | # note, we also reimplement __getitem__ to do index mapping stuff only 110 | # once per sample 111 | ds, ds_idx = self._determine_dataset_and_idx(idx) 112 | return ds[ds_idx] 113 | 114 | @property 115 | def cameras(self) -> Tuple[str]: 116 | return self._cameras 117 | 118 | @property 119 | def camera(self) -> Union[None, str]: 120 | return self._camera 121 | 122 | def __getstate__(self): 123 | # important for copying 124 | return self.__dict__ 125 | 126 | def __setstate__(self, state): 127 | # important for copying 128 | self.__dict__ = state 129 | 130 | def __getattr__(self, name): 131 | if name not in self.__dict__ and '_main_dataset' in self.__dict__: 132 | # redirect all other attributes/calls to main dataset 133 | return getattr(self._main_dataset, name) 134 | 135 | return super().__getattr__(name) 136 | -------------------------------------------------------------------------------- 
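The `ConcatDataset` above is exercised in `tests/test_concat.py` further below; as a quick orientation, here is a minimal usage sketch (the dataset paths are placeholders and assume datasets already prepared with `nicr_sa_prepare_dataset`):

```python
from nicr_scene_analysis_datasets import ConcatDataset, get_dataset_class

# two already prepared datasets sharing a common set of sample keys
sample_keys = ('identifier', 'depth')
sunrgbd = get_dataset_class('sunrgbd')(
    dataset_path='/path/to/prepared/sunrgbd',   # placeholder path
    split='train',
    sample_keys=sample_keys,
    depth_force_mm=True,   # align depth scale with the other dataset
)
nyuv2 = get_dataset_class('nyuv2')(
    dataset_path='/path/to/prepared/nyuv2',     # placeholder path
    split='train',
    sample_keys=sample_keys,
)

# the first argument is the main dataset, further datasets are appended
dataset = ConcatDataset(sunrgbd, nyuv2)
print(len(dataset))      # sum of both lengths
print(dataset.cameras)   # ordered union of the cameras of both datasets

# temporarily restrict to a single camera; datasets that do not provide it
# are excluded while the filter is active
with dataset.filter_camera('kv1') as ds:
    sample = ds[0]       # dict with 'identifier' and 'depth'
# leaving the context resets the filter (camera is None again)
```

Note that the first argument acts as the main dataset: attribute accesses not handled by the concatenated object itself are redirected to it (see `__getattr__` above).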
/.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - stylecheck 3 | - test 4 | - deploy 5 | 6 | .conda_env: &conda_env 7 | before_script: 8 | # update conda 9 | - conda config --set always_yes yes 10 | - conda update -q conda 11 | 12 | # create and activate environment 13 | - conda create -q -n testenv_${CI_PIPELINE_ID}_${CI_JOB_NAME}_py${PYTHON_VERSION_TO_USE//./} python=${PYTHON_VERSION_TO_USE} 14 | - source activate testenv_${CI_PIPELINE_ID}_${CI_JOB_NAME}_py${PYTHON_VERSION_TO_USE//./} 15 | after_script: 16 | # remove environment 17 | - conda env remove --name testenv_${CI_PIPELINE_ID}_${CI_JOB_NAME}_py${PYTHON_VERSION_TO_USE//./} 18 | 19 | .test_template: &test_template 20 | <<: *conda_env 21 | stage: test 22 | rules: 23 | - if: $CI_MERGE_REQUEST_TITLE =~ /^(Draft:|WIP:|\[Draft\]|\[WIP\])/ 24 | when: manual 25 | - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_TITLE !~ /^(Draft:|WIP:|\[Draft\]|\[WIP\])/ 26 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH 27 | - if: $CI_PIPELINE_SOURCE == "schedule" 28 | script: 29 | # for debugging, list all environment variables 30 | # - export 31 | 32 | # install packages (use conda to avoid time-consuming installations) 33 | - conda install -q pytest pytest-cov 34 | - python -m pip install -q pytest-html 35 | 36 | # pytorch and detectron 37 | - | 38 | if [ "${PYTHON_VERSION_TO_USE}" == "3.6" ]; then 39 | conda install pytorch=1.10.1 torchvision=0.11.2 cpuonly -c pytorch 40 | python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html 41 | elif [ "${PYTHON_VERSION_TO_USE}" == "3.8" ]; then 42 | # EMSANet 43 | conda install pytorch=1.13.0 torchvision=0.14.0 cpuonly -c pytorch 44 | python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' 45 | else 46 | conda install pytorch=2.0.1 torchvision=0.15.2 cpuonly -c pytorch 47 | python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' 48 | fi 49 | 50 | # panopticapi 51 | - pip install git+https://github.com/cocodataset/panopticapi.git 52 | # check conda installation 53 | - conda info 54 | - conda list 55 | 56 | # install package (and all missing dependencies) 57 | - python -m pip install -q --editable .[test] 58 | 59 | # test package (opt: get coverage) 60 | - | 61 | if [ "${REPORT_COVERAGE}" == "true" ]; then 62 | py.test tests/ -rx -s -vv --cov=${CI_PROJECT_NAME//-/_} --cov-report html --cov-report term --html=report_py${PYTHON_VERSION_TO_USE//./}.html --self-contained-html 63 | else 64 | py.test tests/ -rx -s -vv --html=report_py${PYTHON_VERSION_TO_USE//./}.html --self-contained-html 65 | fi 66 | coverage: '/^TOTAL.*\s+(\d+\%)$/' 67 | artifacts: 68 | when: always 69 | paths: 70 | - report_py${PYTHON_VERSION_TO_USE//./}.html 71 | - htmlcov 72 | 73 | style_check: 74 | <<: *conda_env 75 | stage: stylecheck 76 | rules: 77 | - if: $CI_PIPELINE_SOURCE == "merge_request_event" 78 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH 79 | - if: $CI_PIPELINE_SOURCE == "schedule" 80 | variables: 81 | PYTHON_VERSION_TO_USE: "3.6" # ubuntu 18.04 / jetson default 82 | script: 83 | # install packages 84 | - conda install -q pycodestyle pylint 85 | 86 | # check style using pep8 87 | - find ./ -name "*.py" -not -path "*/stuff/*" | xargs pycodestyle --show-source --show-pep8 88 | 89 | # check style using pylint (without taking into account) 90 | - pylint ${CI_PROJECT_NAME//-/_} --rcfile=${CI_PROJECT_DIR}/.pylintrc || true 91 | 92 | # tests_py36: # 
ubuntu18 93 | # <<: *test_template 94 | # variables: 95 | # PYTHON_VERSION_TO_USE: "3.6" 96 | # REPORT_COVERAGE: "false" 97 | 98 | tests_py38: # ubuntu20 99 | <<: *test_template 100 | variables: 101 | PYTHON_VERSION_TO_USE: "3.8" 102 | REPORT_COVERAGE: "true" 103 | 104 | tests_py310: # ubuntu22 105 | <<: *test_template 106 | variables: 107 | PYTHON_VERSION_TO_USE: "3.10" 108 | REPORT_COVERAGE: "false" 109 | 110 | tests_py311: # current 111 | <<: *test_template 112 | variables: 113 | PYTHON_VERSION_TO_USE: "3.11" 114 | REPORT_COVERAGE: "false" 115 | 116 | update_pip_package: 117 | stage: deploy 118 | rules: 119 | - if: $CI_PIPELINE_SOURCE == "schedule" 120 | when: never 121 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH 122 | - if: $CI_PIPELINE_SOURCE == "merge_request_event" 123 | when: manual 124 | script: 125 | - pip install twine 126 | 127 | # build package 128 | - python setup.py sdist bdist_wheel 129 | 130 | # upload package 131 | - export TWINE_USERNAME=${TWINE_USERNAME} 132 | - export TWINE_PASSWORD=${TWINE_PASSWORD} 133 | - python -m twine upload --skip-existing --repository-url ${CI_API_V4_URL}/projects/${PACKAGE_REGISTRY}/packages/pypi dist/* 134 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/ade20k/README.md: -------------------------------------------------------------------------------- 1 | # ADE20K dataset 2 | This dataset provides access to the ADE20K-based semantic segmentation benchmarks, primarily focusing on the MIT Scene Parse Benchmark Challenge 2016 and Places Challenge 2017. 3 | The 2016 Challenge data contains over 20K scene-centric images with pixel-level semantic annotations for 150 object categories, including both stuff classes (sky, road, grass) and thing classes (person, car, bed). 4 | The 2017 Places Challenge adds instance segmentation annotations for the same images. 5 | 6 | For more details about the challenges, see: [MIT Scene Parse Benchmark Challenge 2016](http://sceneparsing.csail.mit.edu/) 7 | 8 | ## Dataset versions 9 | There are two main versions of the dataset which are currently supported in this package: 10 | 11 | 1. [MIT Scene Parse Benchmark Challenge 2016 - MIT SceneParse150](http://sceneparsing.csail.mit.edu/): 12 | - see [GitHub](https://github.com/CSAILVision/sceneparsing) 13 | - benchmark that contains a subset of images and labels of the ADE20K dataset 14 | - is often misleadingly referred to as 'ADE20K' in literature as the data come from the ADE20K dataset 15 | - contains 20,210 training images and 2,000 images for validation, for which semantic annotations and scene classes are available; in addition to that, there is a test set containing 3,352 images 16 | - 150 semantic classes - result from selecting the 150 most frequent object classes from ADE20K (which contains 3600+ classes) - annotations that are not part of this 150 class subset are ignored (treated as background/void) 17 | - usually used for semantic segmentation tasks 18 | - images taken from ADE20K are rescaled so that their longer side is at most 512px 19 | 20 | 2. [Places Challenge 2017](http://placeschallenge.csail.mit.edu/) 21 | - see [GitHub](https://github.com/CSAILVision/placeschallenge) 22 | - adds instance segmentation annotations to the challenge data from 2016 (above) 23 | - can be combined to panoptic segmentation data (100 thing classes and 50 stuff classes) 24 | 25 | ### Additional dataset versions (currently not supported) 26 | 3. 
[ADE20K Dataset 2021](https://groups.csail.mit.edu/vision/datasets/ADE20K/): 27 | - version from January 17, 2021 - most recent version of the dataset 28 | - 27,574 images (25,574 for training and 2,000 for testing/validation) - therefore, containing about 5K additional training images: 29 | - images come in various sizes, some of them larger than the 512px mentioned above 30 | - semantic, instance, scene and part annotations 31 | - primarily used for open vocabulary segmentation 32 | 33 | ## Prepare dataset 34 | 35 | 1. Download and convert the dataset to the desired format: 36 | The Challenge data can be downloaded without having to register. 37 | Therefore, the prepare script can handle the download. 38 | If you prefer to download the files yourself, you may provide the path to the archives of the challenge data with `--challenge-2016-filepath` AND `--challenge-2017-instances-filepath` (instance annotations are given as a separate download). 39 | 40 | ```bash 41 | # general usage 42 | nicr_sa_prepare_dataset ade20k \ 43 | /path/where/to/store/ade20k/ \ 44 | [--challenge-2016-filepath] \ 45 | [--challenge-2017-instances-filepath] \ 46 | [--n-processes N] 47 | ``` 48 | 49 | With arguments: 50 | - `--challenge-2016-filepath`: 51 | Path to the '2016 Scene Parse Benchmark Challenge' zip file (ADEChallengeData2016.zip). 52 | - `--challenge-2017-instances-filepath`: 53 | Path to the tar file containing the instance annotations of the '2017 Places Challenge' tar file (annotations_instance.tar). 54 | - `--n-processes`: 55 | The number of worker processes to spawn. 56 | 57 | 2. (Optional) Generate auxiliary data 58 | > **Note**: To use auxiliary data generation, the package must be installed with the `withauxiliarydata` option: 59 | > ```bash 60 | > pip install -e .[withauxiliarydata] 61 | > ``` 62 | 63 | ```bash 64 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 65 | nicr_sa_generate_auxiliary_data \ 66 | --dataset ade20k \ 67 | --dataset-path /path/to/already/prepared/ade20k/dataset \ 68 | --auxiliary-data depth image-embedding panoptic-embedding \ 69 | --embedding-estimator-device cuda \ 70 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 71 | --depth-estimator-device cuda \ 72 | --depth-estimators depthanything_v2__indoor_large \ 73 | --cache-models 74 | ``` 75 | With arguments: 76 | - `--dataset-path`: 77 | Path to the prepared ADE20k dataset. 78 | - `--auxiliary-data`: 79 | Types of auxiliary data to generate: 80 | - `depth`: Generates synthetic depth images from RGB. 81 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 82 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 83 | - `--depth-estimator-device`: 84 | Device to use for depth estimation (`cpu` or `cuda`). 85 | - `--depth-estimators`: 86 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 87 | - `--embedding-estimator-device`: 88 | Device to use for embedding estimation (`cpu` or `cuda`). 89 | - `--embedding-estimators`: 90 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 91 | - `--cache-models`: 92 | Cache models locally to avoid reloading them in future runs. 93 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | .. 
codeauthor:: Soehnke Fischedick 5 | """ 6 | from typing import Any, Dict, List, Union 7 | 8 | from collections import OrderedDict 9 | from datetime import datetime 10 | import getpass 11 | import hashlib 12 | import json 13 | import os 14 | import sys 15 | from time import time 16 | import urllib.request 17 | import zipfile 18 | 19 | from tqdm import tqdm 20 | 21 | 22 | from ..version import get_version 23 | 24 | 25 | CREATION_META_FILENAME = 'creation_meta.json' 26 | 27 | 28 | class DownloadProgressBar(tqdm): 29 | def update_to(self, b=1, bsize=1, tsize=None): 30 | if tsize is not None: 31 | self.total = tsize 32 | self.update(b * bsize - self.n) 33 | 34 | 35 | def extract_zip(zip_filepath: str, output_dirpath: str) -> None: 36 | with zipfile.ZipFile(zip_filepath, 'r') as zip_file: 37 | for m in tqdm(zip_file.infolist(), desc='Extracting'): 38 | zip_file.extract(m, output_dirpath) 39 | 40 | 41 | def download_file( 42 | url: str, 43 | output_filepath: str, 44 | display_progressbar: bool = False 45 | ) -> None: 46 | with DownloadProgressBar(unit='B', unit_scale=True, 47 | miniters=1, desc=url.split('/')[-1], 48 | disable=not display_progressbar) as t: 49 | urllib.request.urlretrieve(url, 50 | filename=output_filepath, 51 | reporthook=t.update_to) 52 | 53 | 54 | def create_dir(path: str) -> None: 55 | if not os.path.isdir(path): 56 | os.makedirs(path, exist_ok=True) 57 | 58 | 59 | def get_files_by_extension( 60 | path: str, 61 | extension: str = '.png', 62 | flat_structure: bool = False, 63 | recursive: bool = False, 64 | follow_links: bool = True 65 | ) -> Union[List, Dict]: 66 | # check input args 67 | if not os.path.exists(path): 68 | raise IOError("No such file or directory: '{}'".format(path)) 69 | 70 | if flat_structure: 71 | filelist = [] 72 | else: 73 | filelist = {} 74 | 75 | # path is a file 76 | if os.path.isfile(path): 77 | basename = os.path.basename(path) 78 | if extension is None or basename.lower().endswith(extension): 79 | if flat_structure: 80 | filelist.append(path) 81 | else: 82 | filelist[os.path.dirname(path)] = [basename] 83 | return filelist 84 | 85 | # get filelist 86 | filter_func = lambda f: extension is None or f.lower().endswith(extension) 87 | for root, _, filenames in os.walk(path, topdown=True, 88 | followlinks=follow_links): 89 | filenames = list(filter(filter_func, filenames)) 90 | if filenames: 91 | if flat_structure: 92 | filelist.extend((os.path.join(root, f) for f in filenames)) 93 | else: 94 | filelist[root] = sorted(filenames) 95 | if not recursive: 96 | break 97 | 98 | # return 99 | if flat_structure: 100 | return sorted(filelist) 101 | else: 102 | return OrderedDict(sorted(filelist.items())) 103 | 104 | 105 | def create_or_update_creation_metafile( 106 | dataset_basepath: str, 107 | **additional_meta 108 | ) -> None: 109 | filepath = os.path.join(dataset_basepath, CREATION_META_FILENAME) 110 | 111 | # load existing file 112 | if os.path.exists(filepath): 113 | with open(filepath) as f: 114 | meta = json.load(f) 115 | else: 116 | meta = [] 117 | 118 | # update file 119 | ts = time() 120 | meta.append({ 121 | 'executable': sys.executable, 122 | 'command': ' '.join(sys.argv), 123 | 'timestamp': int(ts), 124 | 'local_time': datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'), 125 | 'user': getpass.getuser(), 126 | 'version': '{}.{}.{}+{}'.format(*get_version(with_suffix=True)), 127 | 'additional_meta': additional_meta or None 128 | }) 129 | with open(filepath, 'w') as f: 130 | json.dump(meta, f, indent=4) 131 | 132 | 133 | def 
_normalize_version(version: str) -> str: 134 | # ensure PEP 440 compliant version 135 | # older versions might use: 136 | # (1) 1.2.3-a2c4e6-dirty -> 1.2.3+a2c4e6.dirty 137 | # (2) 1.2.3- -> 1.2.3 138 | version = version.strip('-') # (2) 139 | version = version.replace('-', '+', 1).replace('-', '.') # (1) 140 | return version 141 | 142 | 143 | def load_creation_metafile(dataset_basepath: str) -> Dict[str, Any]: 144 | filepath = os.path.join(dataset_basepath, CREATION_META_FILENAME) 145 | 146 | if not os.path.exists(filepath): 147 | # file does not exist, dataset might be created before metafile was 148 | # introduced, so do not raise an error here 149 | return 150 | 151 | with open(filepath) as f: 152 | metas = json.load(f) 153 | 154 | # ensure PEP 440 compliant version, we messed it up in the past 155 | for meta in metas: 156 | meta['version'] = _normalize_version(meta['version']) 157 | 158 | return metas 159 | 160 | 161 | def get_sha256_hash(filepath: str) -> str: 162 | sha256_hash = hashlib.sha256() 163 | with open(filepath, "rb") as f: 164 | # Read and update hash string value in blocks of 4K 165 | for byte_block in iter(lambda: f.read(4096), b""): 166 | sha256_hash.update(byte_block) 167 | # Get the hexadecimal digest of the hash 168 | return sha256_hash.hexdigest() 169 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scannet/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 
| scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | 
scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/scenenetrgbd/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | from typing import Any, Optional, Tuple, Union 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | from ...dataset_base import build_dataset_config 13 | from ...dataset_base import DatasetConfig 14 | from ...dataset_base import RGBDDataset 15 | from ...dataset_base import SampleIdentifier 16 | from .scenenetrgbd import SceneNetRGBDMeta 17 | 18 | 19 | class SceneNetRGBD(SceneNetRGBDMeta, RGBDDataset): 20 | def __init__( 21 | self, 22 | *, 23 | dataset_path: Optional[str] = None, 24 | split: str = 'train', 25 | sample_keys: Tuple[str] = ('rgb', 'depth', 'semantic'), 26 | use_cache: bool = False, 27 | cameras: Optional[Tuple[str]] = None, 28 | depth_mode: str = 'refined', 29 | scene_use_indoor_domestic_labels: bool = False, 30 | **kwargs: Any 31 | ) -> None: 32 | super().__init__( 33 | dataset_path=dataset_path, 34 | depth_mode=depth_mode, 35 | sample_keys=sample_keys, 36 | use_cache=use_cache, 37 | **kwargs 38 | ) 39 | 40 | assert split in self.SPLITS 41 | assert depth_mode in self.DEPTH_MODES 42 | assert all(sk in self.get_available_sample_keys(split) for sk in sample_keys) 43 | self._semantic_n_classes = 13 44 | self._split = split 45 | self._depth_mode = depth_mode 46 | self._cameras = self.CAMERAS 47 | self._scene_use_indoor_domestic_labels = scene_use_indoor_domestic_labels 48 | 49 | # cameras 50 | if cameras is None: 51 | # use all available cameras (=default dummy camera) 52 | self._cameras = self.CAMERAS 53 | else: 54 | # use subset of cameras (does not really apply to this dataset) 55 | assert all(c in self.CAMERAS for c in cameras) 56 | self._cameras = cameras 57 | 58 | # load file list 59 | if dataset_path is not None: 60 | # load file list 61 | fp = os.path.join(self.dataset_path, 62 | self.SPLIT_FILELIST_FILENAMES[self._split]) 63 | with open(fp, 'r') as f: 64 | self._files = f.read().splitlines() 65 | else: 66 | self.debug_print("Loaded SceneNetRGBD dataset without files") 67 | 68 | if self._scene_use_indoor_domestic_labels: 69 | # use remapped scene labels 70 | scene_label_list = self.SCENE_LABEL_LIST_INDOOR_DOMESTIC 71 | else: 72 | # use original scene labels 73 | scene_label_list = self.SCENE_LABEL_LIST 74 | 75 | # build config object 76 | self._config = build_dataset_config( 77 | semantic_label_list=self.SEMANTIC_LABEL_LIST, 78 | scene_label_list=scene_label_list, 79 | depth_stats=self.TRAIN_SPLIT_DEPTH_STATS 80 | ) 81 | 82 | # register loader functions 83 | self.auto_register_sample_key_loaders() 84 | 85 | @property 86 | def cameras(self) -> Tuple[str]: 87 | return self._cameras 88 | 89 | @property 90 | def config(self) -> DatasetConfig: 91 | return self._config 92 | 93 | @property 94 | def split(self) -> str: 95 | return self._split 96 | 97 | def _get_filename(self, idx: int) -> str: 98 | return self._files[idx] 99 | 100 | def __len__(self) -> int: 101 | return len(self._files) 102 | 103 | @staticmethod 104 | def get_available_sample_keys(split: str) -> Tuple[str]: 105 | return 
SceneNetRGBDMeta.SPLIT_SAMPLE_KEYS[split] 106 | 107 | def _load( 108 | self, 109 | directory: str, 110 | idx: int, 111 | extension: str = '.png' 112 | ) -> Union[str, np.ndarray]: 113 | # determine filepath 114 | fp = os.path.join(self.dataset_path, 115 | self.split, 116 | directory, 117 | f'{self._files[idx]}{extension}') 118 | 119 | # load data 120 | if '.txt' == extension: 121 | with open(fp, 'r') as f: 122 | data = f.readline() 123 | else: 124 | # default load using OpenCV 125 | data = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 126 | if data is None: 127 | raise IOError(f"Unable to load image: '{fp}'") 128 | if data.ndim == 3: 129 | data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB) 130 | 131 | return data 132 | 133 | def _load_rgb(self, idx: int) -> np.ndarray: 134 | return self._load(self.RGB_DIR, idx, '.jpg') 135 | 136 | def _load_depth(self, idx: int) -> np.ndarray: 137 | return self._load(self.DEPTH_DIR, idx) 138 | 139 | def _load_identifier(self, idx: int) -> Tuple[str]: 140 | fn = self._files[idx] 141 | return SampleIdentifier(os.path.normpath(fn).split(os.sep)) 142 | 143 | def _load_semantic(self, idx: int) -> np.ndarray: 144 | return self._load(self.SEMANTIC_13_DIR, idx).astype('uint8') 145 | 146 | def _load_instance(self, idx: int) -> np.ndarray: 147 | return self._load(self.INSTANCES_DIR, idx).astype('uint16') 148 | 149 | def _load_scene(self, idx: int) -> int: 150 | class_str = self._load(self.SCENE_CLASS_DIR, idx, '.txt') 151 | 152 | class_idx = self.SCENE_LABEL_LIST.index(class_str) 153 | 154 | if self._scene_use_indoor_domestic_labels: 155 | # map class to indoor domestic environment labels 156 | mapping = self.SCENE_LABEL_IDX_TO_SCENE_LABEL_INDOOR_DOMESTIC_IDX 157 | class_idx = mapping[class_idx] 158 | 159 | return class_idx 160 | -------------------------------------------------------------------------------- /tests/test_nyuv2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for NYUv2 dataset 4 | 5 | .. codeauthor:: Daniel Seichter 6 | .. 
codeauthor:: Soehnke Fischedick 7 | """ 8 | import numpy as np 9 | from numpy.testing import assert_almost_equal 10 | import pytest 11 | 12 | from nicr_scene_analysis_datasets import NYUv2 13 | from nicr_scene_analysis_datasets.dataset_base import OrientationDict 14 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 15 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 16 | 17 | 18 | N_SAMPLES = {'train': 795, 'test': 654} 19 | N_SCENE_CLASSES = 27 20 | 21 | 22 | @pytest.mark.parametrize('split', ('train', 'test')) 23 | @pytest.mark.parametrize('semantic_n_classes', (894, 40, 13)) 24 | @pytest.mark.parametrize('depth_mode', ('refined', 'raw')) 25 | def test_dataset(split, semantic_n_classes, depth_mode): 26 | dataset = NYUv2( 27 | dataset_path=DATASET_PATH_DICT['nyuv2'], 28 | split=split, 29 | depth_mode=depth_mode, 30 | sample_keys=NYUv2.get_available_sample_keys(split), 31 | semantic_n_classes=semantic_n_classes 32 | ) 33 | 34 | assert dataset.depth_mode == depth_mode 35 | assert dataset.split == split 36 | 37 | assert len(dataset) == N_SAMPLES[split] 38 | 39 | assert dataset.semantic_n_classes == semantic_n_classes + 1 40 | assert dataset.semantic_n_classes_without_void == semantic_n_classes 41 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 42 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 43 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 44 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 45 | 46 | assert len(dataset.scene_class_names) == N_SCENE_CLASSES 47 | assert len(dataset.cameras) == 1 48 | 49 | assert isinstance(dataset.depth_min, float) 50 | assert isinstance(dataset.depth_max, float) 51 | assert isinstance(dataset.depth_mean, float) 52 | assert isinstance(dataset.depth_std, float) 53 | assert isinstance(dataset.depth_stats, dict) 54 | 55 | # test first 10 samples 56 | for i, sample in enumerate(dataset): 57 | assert isinstance(sample, dict) 58 | assert isinstance(sample['identifier'], SampleIdentifier) 59 | # inputs: rgb and depth 60 | assert sample['rgb'].ndim == 3 61 | assert sample['depth'].ndim == 2 62 | # semantic 63 | assert sample['semantic'].ndim == 2 64 | # instance 65 | assert sample['instance'].ndim == 2 66 | # normal 67 | normal = sample['normal'] 68 | assert normal.ndim == 3 69 | assert normal.dtype == 'float32' 70 | norms = np.linalg.norm(normal, ord=2, axis=-1) 71 | mask = norms > 1e-7 # filter invalid pixels 72 | assert_almost_equal(norms[mask], 1, decimal=4) 73 | # scene 74 | assert isinstance(sample['scene'], int) 75 | # orientation 76 | assert isinstance(sample['orientations'], OrientationDict) 77 | for key, value in sample['orientations'].items(): 78 | # check if orientation with key exists in instance 79 | assert (sample['instance'] == key).sum() > 0 80 | 81 | assert isinstance(key, int) 82 | assert isinstance(value, float) 83 | # assert that the encoding is in radians 84 | assert 0 <= value <= 2*np.pi 85 | 86 | if i >= 9: 87 | break 88 | 89 | 90 | def test_dataset_computing(): 91 | # as NYUv2 is quite small, we additionally test some functions 92 | dataset = NYUv2( 93 | dataset_path=DATASET_PATH_DICT['nyuv2'], 94 | sample_keys=('rgb', 'depth', 'semantic') 95 | ) 96 | weights_1 = dataset.semantic_compute_class_weights( 97 | 'median-frequency', n_threads=1 98 | ) 99 | weights_10 = dataset.semantic_compute_class_weights( 100 | 'median-frequency', 101 | 
n_threads=10 102 | ) 103 | assert np.array_equal(weights_1, weights_10) 104 | assert np.array_equal(dataset.semantic_compute_class_weights(debug=True), 105 | np.ones(dataset.semantic_n_classes_without_void)) 106 | 107 | assert dataset.depth_compute_stats(n_threads=10) 108 | 109 | 110 | @pytest.mark.parametrize('split', ('train', 'test')) 111 | def test_scene_class_mapping(split): 112 | sample_keys = ('scene',) 113 | 114 | # create datasets 115 | dataset_original = NYUv2( 116 | dataset_path=DATASET_PATH_DICT['nyuv2'], 117 | split=split, 118 | sample_keys=sample_keys, 119 | scene_use_indoor_domestic_labels=False 120 | ) 121 | 122 | dataset_remapped = NYUv2( 123 | dataset_path=DATASET_PATH_DICT['nyuv2'], 124 | split=split, 125 | sample_keys=sample_keys, 126 | scene_use_indoor_domestic_labels=True 127 | ) 128 | 129 | # count samples 130 | def count(dataset): 131 | class_names = dataset.config.scene_label_list.class_names 132 | counts = {n: 0 for n in class_names} 133 | for sample in dataset: 134 | counts[class_names[sample['scene']]] += 1 135 | 136 | return counts 137 | 138 | counts_original = count(dataset_original) 139 | counts_remapped = count(dataset_remapped) 140 | 141 | # perform simple some checks 142 | assert sum(counts_remapped.values()) == N_SAMPLES[split] 143 | assert sum(counts_remapped.values()) == sum(counts_original.values()) 144 | assert len(counts_remapped) == dataset_remapped.scene_n_classes 145 | 146 | assert dataset_original.scene_n_classes == dataset_original.scene_n_classes_without_void 147 | assert dataset_remapped.scene_n_classes == dataset_remapped.scene_n_classes_without_void + 1 148 | -------------------------------------------------------------------------------- /tests/test_concat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some common dataset tests 4 | 5 | .. 
codeauthor:: Daniel Seichter 6 | """ 7 | from copy import deepcopy 8 | 9 | import cv2 10 | 11 | import pytest 12 | 13 | from nicr_scene_analysis_datasets import ConcatDataset 14 | from nicr_scene_analysis_datasets import get_dataset_class 15 | from nicr_scene_analysis_datasets.pytorch import ConcatDataset as ConcatDatasetPyTorch 16 | from nicr_scene_analysis_datasets.pytorch import get_dataset_class as get_dataset_class_pytorch 17 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 18 | 19 | 20 | class SimpleDepthPreprocessor: 21 | def __call__(self, sample): 22 | sample['depth'] = cv2.resize(sample['depth'], (10, 10)) 23 | return sample 24 | 25 | 26 | @pytest.mark.parametrize('dataset_factory_and_class', 27 | ((get_dataset_class, ConcatDataset), 28 | (get_dataset_class_pytorch, ConcatDatasetPyTorch))) 29 | def test_concatenated_dataset(dataset_factory_and_class): 30 | """Dataset concatenation""" 31 | dataset_factory, ConcatDatasetClass = dataset_factory_and_class 32 | 33 | sample_keys = ('identifier', 'depth') 34 | 35 | main_dataset = dataset_factory('sunrgbd')( 36 | dataset_path=DATASET_PATH_DICT['sunrgbd'], 37 | sample_keys=sample_keys, 38 | depth_force_mm=True, 39 | split='train', 40 | cameras=('kv1', 'kv2', 'xtion') # remove realsense samples 41 | ) 42 | 43 | dataset2 = dataset_factory('scannet')( 44 | dataset_path=DATASET_PATH_DICT['scannet'], 45 | sample_keys=sample_keys, 46 | split='train' 47 | ) 48 | 49 | dataset3 = dataset_factory('nyuv2')( 50 | dataset_path=DATASET_PATH_DICT['nyuv2'], 51 | sample_keys=sample_keys, 52 | split='train' 53 | ) 54 | 55 | dataset = ConcatDatasetClass(main_dataset, dataset2, dataset3) 56 | 57 | if issubclass(ConcatDatasetClass, ConcatDatasetPyTorch): 58 | # it is a pytorch dataset class, set a simple preprocessor 59 | preprocessor = SimpleDepthPreprocessor() 60 | dataset.preprocessor = preprocessor 61 | 62 | assert dataset.preprocessor == preprocessor 63 | assert main_dataset.preprocessor == preprocessor 64 | assert dataset2.preprocessor == preprocessor 65 | assert dataset3.preprocessor == preprocessor 66 | 67 | # simple tests 68 | n_samples_total = len(main_dataset) + len(dataset2) + len(dataset3) 69 | assert n_samples_total == len(main_dataset) + len(dataset2) + len(dataset3) 70 | 71 | # check that main_dataset is present 72 | offset = 0 73 | assert dataset[offset]['identifier'] == main_dataset[0]['identifier'] 74 | assert (dataset[offset]['depth'] == main_dataset[0]['depth']).all() 75 | if issubclass(ConcatDatasetClass, ConcatDatasetPyTorch): 76 | # it is a pytorch dataset class, check that preprocessor was applied 77 | assert dataset[offset]['depth'].shape == (10, 10) 78 | 79 | # check that dataset2 is present 80 | offset += len(main_dataset) 81 | assert dataset[offset]['identifier'] == dataset2[0]['identifier'] 82 | assert (dataset[offset]['depth'] == dataset2[0]['depth']).all() 83 | if issubclass(ConcatDatasetClass, ConcatDatasetPyTorch): 84 | # it is a pytorch dataset class, check that preprocessor was applied 85 | assert dataset[offset]['depth'].shape == (10, 10) 86 | 87 | # check that dataset3 is present 88 | offset += len(dataset2) 89 | assert dataset[offset]['identifier'] == dataset3[0]['identifier'] 90 | assert (dataset[offset]['depth'] == dataset3[0]['depth']).all() 91 | if issubclass(ConcatDatasetClass, ConcatDatasetPyTorch): 92 | # it is a pytorch dataset class, check that preprocessor was applied 93 | assert dataset[offset]['depth'].shape == (10, 10) 94 | 95 | # check that negative indices work 96 | assert 
dataset[-n_samples_total]['identifier'] == dataset[0]['identifier'] 97 | assert dataset[-1]['identifier'] == dataset3[-1]['identifier'] 98 | 99 | # test with camera filter from outside 100 | n_samples_sunrgbd = len(main_dataset) 101 | with main_dataset.filter_camera('kv1') as ds: 102 | n_samples_sunrgbd_kv1 = len(ds) 103 | # concatenated dataset should change as well 104 | assert len(dataset) == n_samples_total - n_samples_sunrgbd + n_samples_sunrgbd_kv1 105 | # everything should be back to normal 106 | assert len(dataset) == n_samples_total 107 | assert len(main_dataset) == n_samples_sunrgbd 108 | 109 | # test with camera filter 110 | with dataset.filter_camera('kv1') as ds: 111 | assert len(ds) == n_samples_sunrgbd_kv1 + len(dataset3) 112 | assert ds.camera == 'kv1' 113 | # everything should be back to normal 114 | assert len(dataset) == n_samples_total 115 | assert dataset.camera is None 116 | 117 | # test with camera filter 118 | with dataset.filter_camera('structureio_480x640') as ds: 119 | assert len(ds) == len(dataset2) 120 | assert ds.camera == 'structureio_480x640' 121 | # everything should be back to normal 122 | assert len(dataset) == n_samples_total 123 | assert dataset.camera is None 124 | 125 | # test with camera filter without context 126 | dataset.filter_camera('kv1') 127 | assert len(dataset) == n_samples_sunrgbd_kv1 + len(dataset3) 128 | dataset.filter_camera(None) 129 | # everything should be back to normal 130 | assert len(dataset) == n_samples_total 131 | 132 | # test copying 133 | dataset_copy = deepcopy(dataset) 134 | assert id(dataset_copy._main_dataset) != id(dataset._main_dataset) 135 | assert id(dataset_copy._additional_datasets[0]) != id(dataset._additional_datasets[0]) 136 | dataset.filter_camera('kv1') 137 | assert len(dataset_copy) == n_samples_total 138 | assert dataset_copy.camera is None 139 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/coco/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Soehnke Fischedick 4 | ..
codeauthor:: Daniel Seichter 5 | """ 6 | from typing import Any, Optional, Tuple 7 | 8 | import os 9 | 10 | import cv2 11 | import numpy as np 12 | 13 | from ...dataset_base import build_dataset_config 14 | from ...dataset_base import DatasetConfig 15 | from ...dataset_base import RGBDataset 16 | from ...dataset_base import SampleIdentifier 17 | from .coco import COCOMeta 18 | 19 | 20 | class COCO(COCOMeta, RGBDataset): 21 | def __init__( 22 | self, 23 | *, 24 | dataset_path: Optional[str] = None, 25 | split: str = 'train', 26 | sample_keys: Tuple[str] = ('rgb', 'semantic'), 27 | use_cache: bool = False, 28 | cameras: Optional[Tuple[str]] = None, 29 | **kwargs: Any 30 | ) -> None: 31 | super().__init__( 32 | dataset_path=dataset_path, 33 | sample_keys=sample_keys, 34 | use_cache=use_cache, 35 | **kwargs 36 | ) 37 | 38 | assert split in self.SPLITS 39 | assert all(sk in self.get_available_sample_keys(split) for sk in sample_keys) 40 | self._split = split 41 | 42 | if dataset_path is not None: 43 | # load filenames 44 | fp = os.path.join(self.dataset_path, 45 | self.SPLIT_FILELIST_FILENAMES[self._split]) 46 | self._filenames = list(np.loadtxt(fp, dtype=str)) 47 | 48 | # COCO is comprised of images of various cameras and spatial 49 | # dimensions, so we do not know the actual cameras, however, in the 50 | # dataset class, we use the camera property to split the dataset 51 | # in virtual cameras with images of same spatial dimensions 52 | 53 | # get filelist for each camera 54 | self._filenames_per_camera = {} 55 | for fn in self._filenames: 56 | camera = os.path.dirname(fn) 57 | if camera not in self._filenames_per_camera: 58 | self._filenames_per_camera[camera] = [] 59 | self._filenames_per_camera[camera].append(fn) 60 | 61 | available_cameras = tuple(self._filenames_per_camera.keys()) 62 | 63 | if cameras is None: 64 | # use all available cameras 65 | self._cameras = available_cameras 66 | else: 67 | # use subset of cameras 68 | assert all(c in available_cameras for c in cameras) 69 | self._cameras = cameras 70 | 71 | # filter dict 72 | for camera in list(self._filenames_per_camera.keys()): 73 | if camera not in self._cameras: 74 | # remove from dict 75 | del self._filenames_per_camera[camera] 76 | # recreate filelist 77 | self._filenames = [] 78 | for camera, filenames in self._filenames_per_camera.items(): 79 | self._filenames.extend( 80 | os.path.join(camera, fn) for fn in filenames 81 | ) 82 | else: 83 | self.debug_print("Loaded COCO dataset without files") 84 | self._cameras = self.CAMERAS # single dummy camera 85 | 86 | # build config object 87 | self._config = build_dataset_config( 88 | semantic_label_list=self.SEMANTIC_LABEL_LIST, 89 | ) 90 | 91 | # register loader functions 92 | self.auto_register_sample_key_loaders() 93 | 94 | @property 95 | def cameras(self) -> Tuple[str]: 96 | return self._cameras 97 | 98 | @property 99 | def config(self) -> DatasetConfig: 100 | return self._config 101 | 102 | @property 103 | def split(self) -> str: 104 | return self._split 105 | 106 | def __len__(self) -> int: 107 | if self.camera is None or self.CAMERAS[0] == self.camera: 108 | return len(self._filenames) 109 | return len(self._filenames_per_camera[self.camera]) 110 | 111 | @staticmethod 112 | def get_available_sample_keys(split: str) -> Tuple[str]: 113 | return COCOMeta.SPLIT_SAMPLE_KEYS[split] 114 | 115 | def _get_filename(self, idx: int) -> str: 116 | if self.camera is None or self.CAMERAS[0] == self.camera: 117 | return self._filenames[idx] 118 | else: 119 | return 
self._filenames_per_camera[self.camera][idx] 120 | 121 | def _load( 122 | self, 123 | directory: str, 124 | idx: int, 125 | ext: str = '.png' 126 | ) -> np.ndarray: 127 | # get filename depending on current camera 128 | filename = self._get_filename(idx) 129 | fp = os.path.join(self.dataset_path, 130 | self.split, 131 | directory, 132 | f'{filename}{ext}') 133 | img = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 134 | if img is None: 135 | raise IOError(f"Unable to load image: '{fp}'") 136 | if 3 == img.ndim: 137 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 138 | 139 | return img 140 | 141 | def _load_rgb(self, idx) -> np.ndarray: 142 | img = self._load(self.IMAGE_DIR, idx, '.jpg') 143 | 144 | # force RGB if the image is grayscale 145 | if 2 == img.ndim: 146 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 147 | 148 | return img 149 | 150 | def _load_identifier(self, idx: int) -> Tuple[str]: 151 | # get filename depending on current camera 152 | filename = self._get_filename(idx) 153 | return SampleIdentifier(os.path.normpath(filename).split(os.sep)) 154 | 155 | def _load_semantic(self, idx: int) -> np.ndarray: 156 | return self._load(self.SEMANTIC_DIR, idx).astype('uint8') 157 | 158 | def _load_instance(self, idx: int) -> np.ndarray: 159 | instance = self._load(self.INSTANCES_DIR, idx) 160 | return instance.astype('uint16') 161 | -------------------------------------------------------------------------------- /tests/test_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for SUNRGBD dataset 4 | 5 | .. codeauthor:: Daniel Seichter 6 | .. codeauthor:: Soehnke Fischedick 7 | """ 8 | import numpy as np 9 | import pytest 10 | 11 | from nicr_scene_analysis_datasets import SUNRGBD 12 | from nicr_scene_analysis_datasets.dataset_base import ExtrinsicCameraParametersNormalized 13 | from nicr_scene_analysis_datasets.dataset_base import IntrinsicCameraParametersNormalized 14 | from nicr_scene_analysis_datasets.dataset_base import OrientationDict 15 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 16 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 17 | 18 | 19 | N_CLASSES_WITH_VOID = 37 + 1 20 | N_SCENE_CLASSES = 45 21 | N_SAMPLES = {'train': 5285, 'test': 5050} 22 | 23 | 24 | @pytest.mark.parametrize('split', ('train', 'test')) 25 | @pytest.mark.parametrize('depth_mode', ('refined', 'raw')) 26 | def test_dataset(split, depth_mode): 27 | dataset = SUNRGBD( 28 | dataset_path=DATASET_PATH_DICT['sunrgbd'], 29 | split=split, 30 | depth_mode=depth_mode, 31 | sample_keys=SUNRGBD.get_available_sample_keys(split) 32 | ) 33 | 34 | assert dataset.depth_mode == depth_mode 35 | assert dataset.split == split 36 | 37 | assert len(dataset) == N_SAMPLES[split] 38 | 39 | assert dataset.semantic_n_classes == N_CLASSES_WITH_VOID 40 | assert dataset.semantic_n_classes_without_void == N_CLASSES_WITH_VOID - 1 41 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 42 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 43 | 44 | assert len(dataset.scene_class_names) == N_SCENE_CLASSES 45 | 46 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 47 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 48 | 49 | assert len(dataset.cameras) == 4 50 | 51 | assert isinstance(dataset.depth_min, float) 52 | assert isinstance(dataset.depth_max,
float) 53 | assert isinstance(dataset.depth_mean, float) 54 | assert isinstance(dataset.depth_std, float) 55 | assert isinstance(dataset.depth_stats, dict) 56 | 57 | # test first 10 samples 58 | for i, sample in enumerate(dataset): 59 | assert isinstance(sample, dict) 60 | assert isinstance(sample['identifier'], SampleIdentifier) 61 | assert isinstance(sample['extrinsics'], 62 | ExtrinsicCameraParametersNormalized) 63 | assert (3+4) == len(sample['extrinsics']) 64 | # inputs: rgb and depth 65 | assert sample['rgb'].ndim == 3 66 | assert isinstance(sample['rgb_intrinsics'], 67 | IntrinsicCameraParametersNormalized) 68 | assert (2+2+6+2) == len(sample['rgb_intrinsics']) 69 | assert sample['depth'].ndim == 2 70 | assert isinstance(sample['depth_intrinsics'], 71 | IntrinsicCameraParametersNormalized) 72 | assert (2+2+6+2+2) == len(sample['depth_intrinsics']) 73 | # semantic 74 | assert sample['semantic'].ndim == 2 75 | # instance 76 | assert sample['instance'].ndim == 2 77 | # scene 78 | assert isinstance(sample['scene'], int) 79 | # orientation 80 | assert isinstance(sample['orientations'], OrientationDict) 81 | for key, value in sample['orientations'].items(): 82 | 83 | # check if orientation with key exists in instance 84 | assert (sample['instance'] == key).sum() > 0 85 | 86 | assert isinstance(key, int) 87 | assert isinstance(value, float) 88 | # assert that the encoding is in radians 89 | assert 0 <= value <= 2*np.pi 90 | # 3d boxes 91 | assert isinstance(sample['3d_boxes'], list) 92 | 93 | if i >= 9: 94 | break 95 | 96 | 97 | @pytest.mark.parametrize('split', ('train', 'test')) 98 | def test_scene_class_mapping(split): 99 | sample_keys = ('scene',) 100 | 101 | # create datasets 102 | dataset_original = SUNRGBD( 103 | dataset_path=DATASET_PATH_DICT['sunrgbd'], 104 | split=split, 105 | sample_keys=sample_keys, 106 | scene_use_indoor_domestic_labels=False 107 | ) 108 | 109 | dataset_remapped = SUNRGBD( 110 | dataset_path=DATASET_PATH_DICT['sunrgbd'], 111 | split=split, 112 | sample_keys=sample_keys, 113 | scene_use_indoor_domestic_labels=True 114 | ) 115 | 116 | # count samples 117 | def count(dataset): 118 | class_names = dataset.config.scene_label_list.class_names 119 | counts = {n: 0 for n in class_names} 120 | for sample in dataset: 121 | counts[class_names[sample['scene']]] += 1 122 | 123 | return counts 124 | 125 | counts_original = count(dataset_original) 126 | counts_remapped = count(dataset_remapped) 127 | 128 | # perform some simple checks 129 | assert sum(counts_remapped.values()) == N_SAMPLES[split] 130 | assert sum(counts_remapped.values()) == sum(counts_original.values()) 131 | assert len(counts_remapped) == dataset_remapped.scene_n_classes 132 | 133 | assert dataset_original.scene_n_classes == dataset_original.scene_n_classes_without_void 134 | assert dataset_remapped.scene_n_classes == dataset_remapped.scene_n_classes_without_void + 1 135 | 136 | 137 | @pytest.mark.parametrize('split', ('train', 'test')) 138 | def test_filter_camera(split): 139 | # just some random cameras and counts that we know 140 | sample_cameras = { 141 | 'train': {'xtion': 1701, 'realsense': 587}, 142 | 'test': {'kv2': 1860, 'kv1': 930} 143 | } 144 | 145 | cameras = tuple(sample_cameras[split].keys()) 146 | n_samples = tuple(sample_cameras[split].values()) 147 | 148 | # create dataset with specified cameras 149 | dataset = SUNRGBD( 150 | dataset_path=DATASET_PATH_DICT['sunrgbd'], 151 | split=split, 152 | sample_keys=SUNRGBD.get_available_sample_keys(split), 153 | cameras=cameras 154 | ) 155 |
156 | assert dataset.cameras == cameras 157 | assert len(dataset) == sum(n_samples) 158 | 159 | # test filtering 160 | dataset.filter_camera(cameras[0]) 161 | assert dataset.camera == cameras[0] 162 | assert len(dataset) == n_samples[0] 163 | 164 | # reset filtering 165 | dataset.filter_camera(None) 166 | assert dataset.camera is None 167 | assert len(dataset) == sum(n_samples) 168 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/hypersim/README.md: -------------------------------------------------------------------------------- 1 | # Hypersim dataset 2 | 3 | For many fundamental scene understanding tasks, it is difficult or impossible 4 | to obtain per-pixel ground truth labels from real images. We address this 5 | challenge with Hypersim, a photorealistic synthetic dataset for holistic indoor 6 | scene understanding. To create our dataset, we leverage a large repository of 7 | synthetic scenes created by professional artists, and we generate 77,400 images 8 | of 461 indoor scenes with detailed per-pixel labels and corresponding ground 9 | truth geometry. Our dataset: (1) relies exclusively on publicly available 3D 10 | assets; (2) includes complete scene geometry, material information, and 11 | lighting information for every scene; (3) includes dense per-pixel semantic 12 | instance segmentations for every image; and (4) factors every image into 13 | diffuse reflectance, diffuse illumination, and a non-diffuse residual term 14 | that captures view-dependent lighting effects. Together, these features make 15 | our dataset well-suited for geometric learning problems that require direct 3D 16 | supervision, multi-task learning problems that require reasoning jointly over 17 | multiple input and output modalities, and inverse rendering problems. 18 | 19 | For more details, see: [Hypersim](https://machinelearning.apple.com/research/hypersim) 20 | 21 | ## Notes 22 | 23 | > Hypersim uses non-standard perspective projection matrices (with 24 | tilt-shift photography parameters) in most scenes. As common frameworks, such as 25 | MIRA or ROS, do not support this projection, we convert the camera parameters if 26 | possible or project the data/annotations back to a standard camera ignoring the 27 | tilt-shift parameters. Note that this is not a perfect conversion and introduces 28 | some artifacts, i.e., void pixels, as we only back-project points without 29 | contradictions. Void is assigned to ~5% of the pixels. 30 | However, rendering full images with a standard perspective projection 31 | requires buying the dataset meshes. 32 | For more details, see [this issue](https://github.com/apple/ml-hypersim/issues/24). 33 | To disable this conversion and to stick to original images, pass the 34 | `--no-tilt-shift-conversion` parameter to the prepare script. 35 | 36 | > We observed that merging semantic and instance labels in order to derive 37 | panoptic labels might slightly change the semantic labels in a few images. This is 38 | because there are some pixels that belong to a thing class but are not assigned 39 | to any instance (instance=0), e.g., in scene ai_052_001, a lamp is labeled as 40 | lamp but is not annotated as an instance. Panoptic merging assigns void for those 41 | pixels. There is no workaround for this issue. Affected scenes: 42 | valid: ai_023_003, ai_041_003, ai_052_001, ai_052_003 -> 1576566 pixels (0.03%); 43 | test: ai_005_001, ai_008_005, ai_008_005, ai_022_001 -> 801359 pixels (0.01%).
44 | Computing mIoU in [0, 1] with semantic / panoptic_semantic as ground truth 45 | changes the result by only ~0.0001-0.0002, so the issue is negligible. 46 | 47 | > We further observed that some images are not correctly annotated. There are 48 | instances that are assigned to multiple semantic classes. While most overlaps 49 | are with void (unlabeled textures -> void label), we observed other issues for: 50 | ai_017_004: semantic classes 35 + 40: lamp + otherprop -> some small stuff in 51 | the background; ai_021_008: semantic classes 12 + 35 -> kitchen counter + lamp 52 | belong to the same instance -> might be an annotation error; ai_022_009: semantic 53 | classes 1 + 8 -> door frame labeled as wall, but door instance contains both 54 | the door frame and the door. 55 | 56 | ## Prepare dataset 57 | 58 | 1. Download and unzip dataset files: 59 | 60 | ```bash 61 | wget https://raw.githubusercontent.com/apple/ml-hypersim/6cbaa80207f44a312654e288cf445016c84658a1/code/python/tools/dataset_download_images.py 62 | 63 | # general usage 64 | python dataset_download_images.py \ 65 | --downloads_dir /path/to/download \ 66 | --decompress_dir /path/to/uncompressed/hypersim 67 | ``` 68 | 69 | 2. Convert dataset: 70 | 71 | ```bash 72 | # general usage 73 | nicr_sa_prepare_dataset hypersim \ 74 | /path/where/to/store/hypersim \ 75 | /path/to/uncompressed/hypersim \ 76 | [--additional-subsamples N1 N2] \ 77 | [--n-processes N] 78 | ``` 79 | With arguments: 80 | - `--additional-subsamples`: 81 | For additional subsampled versions of the dataset. 82 | - `--n-processes`: 83 | Number of worker processes to spawn. 84 | - `--no-tilt-shift-conversion`: 85 | Disable projecting the data/annotations back to a standard camera ignoring the 86 | tilt-shift parameters (use this to create a dataset compatible with < v050). 87 | 88 | 3. (Optional) Generate auxiliary data: 89 | > **Note**: To use auxiliary data generation, the package must be installed with the `withauxiliarydata` option: 90 | > ```bash 91 | > pip install -e .[withauxiliarydata] 92 | > ``` 93 | 94 | ```bash 95 | # for auxiliary data such as synthetic depth and rgb/panoptic embeddings 96 | nicr_sa_generate_auxiliary_data \ 97 | --dataset hypersim \ 98 | --dataset-path /path/to/already/prepared/hypersim/dataset \ 99 | --auxiliary-data depth image-embedding panoptic-embedding \ 100 | --embedding-estimator-device cuda \ 101 | --embedding-estimators alpha_clip__l14-336-grit-20m \ 102 | --depth-estimator-device cuda \ 103 | --depth-estimators depthanything_v2__indoor_large \ 104 | --cache-models 105 | ``` 106 | 107 | With arguments: 108 | - `--dataset-path`: 109 | Path to the prepared Hypersim dataset. 110 | - `--auxiliary-data`: 111 | Types of auxiliary data to generate: 112 | - `depth`: Generates synthetic depth images from RGB. 113 | - `image-embedding`: Uses Alpha-CLIP to generate an embedding for the entire image. 114 | - `panoptic-embedding`: Uses Alpha-CLIP to generate an embedding for each panoptic mask. 115 | - `--depth-estimator-device`: 116 | Device to use for depth estimation (`cpu` or `cuda`). 117 | - `--depth-estimators`: 118 | Depth estimator(s) to use. Use `depthanything_v2__indoor_large` to match DVEFormer. 119 | - `--embedding-estimator-device`: 120 | Device to use for embedding estimation (`cpu` or `cuda`). 121 | - `--embedding-estimators`: 122 | Embedding estimator(s) to use. Use `alpha_clip__l14-336-grit-20m` to match DVEFormer. 123 | - `--cache-models`: 124 | Cache models locally to avoid reloading them in future runs.
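4. (Optional) Verify the prepared dataset from Python. A minimal sketch, assuming the dataset was prepared to the output directory used above (the path below is a placeholder) and using only constructor arguments and sample keys that also appear in this package's tests:

```python
from nicr_scene_analysis_datasets import Hypersim

# placeholder path: the output directory passed to nicr_sa_prepare_dataset
dataset = Hypersim(
    dataset_path='/path/where/to/store/hypersim',
    split='valid',
    sample_keys=('rgb', 'depth', 'semantic'),
    depth_mode='raw',
)

print(len(dataset))   # number of samples in the chosen split
sample = dataset[0]   # dict with one entry per requested sample key
print(sample['rgb'].shape, sample['depth'].shape, sample['semantic'].shape)
```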
125 | 126 | 127 | -------------------------------------------------------------------------------- /tests/test_hypersim.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for Hypersim dataset 4 | 5 | .. codeauthor:: Daniel Seichter 6 | .. codeauthor:: Soehnke Fischedick 7 | """ 8 | import numpy as np 9 | from numpy.testing import assert_almost_equal 10 | import pytest 11 | 12 | from nicr_scene_analysis_datasets import Hypersim 13 | from nicr_scene_analysis_datasets.dataset_base import ExtrinsicCameraParametersNormalized 14 | from nicr_scene_analysis_datasets.dataset_base import IntrinsicCameraParametersNormalized 15 | from nicr_scene_analysis_datasets.dataset_base import OrientationDict 16 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 17 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 18 | 19 | 20 | N_CLASSES_WITH_VOID = 40 + 1 21 | N_SAMPLES = { 22 | None: {'train': 57443, 'valid': 7286, 'test': 7690}, 23 | 1: {'train': 57443, 'valid': 7286, 'test': 7690}, 24 | 2: {'train': 28722, 'valid': 3643, 'test': 3845}, 25 | 5: {'train': 11489, 'valid': 1458, 'test': 1538}, 26 | 10: {'train': 5745, 'valid': 729, 'test': 769}, 27 | 20: {'train': 2873, 'valid': 365, 'test': 385} 28 | } 29 | N_SCENE_CLASSES = 22 30 | 31 | 32 | @pytest.mark.parametrize('split', ('train', 'valid', 'test')) 33 | @pytest.mark.parametrize('depth_mode', ('raw', )) 34 | @pytest.mark.parametrize('subsample', (None, 1, 2, 5, 10, 20)) 35 | @pytest.mark.parametrize('orientations_use', (True, False)) 36 | def test_dataset(split, depth_mode, subsample, orientations_use): 37 | dataset = Hypersim( 38 | dataset_path=DATASET_PATH_DICT['hypersim'], 39 | split=split, 40 | subsample=subsample, 41 | sample_keys=Hypersim.get_available_sample_keys(split), 42 | depth_mode=depth_mode, 43 | orientations_use=orientations_use, 44 | ) 45 | 46 | assert dataset.depth_mode == depth_mode 47 | assert dataset.split == split 48 | 49 | assert len(dataset) == N_SAMPLES[subsample][split] 50 | 51 | assert dataset.semantic_n_classes == N_CLASSES_WITH_VOID 52 | assert dataset.semantic_n_classes_without_void == N_CLASSES_WITH_VOID - 1 53 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 54 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 55 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 56 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 57 | assert len(dataset.scene_class_names) == N_SCENE_CLASSES 58 | assert len(dataset.cameras) == 1 59 | 60 | assert isinstance(dataset.depth_min, float) 61 | assert isinstance(dataset.depth_max, float) 62 | assert isinstance(dataset.depth_mean, float) 63 | assert isinstance(dataset.depth_std, float) 64 | assert isinstance(dataset.depth_stats, dict) 65 | 66 | # test first 10 samples 67 | for i, sample in enumerate(dataset): 68 | assert isinstance(sample, dict) 69 | assert isinstance(sample['identifier'], SampleIdentifier) 70 | assert isinstance(sample['extrinsics'], 71 | ExtrinsicCameraParametersNormalized) 72 | assert (3+4) == len(sample['extrinsics']) 73 | # inputs: rgb and depth 74 | assert sample['rgb'].ndim == 3 75 | assert isinstance(sample['rgb_intrinsics'], 76 | IntrinsicCameraParametersNormalized) 77 | assert (2+2+6+2) == len(sample['rgb_intrinsics']) 78 | assert sample['depth'].ndim == 2 79 | assert 
isinstance(sample['depth_intrinsics'], 80 | IntrinsicCameraParametersNormalized) 81 | assert (2+2+6+2+2) == len(sample['depth_intrinsics']) 82 | # semantic 83 | assert sample['semantic'].ndim == 2 84 | # instance 85 | assert sample['instance'].ndim == 2 86 | # normal 87 | normal = sample['normal'] 88 | assert normal.ndim == 3 89 | assert normal.dtype == 'float32' 90 | norms = np.linalg.norm(normal, ord=2, axis=-1) 91 | mask = norms > 1e-7 # filter invalid pixels 92 | assert_almost_equal(norms[mask], 1, decimal=4) 93 | # scene 94 | assert isinstance(sample['scene'], int) 95 | # orientation 96 | assert isinstance(sample['orientations'], OrientationDict) 97 | for key, value in sample['orientations'].items(): 98 | # Check if orientation with key exists in instance 99 | assert (sample['instance'] == key).sum() > 0 100 | assert isinstance(key, int) 101 | assert isinstance(value, float) 102 | # assert that the encoding is in radians 103 | assert 0 <= value <= 2*np.pi 104 | # 3d boxes 105 | assert isinstance(sample['3d_boxes'], dict) 106 | 107 | # verify that every instance has an orientation 108 | for instance_id in np.unique(sample['instance']): 109 | # void 110 | if instance_id == 0: 111 | continue 112 | if orientations_use: 113 | assert (instance_id in sample['orientations']) 114 | 115 | if i >= 9: 116 | break 117 | 118 | 119 | @pytest.mark.parametrize('split', ('train', 'valid', 'test')) 120 | def test_scene_class_mapping(split): 121 | sample_keys = ('scene',) 122 | 123 | # create datasets 124 | dataset_original = Hypersim( 125 | dataset_path=DATASET_PATH_DICT['hypersim'], 126 | split=split, 127 | sample_keys=sample_keys, 128 | scene_use_indoor_domestic_labels=False 129 | ) 130 | 131 | dataset_remapped = Hypersim( 132 | dataset_path=DATASET_PATH_DICT['hypersim'], 133 | split=split, 134 | sample_keys=sample_keys, 135 | scene_use_indoor_domestic_labels=True 136 | ) 137 | 138 | # count samples 139 | def count(dataset): 140 | class_names = dataset.config.scene_label_list.class_names 141 | counts = {n: 0 for n in class_names} 142 | for sample in dataset: 143 | counts[class_names[sample['scene']]] += 1 144 | 145 | return counts 146 | 147 | counts_original = count(dataset_original) 148 | counts_remapped = count(dataset_remapped) 149 | 150 | # perform some simple checks 151 | assert sum(counts_remapped.values()) == N_SAMPLES[None][split] 152 | assert sum(counts_remapped.values()) == sum(counts_original.values()) 153 | assert len(counts_remapped) == dataset_remapped.scene_n_classes 154 | 155 | assert dataset_original.scene_n_classes == dataset_original.scene_n_classes_without_void 156 | assert dataset_remapped.scene_n_classes == dataset_remapped.scene_n_classes_without_void + 1 157 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/datasets/ade20k/_class_mappings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | 5 | Mapping tables to convert semantic classes in ADE20K.
6 | 7 | Based on: 8 | - [1] https://github.com/CSAILVision/sceneparsing/blob/master/convertFromADE/mapFromADE.txt 9 | - [2] https://github.com/CSAILVision/ADE20K/blob/main/utils/ade20k_instance_catid_mapping.txt 10 | """ 11 | 12 | # ----------------------------------------------------------------------------- 13 | # mapping from 2021 full ADE20K to 150 classes used in 2016 scene parsing 14 | # challenge (from [1]) 15 | MAPPING_FULL_ADE20K_TO_SCENE_PARSE_150 = { 16 | 2978: 1, 17 | 312: 2, 18 | 2420: 3, 19 | 976: 4, 20 | 2855: 5, 21 | 447: 6, 22 | 2131: 7, 23 | 165: 8, 24 | 3055: 9, 25 | 1125: 10, 26 | 350: 11, 27 | 2377: 12, 28 | 1831: 13, 29 | 838: 14, 30 | 774: 15, 31 | 783: 15, 32 | 2684: 16, 33 | 1610: 17, 34 | 1910: 18, 35 | 687: 19, 36 | 471: 20, 37 | 401: 21, 38 | 2994: 22, 39 | 1735: 23, 40 | 2473: 24, 41 | 2329: 25, 42 | 1276: 26, 43 | 2264: 27, 44 | 1564: 28, 45 | 2178: 29, 46 | 913: 30, 47 | 57: 31, 48 | 2272: 32, 49 | 907: 33, 50 | 724: 34, 51 | 2138: 35, 52 | 2985: 36, 53 | 533: 36, 54 | 1395: 37, 55 | 155: 38, 56 | 2053: 39, 57 | 689: 40, 58 | 137: 41, 59 | 266: 42, 60 | 581: 43, 61 | 2380: 44, 62 | 491: 45, 63 | 627: 46, 64 | 2212: 47, 65 | 2388: 48, 66 | 2423: 49, 67 | 943: 50, 68 | 2096: 51, 69 | 1121: 52, 70 | 1788: 53, 71 | 2530: 54, 72 | 2185: 55, 73 | 420: 56, 74 | 1948: 57, 75 | 1869: 58, 76 | 2251: 59, 77 | 2531: 60, 78 | 2128: 61, 79 | 294: 62, 80 | 239: 63, 81 | 212: 64, 82 | 571: 65, 83 | 2793: 66, 84 | 978: 67, 85 | 236: 68, 86 | 1240: 69, 87 | 181: 70, 88 | 629: 71, 89 | 2598: 72, 90 | 1744: 73, 91 | 1374: 74, 92 | 591: 75, 93 | 2679: 76, 94 | 223: 77, 95 | 123: 78, 96 | 47: 79, 97 | 1282: 80, 98 | 327: 81, 99 | 2821: 82, 100 | 1451: 83, 101 | 2880: 84, 102 | 2828: 85, 103 | 480: 86, 104 | 77: 87, 105 | 2616: 88, 106 | 246: 89, 107 | 247: 89, 108 | 2733: 90, 109 | 14: 91, 110 | 738: 92, 111 | 38: 93, 112 | 1936: 94, 113 | 1401: 95, 114 | 120: 96, 115 | 868: 97, 116 | 1702: 98, 117 | 249: 99, 118 | 308: 100, 119 | 1969: 101, 120 | 2526: 102, 121 | 2928: 103, 122 | 2337: 104, 123 | 1023: 105, 124 | 609: 106, 125 | 389: 107, 126 | 2989: 108, 127 | 1930: 109, 128 | 2668: 110, 129 | 2586: 111, 130 | 131: 112, 131 | 146: 113, 132 | 3016: 114, 133 | 2739: 115, 134 | 95: 116, 135 | 1563: 117, 136 | 642: 118, 137 | 1708: 119, 138 | 103: 120, 139 | 1002: 121, 140 | 2569: 122, 141 | 2704: 123, 142 | 2833: 124, 143 | 1551: 125, 144 | 1981: 126, 145 | 29: 127, 146 | 187: 128, 147 | 1393: 129, 148 | 747: 130, 149 | 2254: 131, 150 | 206: 132, 151 | 2262: 133, 152 | 1260: 134, 153 | 2243: 135, 154 | 2932: 136, 155 | 2836: 137, 156 | 2850: 138, 157 | 64: 139, 158 | 894: 140, 159 | 1858: 141, 160 | 3109: 142, 161 | 1919: 143, 162 | 1583: 144, 163 | 318: 145, 164 | 2356: 146, 165 | 2046: 147, 166 | 1098: 148, 167 | 530: 149, 168 | 954: 150 169 | } 170 | # double mapping for 15, 36, and 89, thus, vice versa is not possible 171 | assert len(MAPPING_FULL_ADE20K_TO_SCENE_PARSE_150) == 153 172 | 173 | # ----------------------------------------------------------------------------- 174 | # mapping from 2021 full ADE20K to 100 classes used in the instance part of the 175 | # 2017 places challenge (from [2]) 176 | MAPPING_FULL_ADE20K_TO_INSTANCE_100 = { 177 | 165: 1, 178 | 3055: 2, 179 | 350: 3, 180 | 1831: 4, 181 | 774: 5, 182 | 783: 5, 183 | 2684: 6, 184 | 687: 7, 185 | 471: 8, 186 | 401: 9, 187 | 1735: 10, 188 | 2473: 11, 189 | 2329: 12, 190 | 1564: 13, 191 | 57: 14, 192 | 2272: 15, 193 | 907: 16, 194 | 724: 17, 195 | 2985: 18, 196 | 533: 18, 197 | 1395: 19, 198 | 155: 20, 199 | 2053: 
21, 200 | 689: 22, 201 | 266: 23, 202 | 581: 24, 203 | 2380: 25, 204 | 491: 26, 205 | 627: 27, 206 | 2388: 28, 207 | 943: 29, 208 | 2096: 30, 209 | 2530: 31, 210 | 420: 32, 211 | 1948: 33, 212 | 1869: 34, 213 | 2251: 35, 214 | 239: 36, 215 | 571: 37, 216 | 2793: 38, 217 | 978: 39, 218 | 236: 40, 219 | 181: 41, 220 | 629: 42, 221 | 2598: 43, 222 | 1744: 44, 223 | 1374: 45, 224 | 591: 46, 225 | 2679: 47, 226 | 223: 48, 227 | 47: 49, 228 | 327: 50, 229 | 2821: 51, 230 | 1451: 52, 231 | 2880: 53, 232 | 480: 54, 233 | 77: 55, 234 | 2616: 56, 235 | 246: 57, 236 | 247: 57, 237 | 2733: 58, 238 | 14: 59, 239 | 38: 60, 240 | 1936: 61, 241 | 120: 62, 242 | 1702: 63, 243 | 249: 64, 244 | 2928: 65, 245 | 2337: 66, 246 | 1023: 67, 247 | 2989: 68, 248 | 1930: 69, 249 | 2586: 70, 250 | 131: 71, 251 | 146: 72, 252 | 95: 73, 253 | 1563: 74, 254 | 1708: 75, 255 | 103: 76, 256 | 1002: 77, 257 | 2569: 78, 258 | 2833: 79, 259 | 1551: 80, 260 | 1981: 81, 261 | 29: 82, 262 | 187: 83, 263 | 747: 84, 264 | 2254: 85, 265 | 2262: 86, 266 | 1260: 87, 267 | 2243: 88, 268 | 2932: 89, 269 | 2836: 90, 270 | 2850: 91, 271 | 64: 92, 272 | 894: 93, 273 | 1919: 94, 274 | 1583: 95, 275 | 318: 96, 276 | 2046: 97, 277 | 1098: 98, 278 | 530: 99, 279 | 954: 100 280 | } 281 | # double mapping for 5, 18, and 57, thus, vice versa is not possible 282 | assert len(MAPPING_FULL_ADE20K_TO_INSTANCE_100) == 103 283 | 284 | # ----------------------------------------------------------------------------- 285 | # mapping from 150 classes used in 2016 scene parsing challenge to 100 classes 286 | # used in the instance part of the 2017 places challenge and vice versa 287 | # (from [2]) 288 | MAPPING_SCENE_PARSE_150_TO_INSTANCE_100 = { 289 | MAPPING_FULL_ADE20K_TO_SCENE_PARSE_150[k]: v 290 | for k, v in MAPPING_FULL_ADE20K_TO_INSTANCE_100.items() 291 | } 292 | 293 | # no multiple mappings, as the 100 classes are a subset of the 150 classes 294 | assert len(MAPPING_SCENE_PARSE_150_TO_INSTANCE_100) == 100 295 | 296 | # vice versa 297 | MAPPING_INSTANCE_100_TO_SCENE_PARSE_150 = { 298 | v: k 299 | for k, v in MAPPING_SCENE_PARSE_150_TO_INSTANCE_100.items() 300 | } 301 | -------------------------------------------------------------------------------- /src/nicr_scene_analysis_datasets/utils/_colormaps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. 
codeauthor:: Daniel Seichter 4 | """ 5 | import numpy as np 6 | 7 | # see MIRA/toolboxes/ColorMaps/include/SemanticColormaps.h 8 | _COLORMAP_VISUALLY_DISTINCT_256_PLUS_1 = ( 9 | (0, 0, 0), 10 | (22, 254, 25), 11 | (255, 0, 255), 12 | (0, 127, 255), 13 | (255, 127, 0), 14 | (127, 63, 127), 15 | (88, 251, 192), 16 | (194, 249, 49), 17 | (56, 1, 254), 18 | (239, 127, 216), 19 | (251, 2, 45), 20 | (0, 127, 0), 21 | (12, 156, 128), 22 | (0, 0, 127), 23 | (131, 155, 48), 24 | (134, 24, 6), 25 | (131, 86, 253), 26 | (133, 163, 180), 27 | (248, 185, 115), 28 | (233, 81, 107), 29 | (159, 0, 198), 30 | (21, 78, 82), 31 | (0, 255, 127), 32 | (0, 255, 255), 33 | (18, 61, 193), 34 | (186, 242, 160), 35 | (100, 236, 96), 36 | (157, 219, 251), 37 | (53, 191, 239), 38 | (240, 6, 153), 39 | (179, 93, 22), 40 | (113, 229, 4), 41 | (78, 83, 11), 42 | (253, 205, 8), 43 | (12, 183, 50), 44 | (90, 2, 90), 45 | (213, 65, 250), 46 | (176, 8, 89), 47 | (187, 141, 113), 48 | (72, 113, 174), 49 | (237, 200, 207), 50 | (254, 252, 107), 51 | (72, 6, 175), 52 | (175, 97, 177), 53 | (175, 150, 251), 54 | (250, 59, 0), 55 | (94, 176, 114), 56 | (12, 205, 178), 57 | (90, 115, 95), 58 | (194, 166, 1), 59 | (66, 164, 1), 60 | (2, 61, 12), 61 | (84, 133, 244), 62 | (168, 198, 78), 63 | (9, 20, 61), 64 | (104, 58, 195), 65 | (250, 59, 177), 66 | (57, 82, 247), 67 | (188, 41, 160), 68 | (0, 140, 191), 69 | (134, 27, 253), 70 | (196, 39, 29), 71 | (23, 130, 62), 72 | (33, 208, 105), 73 | (127, 214, 156), 74 | (250, 134, 77), 75 | (50, 43, 129), 76 | (2, 101, 145), 77 | (2, 5, 205), 78 | (85, 53, 72), 79 | (245, 248, 180), 80 | (60, 28, 8), 81 | (193, 189, 157), 82 | (116, 180, 248), 83 | (84, 197, 55), 84 | (173, 66, 83), 85 | (127, 2, 143), 86 | (3, 42, 251), 87 | (68, 172, 175), 88 | (251, 253, 30), 89 | (71, 253, 252), 90 | (240, 137, 147), 91 | (132, 122, 143), 92 | (8, 254, 194), 93 | (199, 254, 233), 94 | (123, 121, 3), 95 | (199, 15, 246), 96 | (7, 210, 3), 97 | (173, 212, 9), 98 | (251, 35, 102), 99 | (150, 112, 68), 100 | (40, 254, 85), 101 | (244, 168, 253), 102 | (192, 142, 183), 103 | (63, 246, 141), 104 | (196, 137, 50), 105 | (167, 253, 104), 106 | (222, 183, 58), 107 | (215, 221, 91), 108 | (124, 110, 203), 109 | (136, 70, 41), 110 | (233, 96, 46), 111 | (168, 61, 218), 112 | (137, 252, 51), 113 | (180, 184, 215), 114 | (80, 126, 41), 115 | (80, 248, 43), 116 | (183, 104, 253), 117 | (129, 254, 232), 118 | (130, 184, 6), 119 | (103, 206, 204), 120 | (131, 30, 63), 121 | (75, 83, 128), 122 | (230, 98, 171), 123 | (1, 99, 216), 124 | (1, 167, 235), 125 | (213, 1, 5), 126 | (58, 41, 234), 127 | (104, 1, 220), 128 | (173, 98, 120), 129 | (254, 94, 249), 130 | (1, 225, 64), 131 | (208, 208, 253), 132 | (146, 179, 132), 133 | (161, 252, 2), 134 | (0, 60, 145), 135 | (0, 0, 255), 136 | (61, 0, 46), 137 | (4, 214, 226), 138 | (42, 169, 89), 139 | (63, 138, 130), 140 | (165, 0, 38), 141 | (236, 23, 207), 142 | (27, 95, 35), 143 | (69, 213, 168), 144 | (140, 249, 181), 145 | (66, 207, 4), 146 | (1, 41, 102), 147 | (130, 144, 100), 148 | (236, 51, 54), 149 | (188, 3, 143), 150 | (236, 221, 140), 151 | (16, 24, 165), 152 | (133, 128, 254), 153 | (108, 223, 254), 154 | (54, 142, 205), 155 | (56, 225, 212), 156 | (209, 121, 2), 157 | (250, 165, 12), 158 | (252, 172, 170), 159 | (37, 48, 43), 160 | (170, 214, 125), 161 | (12, 166, 5), 162 | (139, 39, 169), 163 | (204, 39, 104), 164 | (212, 248, 1), 165 | (52, 119, 1), 166 | (166, 217, 193), 167 | (225, 2, 102), 168 | (23, 115, 105), 169 | (202, 178, 108), 170 | (72, 90, 58), 
171 | (113, 254, 137), 172 | (114, 87, 166), 173 | (252, 219, 61), 174 | (162, 56, 2), 175 | (217, 84, 0), 176 | (207, 110, 87), 177 | (58, 4, 131), 178 | (86, 151, 79), 179 | (145, 30, 113), 180 | (96, 2, 10), 181 | (137, 212, 41), 182 | (65, 253, 0), 183 | (97, 34, 141), 184 | (61, 66, 165), 185 | (39, 186, 144), 186 | (254, 49, 253), 187 | (56, 165, 47), 188 | (117, 85, 85), 189 | (130, 178, 84), 190 | (213, 252, 123), 191 | (149, 150, 5), 192 | (48, 23, 91), 193 | (1, 216, 138), 194 | (98, 49, 15), 195 | (101, 156, 212), 196 | (214, 218, 31), 197 | (69, 82, 203), 198 | (97, 52, 254), 199 | (42, 116, 239), 200 | (216, 94, 220), 201 | (166, 123, 218), 202 | (41, 150, 250), 203 | (251, 208, 254), 204 | (222, 167, 205), 205 | (211, 232, 198), 206 | (214, 61, 201), 207 | (26, 170, 195), 208 | (45, 223, 38), 209 | (39, 224, 252), 210 | (174, 169, 44), 211 | (207, 114, 137), 212 | (100, 141, 170), 213 | (1, 80, 251), 214 | (139, 86, 2), 215 | (196, 71, 133), 216 | (208, 4, 59), 217 | (253, 18, 0), 218 | (192, 23, 200), 219 | (76, 210, 124), 220 | (57, 36, 192), 221 | (162, 149, 150), 222 | (252, 106, 131), 223 | (109, 101, 36), 224 | (118, 207, 114), 225 | (30, 129, 160), 226 | (120, 0, 44), 227 | (145, 226, 81), 228 | (178, 43, 254), 229 | (62, 222, 78), 230 | (0, 151, 90), 231 | (25, 100, 181), 232 | (218, 164, 151), 233 | (214, 136, 249), 234 | (227, 40, 145), 235 | (1, 147, 35), 236 | (29, 81, 121), 237 | (164, 145, 74), 238 | (11, 28, 20), 239 | (154, 71, 159), 240 | (57, 2, 213), 241 | (119, 9, 184), 242 | (34, 251, 230), 243 | (2, 184, 110), 244 | (203, 174, 254), 245 | (206, 76, 62), 246 | (224, 149, 106), 247 | (141, 189, 204), 248 | (161, 2, 254), 249 | (135, 39, 213), 250 | (104, 185, 155), 251 | (91, 102, 245), 252 | (109, 154, 12), 253 | (214, 196, 0), 254 | (30, 235, 165), 255 | (106, 34, 96), 256 | (233, 139, 34), 257 | (252, 228, 218), 258 | (139, 76, 198), 259 | (231, 251, 68), 260 | (162, 123, 30), 261 | (11, 0, 93), 262 | (78, 29, 47), 263 | (0, 64, 52), 264 | (156, 155, 212), 265 | (151, 245, 141) 266 | ) 267 | 268 | COLORMAP_VISUALLY_DISTINCT_VOID_PLUS_256 = \ 269 | np.array(_COLORMAP_VISUALLY_DISTINCT_256_PLUS_1, dtype='uint8') 270 | 271 | COLORMAP_VISUALLY_DISTINCT_256 = \ 272 | np.array(_COLORMAP_VISUALLY_DISTINCT_256_PLUS_1[1:], dtype='uint8') 273 | -------------------------------------------------------------------------------- /tests/test_scannet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Simple (interface) tests for ScanNet dataset 4 | 5 | .. codeauthor:: Daniel Seichter 6 | """ 7 | import pytest 8 | 9 | from nicr_scene_analysis_datasets import ScanNet 10 | from nicr_scene_analysis_datasets.dataset_base import ExtrinsicCameraParametersNormalized 11 | from nicr_scene_analysis_datasets.dataset_base import IntrinsicCameraParametersNormalized 12 | from nicr_scene_analysis_datasets.dataset_base import SampleIdentifier 13 | from nicr_scene_analysis_datasets.utils.testing import DATASET_PATH_DICT 14 | 15 | 16 | N_SAMPLES = { # subsample is applied to each trajectory in a scene (folder) 17 | None: {'train': 1893422, 'valid': 530449, 'test': 208862}, # not used so far 18 | 1: {'train': 1893422, 'valid': 530449, 'test': 208862}, # not used so far 19 | 5: {'train': 379221, 'valid': 106217, 'test': 41827}, # used for mapping 20 | 10: {'train': 189916, 'valid': 53193, 'test': 20942}, # used for mapping 21 | 50: {'train': 38474, 'valid': 10767, 'test': 4223}, # default subsample ! 
22 | 100: {'train': 19559, 'valid': 5465, 'test': 2135}, 23 | 200: {'train': 10098, 'valid': 2814, 'test': 1089}, 24 | 500: {'train': 4403, 'valid': 1222, 'test': 468} 25 | } 26 | 27 | N_SCENE_CLASSES = 21 28 | 29 | 30 | @pytest.mark.parametrize('split', ('train', 'valid', 'test')) 31 | @pytest.mark.parametrize('subsample', (50, 100, 200, 500)) 32 | @pytest.mark.parametrize('semantic_n_classes', (20, 40, 200, 549)) 33 | @pytest.mark.parametrize('instance_semantic_mode', ('raw', 'refined')) 34 | def test_dataset(split, subsample, semantic_n_classes, instance_semantic_mode): 35 | dataset = ScanNet( 36 | dataset_path=DATASET_PATH_DICT['scannet'], 37 | split=split, 38 | subsample=subsample, 39 | depth_mode='raw', 40 | sample_keys=ScanNet.get_available_sample_keys(split), 41 | semantic_n_classes=semantic_n_classes, 42 | instance_semantic_mode=instance_semantic_mode 43 | ) 44 | 45 | assert dataset.split == split 46 | 47 | assert len(dataset) == N_SAMPLES[subsample][split] 48 | 49 | assert dataset.semantic_n_classes == semantic_n_classes + 1 50 | assert dataset.semantic_n_classes_without_void == semantic_n_classes 51 | assert len(dataset.semantic_class_names) == dataset.semantic_n_classes 52 | assert len(dataset.semantic_class_names_without_void) == dataset.semantic_n_classes_without_void 53 | assert len(dataset.semantic_class_colors) == dataset.semantic_n_classes 54 | assert len(dataset.semantic_class_colors_without_void) == dataset.semantic_n_classes_without_void 55 | 56 | assert len(dataset.scene_class_names) == N_SCENE_CLASSES 57 | assert len(dataset.cameras) == 2 58 | 59 | assert isinstance(dataset.depth_min, float) 60 | assert isinstance(dataset.depth_max, float) 61 | assert isinstance(dataset.depth_mean, float) 62 | assert isinstance(dataset.depth_std, float) 63 | assert isinstance(dataset.depth_stats, dict) 64 | 65 | # test first 10 samples 66 | for i, sample in enumerate(dataset): 67 | assert isinstance(sample, dict) 68 | assert isinstance(sample['identifier'], SampleIdentifier) 69 | assert isinstance(sample['extrinsics'], 70 | ExtrinsicCameraParametersNormalized) 71 | assert (3+4) == len(sample['extrinsics']) 72 | # inputs: rgb and depth 73 | assert sample['rgb'].ndim == 3 74 | assert isinstance(sample['rgb_intrinsics'], 75 | IntrinsicCameraParametersNormalized) 76 | assert (2+2+6+2) == len(sample['rgb_intrinsics']) 77 | assert sample['depth'].ndim == 2 78 | assert isinstance(sample['depth_intrinsics'], 79 | IntrinsicCameraParametersNormalized) 80 | assert (2+2+6+2+2) == len(sample['depth_intrinsics']) 81 | 82 | if 'test' != split: 83 | # semantic 84 | assert sample['semantic'].ndim == 2 85 | # instance 86 | assert sample['instance'].ndim == 2 87 | # scene 88 | assert isinstance(sample['scene'], int) 89 | 90 | if i >= 9: 91 | break 92 | 93 | 94 | @pytest.mark.parametrize('split', ('train', 'valid')) 95 | def test_scene_class_mapping(split): 96 | sample_keys = ('scene',) 97 | 98 | # create datasets (with default subsample!) 
99 | dataset_original = ScanNet( 100 | dataset_path=DATASET_PATH_DICT['scannet'], 101 | split=split, 102 | sample_keys=sample_keys, 103 | scene_use_indoor_domestic_labels=False 104 | ) 105 | 106 | dataset_remapped = ScanNet( 107 | dataset_path=DATASET_PATH_DICT['scannet'], 108 | split=split, 109 | sample_keys=sample_keys, 110 | scene_use_indoor_domestic_labels=True 111 | ) 112 | 113 | # count samples 114 | def count(dataset): 115 | class_names = dataset.config.scene_label_list.class_names 116 | counts = {n: 0 for n in class_names} 117 | for sample in dataset: 118 | counts[class_names[sample['scene']]] += 1 119 | 120 | return counts 121 | 122 | counts_original = count(dataset_original) 123 | counts_remapped = count(dataset_remapped) 124 | 125 | # perform some simple checks 126 | assert sum(counts_remapped.values()) == N_SAMPLES[50][split] 127 | assert sum(counts_remapped.values()) == sum(counts_original.values()) 128 | assert len(counts_remapped) == dataset_remapped.scene_n_classes 129 | 130 | assert dataset_original.scene_n_classes == dataset_original.scene_n_classes_without_void 131 | assert dataset_remapped.scene_n_classes == dataset_remapped.scene_n_classes_without_void + 1 132 | 133 | 134 | @pytest.mark.parametrize('split', ('train', 'valid', 'test')) 135 | def test_filter_camera(split): 136 | # just some random cameras and counts that we know 137 | sample_cameras = { # for default subsample of 50 138 | 'train': { 139 | 'structureio_480x640': 688 140 | }, 141 | 'valid': { 142 | 'structureio_968x1296': 10492, 143 | }, 144 | 'test': {'structureio_968x1296': 4223}, 145 | } 146 | 147 | cameras = tuple(sample_cameras[split].keys()) 148 | n_samples = tuple(sample_cameras[split].values()) 149 | 150 | # create dataset with specified cameras 151 | dataset = ScanNet( 152 | dataset_path=DATASET_PATH_DICT['scannet'], 153 | split=split, 154 | sample_keys=ScanNet.get_available_sample_keys(split), 155 | cameras=cameras 156 | ) 157 | 158 | assert dataset.cameras == cameras 159 | assert len(dataset) == sum(n_samples) 160 | 161 | # test filtering 162 | dataset.filter_camera(cameras[0]) 163 | assert dataset.camera == cameras[0] 164 | assert len(dataset) == n_samples[0] 165 | 166 | # reset filtering 167 | dataset.filter_camera(None) 168 | assert dataset.camera is None 169 | assert len(dataset) == sum(n_samples) 170 | --------------------------------------------------------------------------------