├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── datasets └── .gitkeep ├── evaluate.py ├── predict.py ├── semantic_mapping.yaml ├── tox.ini ├── trained_models └── .gitkeep └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .coverage.* 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | *.cover 44 | .hypothesis/ 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | local_settings.py 53 | 54 | # Flask stuff: 55 | instance/ 56 | .webassets-cache 57 | 58 | # Scrapy stuff: 59 | .scrapy 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | 67 | # Jupyter Notebook 68 | .ipynb_checkpoints 69 | 70 | # pyenv 71 | .python-version 72 | 73 | # celery beat schedule file 74 | celerybeat-schedule 75 | 76 | # SageMath parsed files 77 | *.sage.py 78 | 79 | # Environments 80 | .env 81 | .venv 82 | env/ 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | .spyproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # mkdocs documentation 94 | /site 95 | 96 | # mypy 97 | .mypy_cache/ 98 | 99 | # PyCharm 100 | .idea 101 | 102 | # MacOS 103 | .DS_Store 104 | 105 | # Binaries 106 | .npz 107 | .npy 108 | .h5 109 | .hdf5 110 | core 111 | 112 | 113 | # VSCode 114 | # settings.json 115 | 116 | # onnx models 117 | *.onnx 118 | 119 | # data 120 | datasets/hypersim 121 | datasets/hypersim_predictions 122 | trained_models/*.tar.gz 123 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/nicr-scene-analysis-datasets"] 2 | path = lib/nicr-scene-analysis-datasets 3 | url = https://github.com/TUI-NICR/nicr-scene-analysis-datasets 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pycodestyleEnabled": true, 3 | "python.linting.enabled": true, 4 | // disable annoying top-level source code modification indication 5 | "gitlens.codeLens.authors.enabled": false, 6 | "gitlens.codeLens.recentChange.enabled": false, 7 | "files.trimTrailingWhitespace": true, 8 | "[markdown]": { 9 | "files.trimTrailingWhitespace": false 10 | }, 11 | } 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021-2022, Neuroinformatics and Cognitive Robotics Lab (Technische 4 | Universität Ilmenau) 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient and Robust Semantic Mapping for Indoor Environments 2 | 3 | This repository contains the code to our paper "Efficient and Robust Semantic Mapping for Indoor Environments" ([IEEE Xplore](https://ieeexplore.ieee.org/document/9812205), [arXiv](https://arxiv.org/pdf/2203.05836.pdf)). 4 | 5 |
6 | 7 |
(Click on the image to open YouTube video) 8 |

9 |
10 | 11 | > You may also want to have a look at our follow-up work: [**PanopticNDT**](https://github.com/TUI-NICR/panoptic-mapping) 12 | 13 | ## License and Citations 14 | The source code and the network weights are published under BSD 3-Clause license, see [license file](LICENSE) for details. 15 | 16 | If you use the source code or the network weights, please cite the following paper: 17 | >Seichter, D., Langer, P., Wengefeld, T., Lewandowski, B., Höchemer, D., Gross, H.-M. 18 | *Efficient and Robust Semantic Mapping for Indoor Environments* 19 | in IEEE International Conference on Robotics and Automation (ICRA), pp. 9221-9227, 2022. 20 | 21 |
22 | BibTeX 23 | 24 | ```bibtex 25 | @inproceedings{semanticndtmapping2022icra, 26 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}}, 27 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael}, 28 | booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, 29 | year = {2022}, 30 | volume = {}, 31 | number = {}, 32 | pages = {9221-9227} 33 | } 34 | 35 | @article{semanticndtmapping2022arXiv, 36 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}}, 37 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael}, 38 | journal = {arXiv preprint arXiv:2203.05836}, 39 | year = {2022} 40 | } 41 | ``` 42 | Note that the preprint was accepted to be published in IEEE International Conference on Robotics and Automation (ICRA). 43 | 44 |
45 | 46 | ## Setup 47 | 48 | 1. Clone repository: 49 | ```bash 50 | # do not forget the '--recursive' ;) 51 | git clone --recursive https://github.com/TUI-NICR/semantic-mapping.git 52 | 53 | cd /path/to/this/repository 54 | ``` 55 | 56 | 2. Set up anaconda environment including all dependencies: 57 | ```bash 58 | # option 1: create conda environment from YAML file 59 | conda env create -f semantic_mapping.yaml 60 | conda activate semantic_mapping 61 | 62 | # option 2: create new environment (see last tested versions) 63 | conda create -n semantic_mapping python==3.8.12 anaconda==2021.11 64 | conda activate semantic_mapping 65 | pip install onnx==1.11.0 66 | pip install opencv-python==4.2.0.34 67 | pip install tqdm==4.62.3 68 | # ONNXRuntime with CUDA support 69 | conda install -c conda-forge cudnn==8.2.1.32 70 | pip install onnxruntime-gpu==1.11.0 71 | 72 | 73 | # finally, install our package for preparing and using the Hypersim dataset 74 | pip install ./lib/nicr-scene-analysis-datasets[with_preparation] 75 | ``` 76 | 77 | ## Usage 78 | 79 | 1. Prepare the [Hypersim](https://machinelearning.apple.com/research/hypersim) dataset: 80 | ```bash 81 | # download and extract raw dataset (2x ~1.8TB) 82 | HYPERSIM_DOWNLOAD_PATH='./datasets/hypersim_preparation' 83 | wget https://raw.githubusercontent.com/apple/ml-hypersim/6cbaa80207f44a312654e288cf445016c84658a1/code/python/tools/dataset_download_images.py 84 | python dataset_download_images.py --downloads_dir $HYPERSIM_DOWNLOAD_PATH 85 | 86 | # prepare dataset (~157.5 GB, extract required data, convert to our format, blacklist some scenes/trajectories) 87 | python -m nicr_scene_analysis_datasets.datasets.hypersim.prepare_dataset \ 88 | ./datasets/hypersim \ 89 | $HYPERSIM_DOWNLOAD_PATH \ 90 | --additional-subsamples 2 5 10 20 \ 91 | --multiprocessing 92 | 93 | # just in case you want to delete the downloaded raw data (2x ~1.8TB) 94 | rm -rf $HYPERSIM_DOWNLOAD_PATH 95 | 96 | ``` 97 | For further details, we refer to the documentation of our 98 | [nicr-scene-analysis-datasets python package](https://github.com/TUI-NICR/nicr-scene-analysis-datasets/tree/882276c46ca5864ebb6146afe6bae56d0b1abc11). 99 | 100 | 2. Download pretrained model: 101 | We provide the weights of our selected ESANet-R34-NBt1D (enhanced ResNet34-based encoder utilizing the Non-Bottleneck-1D block) trained on the Hypersim dataset. 102 | To ease both application and deployment, we removed all dependencies (PyTorch, ...) and provide the weights in [ONNX format](https://onnx.ai/). 103 | 104 | Click [here](https://drive.google.com/uc?id=1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg) to download the model and extract it to `./trained_models` or use: 105 | ```bash 106 | pip install gdown # last tested: 4.4.0 107 | gdown 1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg --output ./trained_models/ 108 | tar -xvzf ./trained_models/model_hypersim.tar.gz -C ./trained_models/ 109 | 110 | ``` 111 | 112 | The model was selected based on the mean intersection over union (mIoU) on the validation split: 0.4591184410660463 at epoch 498. 113 | On the test split, the model achieves a mIoU of 0.41168890871760977. 114 | Note, similar to other approaches, we only evaluate up to a reasonable maximum distance of 20m from the camera. For more detail, see `evaluate.py`. 115 | 116 | 117 | 3. 
Extract predicted semantic segmentation: 118 | ```bash 119 | # use default paths (~74.3GB for topk with k=3) 120 | python predict.py \ 121 | --onnx-filepath ./trained_models/model_hypersim.onnx \ 122 | --dataset-path ./datasets/hypersim \ 123 | --dataset-split test \ 124 | --topk 3 \ 125 | --output-path ./datasets/hypersim_predictions 126 | 127 | # for more details, see: 128 | python predict.py --help 129 | ``` 130 | For the example above, the predicted segmentations are stored at `./datasets/hypersim_predictions/test/`. 131 | See the `semantic_40_topk` subfolder for the predicted topK segmentation outputs and `semantic_40/` or `semantic_40_colored/` for the predicted (colored) top1 labels. 132 | 133 | 4. Run your semantic mapping experiments and store the results with the following folder structure: 134 | ```text 135 | path/to/results/ 136 | └── test 137 | ├── results1 138 | │   ├── ai_001_010 139 | │   │   ├── cam_00 140 | │   │   │   ├── 0000.png 141 | │   │   │   ├── ... 142 | ├── results2 143 | │   ├── ai_001_010 144 | │   │   ├── cam_00 145 | │   │   │   ├── 0000.png 146 | │   │   │   ├── ... 147 | ``` 148 | You may have a look at `./lib/nicr-scene-analysis-datasets/nicr_scene_analysis_datasets/mira/_hypersim_reader.py` as a starting point. 149 | This class shows how the Hypersim dataset is processed in our pipelines. 150 | 151 | 5. Run evaluation: 152 | ```bash 153 | # use default paths 154 | python evaluate.py \ 155 | --dataset-path ./datasets/hypersim \ 156 | --dataset-split test \ 157 | --predictions-path ./datasets/hypersim_predictions 158 | [--result-paths path/to/results/test/results1 path/to/results/test/results2] 159 | 160 | # for more details, see: 161 | python evaluate.py --help 162 | ``` 163 | 164 | For the predicted segmentation of our ONNX model, you should obtain measures similar to: 165 | ```text 166 | miou_gt_masked: 0.41168890871760977 167 | mean_pacc_gt_masked: 0.5683601556433829 168 | invalid_ratio: 0.0 169 | invalid_mean_ratio_gt_masked: 0.0 170 | vwmiou_gt_masked: 0.41168890871760977 171 | vwmean_pacc_gt_masked: 0.5683601556433829 172 | ``` 173 | Check the created `results.json` in the predictions folder for more measures (e.g., `./datasets/hypersim_predictions/test/semantic_40/results.json`). 174 | -------------------------------------------------------------------------------- /datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/datasets/.gitkeep -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ..
codeauthor:: Daniel Seichter 4 | """ 5 | import argparse as ap 6 | from functools import partial 7 | import json 8 | import multiprocessing 9 | import os 10 | import warnings 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from tqdm import tqdm 16 | from tqdm.contrib.concurrent import thread_map 17 | 18 | from nicr_scene_analysis_datasets import Hypersim 19 | 20 | from utils import DEFAULT_DATASET_PATH 21 | from utils import DEFAULT_PREDICTIONS_PATH 22 | 23 | 24 | def confusion_matrix_fast(pred, gt, n_classes): 25 | # note: this function is 15x faster than sklearn.metrics.confusion_matrix 26 | 27 | # determine dtype for unique mapping 28 | n_classes_squared = n_classes**2 29 | if n_classes_squared < 2**(8-1)-1: 30 | dtype = np.int8 31 | elif n_classes_squared < 2**(16-1)-1: 32 | dtype = np.int16 33 | else: 34 | dtype = np.int64 # equal to long 35 | 36 | # convert to dtype 37 | pred_ = pred.astype(dtype) 38 | gt_ = gt.astype(dtype) 39 | 40 | # compute confusion matrix 41 | unique_mapping = (gt_.reshape(-1)*n_classes + pred_.reshape(-1)) 42 | cnts = np.bincount(unique_mapping, 43 | minlength=n_classes_squared) 44 | 45 | return cnts.reshape(n_classes, n_classes) 46 | 47 | 48 | def get_confusion_matrix_for_sample( 49 | sample_idx, 50 | dataset, 51 | prediction_basepath, 52 | prediction_extension='.png', 53 | prediction_contains_void=True, 54 | max_depth_in_m=20 # max 20m 55 | ): 56 | n_classes = dataset.semantic_n_classes # with void 57 | 58 | # get sample 59 | sample = dataset[sample_idx] 60 | 61 | # load prediction 62 | fp = os.path.join(prediction_basepath, *sample['identifier']) 63 | fp += prediction_extension 64 | if '.png' == prediction_extension: 65 | # prediction is given as image 66 | pred = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 67 | if pred is None: 68 | raise IOError(f"Cannot load '{fp}'") 69 | if pred.ndim > 2: 70 | warnings.warn(f"Prediction ('{fp}') has more than one channel. "
71 | "Using first channel.") 72 | pred = pred[..., 0] 73 | elif '.npy' == prediction_extension: 74 | # prediction is given as numpy array with shape (topk, h, w) 75 | pred = np.load(fp) 76 | pred = pred[0, ...].astype('uint8') # use top1 only 77 | 78 | if not prediction_contains_void: 79 | pred += 1 80 | 81 | # create flat views 82 | gt = sample['semantic'].reshape(-1) 83 | pred = pred.reshape(-1) 84 | 85 | # mask using max depth 86 | if max_depth_in_m is not None: 87 | depth = sample['depth'].reshape(-1) 88 | mask = depth < (max_depth_in_m*1000) 89 | gt = gt[mask] 90 | pred = pred[mask] 91 | 92 | # move invalid pixels in prediction, i.e., pixels that may indicate free 93 | # space, to class with index i=n_classes 94 | pred[pred > (n_classes-1)] = n_classes 95 | n_classes = n_classes + 1 # +1 = invalid pixels 96 | 97 | return confusion_matrix_fast(pred, gt, n_classes=n_classes) 98 | 99 | 100 | def get_measures(cm, ignore_void=True): 101 | # cm is gt x pred with void + n_classes + invalid (free space) 102 | 103 | tp = np.diag(cm) 104 | sum_gt = cm.sum(axis=1) 105 | sum_pred = cm.sum(axis=0) 106 | invalid_pixels = cm[:, -1] 107 | 108 | if ignore_void: 109 | # void is first class (idx=0) 110 | tp = tp[1:] 111 | sum_pred = sum_pred[1:] 112 | sum_gt = sum_gt[1:] 113 | sum_pred -= cm[0, 1:] # do not count fp for void 114 | invalid_pixels = invalid_pixels[1:] 115 | 116 | n_total_pixels = sum_gt.sum() 117 | 118 | # we do want to ignore classes without gt pixels 119 | gt_mask = sum_gt != 0 120 | 121 | # invalid pixels 122 | invalid_ratio = invalid_pixels.sum() / n_total_pixels 123 | with np.errstate(divide='ignore', invalid='ignore'): 124 | invalid_ratios = invalid_pixels / sum_gt 125 | invalid_mean_ratio_gt_masked = np.mean(invalid_ratios[gt_mask]) 126 | valid_weights = 1 - invalid_ratios 127 | 128 | # intersection over union 129 | intersections = tp 130 | unions = sum_pred + sum_gt - tp 131 | 132 | with np.errstate(divide='ignore', invalid='ignore'): 133 | ious = intersections / unions.astype(np.float32) 134 | 135 | # mean intersection over union and gt masked version 136 | miou = np.mean(np.nan_to_num(ious, nan=0.0)) 137 | miou_gt_masked = np.mean(ious[gt_mask]) 138 | 139 | # frequency weighted intersection over union 140 | # normal fwiou and gt masked version are equal 141 | fwiou_gt_masked = np.sum(ious[gt_mask] * tp[gt_mask]/n_total_pixels) 142 | 143 | # pixel accuracy and mean pixel accuracy 144 | pacc = tp.sum() / sum_gt.sum() 145 | 146 | with np.errstate(divide='ignore', invalid='ignore'): 147 | paccs = tp / sum_gt 148 | 149 | mean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask]) 150 | 151 | # valid weighted mean intersection over union 152 | vwmiou_gt_masked = np.mean(ious[gt_mask]*valid_weights[gt_mask]) 153 | 154 | # valid weighted mean pixel accuracy 155 | vwmean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask] * valid_weights[gt_mask]) 156 | 157 | # build dict of measures 158 | measures = { 159 | 'cm': cm.tolist(), 160 | 'invalid_ratio': invalid_ratio, 161 | 'invalid_ratios': invalid_ratios.tolist(), 162 | 'invalid_mean_ratio_gt_masked': invalid_mean_ratio_gt_masked, 163 | 'ious': ious.tolist(), 164 | 'miou': miou, 165 | 'miou_gt_masked': miou_gt_masked, 166 | 'fwiou_gt_masked': fwiou_gt_masked, 167 | 'pacc': pacc, 168 | 'paccs': paccs.tolist(), 169 | 'mean_pacc_gt_masked': mean_pacc_gt_masked, 170 | 'vwmiou_gt_masked': vwmiou_gt_masked, 171 | 'vwmean_pacc_gt_masked': vwmean_pacc_gt_masked, 172 | } 173 | 174 | return measures 175 | 176 | 177 | def _parse_args(): 178 |
parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter) 179 | parser.add_argument( 180 | '--dataset-path', 181 | type=str, 182 | default=DEFAULT_DATASET_PATH, 183 | help="Path to the dataset." 184 | ) 185 | parser.add_argument( 186 | '--dataset-split', 187 | type=str, 188 | default='test', 189 | help="Dataset split to use." 190 | ) 191 | parser.add_argument( 192 | '--predictions-path', 193 | type=str, 194 | default=DEFAULT_PREDICTIONS_PATH, 195 | help="Path to stored predicted semantic segmentation. Use an empty " 196 | "string to skip evaluating the predicted semantic " 197 | "segmentation." 198 | ) 199 | parser.add_argument( 200 | '--result-paths', 201 | nargs='+', 202 | type=str, 203 | help="Paths to further results.", 204 | default=[] 205 | ) 206 | parser.add_argument( 207 | '--force-recomputing', 208 | action='store_true', 209 | default=False, 210 | help="Force recomputing." 211 | ) 212 | parser.add_argument( 213 | '--n-worker', 214 | type=int, 215 | default=min(multiprocessing.cpu_count(), 48), 216 | help="Number of workers to use." 217 | ) 218 | 219 | return parser.parse_args() 220 | 221 | 222 | def main(): 223 | # args 224 | args = _parse_args() 225 | 226 | # just obtain all sample names 227 | dataset = Hypersim(dataset_path=args.dataset_path, 228 | split=args.dataset_split, 229 | subsample=None, 230 | sample_keys=('identifier',)) 231 | samples = [s['identifier'] for s in dataset] # tuple (scene, cam, id) 232 | scenes = sorted(list(set(s[0] for s in samples))) 233 | 234 | # load dataset 235 | dataset = Hypersim(dataset_path=args.dataset_path, 236 | split=args.dataset_split, 237 | subsample=None, 238 | sample_keys=('identifier', 'depth', 'semantic'), 239 | use_cache=False, 240 | cache_disable_deepcopy=False) 241 | 242 | # get paths to evaluate 243 | paths = [] 244 | if args.predictions_path: 245 | # evaluate the network prediction 246 | paths += [ 247 | os.path.join(args.predictions_path, args.dataset_split, 248 | Hypersim.SEMANTIC_DIR), 249 | ] 250 | paths += [ 251 | os.path.join(path) for path in args.result_paths 252 | ] 253 | 254 | # run evaluation 255 | for path in tqdm(paths): 256 | print(f"Evaluating: '{path}'") 257 | results_fp = os.path.join(path, 'results.json') 258 | 259 | if os.path.exists(results_fp) and not args.force_recomputing: 260 | continue 261 | 262 | # get confusion matrices 263 | if 1 == args.n_worker: 264 | cms = [] 265 | for i in tqdm(range(len(dataset))): 266 | cm = get_confusion_matrix_for_sample( 267 | i, 268 | dataset=dataset, 269 | prediction_basepath=path, 270 | prediction_extension='.png', 271 | prediction_contains_void=True, 272 | max_depth_in_m=20 273 | ) 274 | cms.append(cm) 275 | else: 276 | f = partial(get_confusion_matrix_for_sample, 277 | dataset=dataset, 278 | prediction_basepath=path, 279 | prediction_extension='.png', 280 | prediction_contains_void=True, 281 | max_depth_in_m=20) 282 | cms = thread_map(f, list(range(len(dataset))), 283 | max_workers=args.n_worker, 284 | chunksize=10, 285 | leave=False) 286 | 287 | # get overall measures 288 | assert len(cms) == len(samples) 289 | cm = np.array(cms).sum(axis=0) 290 | 291 | measures = get_measures(cm, ignore_void=True) 292 | for k in ('miou_gt_masked', 'mean_pacc_gt_masked', 293 | 'invalid_ratio', 'invalid_mean_ratio_gt_masked', 294 | 'vwmiou_gt_masked', 'vwmean_pacc_gt_masked'): 295 | print(f"{k}: {measures[k]}") 296 | 297 | # get results for each scene 298 | cms_per_scene = {s: [] for s in scenes} 299 | for cm, sample in zip(cms, samples): 300 | scene = sample[0] 301 |
cms_per_scene[scene].append(cm) 302 | 303 | measures['per_scene'] = {} 304 | for scene, cms_scene in cms_per_scene.items(): 305 | cm = np.array(cms_scene).sum(axis=0) 306 | measures['per_scene'][scene] = get_measures(cm, ignore_void=True) 307 | 308 | # write results to file 309 | with open(results_fp, 'w') as f: 310 | json.dump(measures, f, indent=4) 311 | 312 | 313 | if __name__ == '__main__': 314 | main() 315 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import argparse as ap 6 | import os 7 | 8 | import cv2 9 | from nicr_scene_analysis_datasets import Hypersim 10 | from nicr_scene_analysis_datasets.utils.img import save_indexed_png 11 | import numpy as np 12 | import onnx 13 | import onnxruntime as ort 14 | from tqdm import tqdm 15 | 16 | from utils import DEFAULT_DATASET_PATH 17 | from utils import DEFAULT_ONNX_FILEPATH 18 | from utils import DEFAULT_PREDICTIONS_PATH 19 | 20 | 21 | def _get_ort_session(onnx_filepath, img_hw, topk=3): 22 | model = onnx.load(onnx_filepath) 23 | 24 | # get network output shape (same as input shape) 25 | # note: our optimizations to the resize operations seem to break onnx's 26 | # shape inference with OpSet >= 13 27 | model_output_img_shape = ( 28 | model.graph.input[0].type.tensor_type.shape.dim[2].dim_value, 29 | model.graph.input[0].type.tensor_type.shape.dim[3].dim_value 30 | ) 31 | 32 | # add missing nodes: final upsampling, softmax, and topk 33 | # see: https://github.com/onnx/onnx/blob/main/docs/Operators.md 34 | # -> final upsampling 35 | final_upsampling_node = onnx.helper.make_node( 36 | 'Resize', 37 | # inputs=['output', 'roi', 'scales'], 38 | inputs=['output', '', 'scales'], # '' for 'roi' requires OpSet >= 13 39 | outputs=['final_upsampling_output'], 40 | coordinate_transformation_mode='pytorch_half_pixel', 41 | cubic_coeff_a=-0.75, 42 | mode='linear', 43 | nearest_mode='floor', 44 | ) 45 | # roi = onnx.helper.make_tensor('roi', onnx.TensorProto.FLOAT, [0], []) 46 | scale_h = img_hw[0] / model_output_img_shape[0] 47 | scale_w = img_hw[1] / model_output_img_shape[1] 48 | scales = onnx.helper.make_tensor('scales', 49 | onnx.TensorProto.FLOAT, [4], 50 | [1, 1, scale_h, scale_w]) 51 | # -> softmax (note that softmax op with 4D inputs requires OpSet >= 13) 52 | softmax_node = onnx.helper.make_node( 53 | 'Softmax', 54 | inputs=['final_upsampling_output'], 55 | outputs=['prediction'], 56 | axis=1 57 | ) 58 | # topk 59 | topk_node = onnx.helper.make_node( 60 | 'TopK', 61 | inputs=['prediction', 'k'], 62 | outputs=['scores', 'classes'], 63 | axis=1, 64 | largest=1, 65 | sorted=1 66 | ) 67 | k = onnx.helper.make_tensor('k', onnx.TensorProto.INT64, [1], [int(topk)]) 68 | 69 | # add new nodes and initializers to graph 70 | # model.graph.initializer.append(roi) 71 | model.graph.initializer.append(scales) 72 | model.graph.node.append(final_upsampling_node) 73 | model.graph.node.append(softmax_node) 74 | model.graph.initializer.append(k) 75 | model.graph.node.append(topk_node) 76 | 77 | # replace output information 78 | if model.graph.input[0].type.tensor_type.shape.dim[0].dim_param: 79 | # dynamic batch axis 80 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_param 81 | else: 82 | # fixed batch axis 83 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_value 84 | 85 | scores_info = 
onnx.helper.make_tensor_value_info('scores', 86 | onnx.TensorProto.FLOAT, 87 | shape=[b, topk, *img_hw]) 88 | classes_info = onnx.helper.make_tensor_value_info('classes', 89 | onnx.TensorProto.INT64, 90 | shape=[b, topk, *img_hw]) 91 | model.graph.output.pop(0) 92 | model.graph.output.append(scores_info) 93 | model.graph.output.append(classes_info) 94 | 95 | # perform final check 96 | onnx.checker.check_model(model) 97 | # onnx.save(model, './model.onnx') 98 | 99 | # create onnxruntime session 100 | ort_session = ort.InferenceSession( 101 | model.SerializeToString(), 102 | providers=[ 103 | # 'TensorrtExecutionProvider', 104 | 'CUDAExecutionProvider', 105 | 'CPUExecutionProvider' 106 | ] 107 | ) 108 | return ort_session 109 | 110 | 111 | def _parse_args(): 112 | parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter) 113 | parser.add_argument( 114 | '--onnx-filepath', 115 | type=str, 116 | default=DEFAULT_ONNX_FILEPATH, 117 | help="Path to ONNX model to use." 118 | ) 119 | parser.add_argument( 120 | '--dataset-path', 121 | type=str, 122 | default=DEFAULT_DATASET_PATH, 123 | help="Path to the dataset." 124 | ) 125 | parser.add_argument( 126 | '--dataset-split', 127 | type=str, 128 | default='test', 129 | help="Dataset split to use." 130 | ) 131 | parser.add_argument( 132 | '--output-path', 133 | type=str, 134 | default=DEFAULT_PREDICTIONS_PATH, 135 | help="Path where to store predicted semantic segmentation." 136 | ) 137 | parser.add_argument( 138 | '--topk', 139 | type=int, 140 | default=3, 141 | help="TopK classes to consider." 142 | ) 143 | return parser.parse_args() 144 | 145 | 146 | def main(): 147 | # args 148 | args = _parse_args() 149 | 150 | # load data 151 | dataset = Hypersim( 152 | dataset_path=args.dataset_path, 153 | split=args.dataset_split, 154 | subsample=None, 155 | sample_keys=('identifier', 'rgb', 'depth'), 156 | depth_mode='raw' 157 | ) 158 | 159 | RGB_MEAN = np.array((0.485, 0.456, 0.406), dtype='float32') * 255 160 | RGB_STD = np.array((0.229, 0.224, 0.225), dtype='float32') * 255 161 | 162 | # ensure that the used depth stats are valid for this model (there was a 163 | # copy and paste issue that we fixed in future versions of 164 | # nicr_scene_analysis_datasets) 165 | assert dataset.depth_mean == 6249.621001070915 166 | assert dataset.depth_std == 6249.621001070915 # <- c&p ^^ 167 | 168 | # process files (for simplicity, with batch size 1) 169 | ort_session = None 170 | for sample in tqdm(dataset, desc='Processing files'): 171 | # load model lazily (we need a sample to get the spatial dimensions) 172 | if ort_session is None: 173 | ort_session = _get_ort_session( 174 | onnx_filepath=args.onnx_filepath, 175 | img_hw=sample['rgb'].shape[:2], 176 | topk=args.topk 177 | ) 178 | 179 | # get network input shape (from rgb input) 180 | h, w = ort_session.get_inputs()[0].shape[-2:] 181 | 182 | # rgb preprocessing 183 | # -> resize 184 | rgb = cv2.resize(sample['rgb'], (w, h), 185 | interpolation=cv2.INTER_LINEAR) 186 | # -> normalize 187 | rgb = rgb.astype('float32') 188 | rgb -= RGB_MEAN[None, None, ...] 189 | rgb /= RGB_STD[None, None, ...] 190 | # -> create tensor (add batch axis, channels first) 191 | rgb = rgb.transpose(2, 0, 1)[None, ...]
192 | 193 | # depth preprocessing 194 | # -> resize 195 | depth = cv2.resize(sample['depth'], (w, h), 196 | interpolation=cv2.INTER_NEAREST) 197 | # -> normalize 198 | mask_invalid = depth == 0 # mask for invalid depth values 199 | depth = depth.astype('float32') 200 | depth -= dataset.depth_mean 201 | depth /= dataset.depth_std 202 | # reset invalid values (the network should not be able to learn from 203 | # these pixels) 204 | depth[mask_invalid] = 0 205 | # -> create tensor (add batch and channel axes) 206 | depth = depth[None, None, ...] 207 | 208 | # apply model 209 | scores, classes = ort_session.run(None, {'rgb': rgb, 'depth': depth}) 210 | 211 | # remove batch axis 212 | scores = scores[0] 213 | classes = classes[0] 214 | 215 | # cast classes to uint8 (< 255 classes) 216 | classes = classes.astype('uint8') 217 | 218 | # create predicted segmentation 219 | # note that we store the topk predictions as class_idx + score (to 220 | # save some space), you may further think about using float16 221 | scores_clamped = np.clip(scores, a_min=0, a_max=0.9999) 222 | classes = classes + 1 # add void class (void + 40 classes) 223 | segmentation = scores_clamped + classes 224 | 225 | # ensure that class is still correct (top0 only) 226 | assert (segmentation[0].astype('uint8') == classes[0]).all() 227 | 228 | # store predicted segmentation 229 | # -> topk prediction (for mapping later) 230 | fp = os.path.join(args.output_path, args.dataset_split, 231 | f'{Hypersim.SEMANTIC_DIR}_topk', 232 | *sample['identifier']) 233 | os.makedirs(os.path.dirname(fp), exist_ok=True) 234 | np.save(f'{fp}.npy', segmentation) 235 | 236 | # -> predicted classes 237 | for i in range(args.topk): 238 | dirname = Hypersim.SEMANTIC_DIR 239 | if i > 0: 240 | dirname += f'_topk_{i}' 241 | fp = os.path.join(args.output_path, args.dataset_split, 242 | dirname, *sample['identifier']) 243 | os.makedirs(os.path.dirname(fp), exist_ok=True) 244 | cv2.imwrite(f'{fp}.png', segmentation[i].astype('uint8')) 245 | 246 | # -> predicted classes as colored images (with color palette, do not 247 | # load these images later on with OpenCV, PIL is fine) 248 | for i in range(args.topk): 249 | dirname = Hypersim.SEMANTIC_COLORED_DIR 250 | if i > 0: 251 | dirname += f'_topk_{i}' 252 | fp = os.path.join(args.output_path, args.dataset_split, 253 | dirname, *sample['identifier']) 254 | os.makedirs(os.path.dirname(fp), exist_ok=True) 255 | save_indexed_png(f'{fp}.png', segmentation[i].astype('uint8'), 256 | colormap=dataset.semantic_class_colors) 257 | 258 | 259 | if __name__ == '__main__': 260 | main() 261 | -------------------------------------------------------------------------------- /semantic_mapping.yaml: -------------------------------------------------------------------------------- 1 | name: semantic_mapping 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=4.5=1_gnu 8 | - alabaster=0.7.12=pyhd3eb1b0_0 9 | - anaconda=2021.11=py38_0 10 | - anaconda-client=1.9.0=py38h06a4308_0 11 | - anaconda-project=0.10.1=pyhd3eb1b0_0 12 | - anyio=2.2.0=py38h06a4308_1 13 | - appdirs=1.4.4=pyhd3eb1b0_0 14 | - argh=0.26.2=py38_0 15 | - argon2-cffi=20.1.0=py38h27cfd23_1 16 | - arrow=0.13.1=py38_0 17 | - asn1crypto=1.4.0=py_0 18 | - astroid=2.6.6=py38h06a4308_0 19 | - astropy=4.3.1=py38h09021b7_0 20 | - async_generator=1.10=pyhd3eb1b0_0 21 | - atomicwrites=1.4.0=py_0 22 | - attrs=21.2.0=pyhd3eb1b0_0 23 | - autopep8=1.5.7=pyhd3eb1b0_0 24 | - babel=2.9.1=pyhd3eb1b0_0 25 | -
backcall=0.2.0=pyhd3eb1b0_0 26 | - backports=1.0=pyhd3eb1b0_2 27 | - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3 28 | - beautifulsoup4=4.10.0=pyh06a4308_0 29 | - binaryornot=0.4.4=pyhd3eb1b0_1 30 | - bitarray=2.3.0=py38h7f8727e_1 31 | - bkcharts=0.2=py38_0 32 | - black=19.10b0=py_0 33 | - blas=1.0=mkl 34 | - bleach=4.0.0=pyhd3eb1b0_0 35 | - blosc=1.21.0=h8c45485_0 36 | - bokeh=2.4.1=py38h06a4308_0 37 | - boto=2.49.0=py38_0 38 | - bottleneck=1.3.2=py38heb32a55_1 39 | - brotli=1.0.9=he6710b0_2 40 | - brotlipy=0.7.0=py38h27cfd23_1003 41 | - brunsli=0.1=h2531618_0 42 | - bzip2=1.0.8=h7b6447c_0 43 | - c-ares=1.17.1=h27cfd23_0 44 | - ca-certificates=2021.10.26=h06a4308_2 45 | - cairo=1.16.0=hf32fb01_1 46 | - certifi=2021.10.8=py38h06a4308_0 47 | - cffi=1.14.6=py38h400218f_0 48 | - cfitsio=3.470=hf0d0db6_6 49 | - chardet=4.0.0=py38h06a4308_1003 50 | - charls=2.2.0=h2531618_0 51 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 52 | - click=8.0.3=pyhd3eb1b0_0 53 | - cloudpickle=2.0.0=pyhd3eb1b0_0 54 | - clyent=1.2.2=py38_1 55 | - colorama=0.4.4=pyhd3eb1b0_0 56 | - conda=4.12.0=py38h06a4308_0 57 | - conda-content-trust=0.1.1=pyhd3eb1b0_0 58 | - conda-pack=0.6.0=pyhd3eb1b0_0 59 | - conda-package-handling=1.8.1=py38h7f8727e_0 60 | - conda-token=0.3.0=pyhd3eb1b0_0 61 | - contextlib2=0.6.0.post1=pyhd3eb1b0_0 62 | - cookiecutter=1.7.2=pyhd3eb1b0_0 63 | - cryptography=3.4.8=py38hd23ed53_0 64 | - cudatoolkit=11.2.2=he111cf0_8 65 | - cudnn=8.2.1.32=h86fa8c9_0 66 | - curl=7.78.0=h1ccaba5_0 67 | - cycler=0.10.0=py38_0 68 | - cython=0.29.24=py38hdbfa776_0 69 | - cytoolz=0.11.0=py38h7b6447c_0 70 | - daal4py=2021.3.0=py38hae6d005_0 71 | - dal=2021.3.0=h06a4308_557 72 | - dask=2021.10.0=pyhd3eb1b0_0 73 | - dask-core=2021.10.0=pyhd3eb1b0_0 74 | - dataclasses=0.8=pyh6d0b6a4_7 75 | - dbus=1.13.18=hb2f20db_0 76 | - debugpy=1.4.1=py38h295c915_0 77 | - decorator=5.1.0=pyhd3eb1b0_0 78 | - defusedxml=0.7.1=pyhd3eb1b0_0 79 | - diff-match-patch=20200713=pyhd3eb1b0_0 80 | - distributed=2021.10.0=py38h06a4308_0 81 | - docutils=0.17.1=py38h06a4308_1 82 | - entrypoints=0.3=py38_0 83 | - et_xmlfile=1.1.0=py38h06a4308_0 84 | - expat=2.4.1=h2531618_2 85 | - fastcache=1.1.0=py38h7b6447c_0 86 | - filelock=3.3.1=pyhd3eb1b0_1 87 | - flake8=3.9.2=pyhd3eb1b0_0 88 | - flask=1.1.2=pyhd3eb1b0_0 89 | - fontconfig=2.13.1=h6c09931_0 90 | - fonttools=4.25.0=pyhd3eb1b0_0 91 | - freetype=2.10.4=h5ab3b9f_0 92 | - fribidi=1.0.10=h7b6447c_0 93 | - fsspec=2021.8.1=pyhd3eb1b0_0 94 | - get_terminal_size=1.0.0=haa9412d_0 95 | - gevent=21.8.0=py38h7f8727e_1 96 | - giflib=5.2.1=h7b6447c_0 97 | - glib=2.69.1=h5202010_0 98 | - glob2=0.7=pyhd3eb1b0_0 99 | - gmp=6.2.1=h2531618_2 100 | - gmpy2=2.0.8=py38hd5f6e3b_3 101 | - graphite2=1.3.14=h23475e2_0 102 | - greenlet=1.1.1=py38h295c915_0 103 | - gst-plugins-base=1.14.0=h8213a91_2 104 | - gstreamer=1.14.0=h28cd5cc_2 105 | - h5py=2.10.0=py38h7918eee_0 106 | - harfbuzz=2.8.1=h6f93f22_0 107 | - hdf5=1.10.4=hb1b8bf9_0 108 | - heapdict=1.0.1=pyhd3eb1b0_0 109 | - html5lib=1.1=pyhd3eb1b0_0 110 | - icu=58.2=he6710b0_3 111 | - idna=3.2=pyhd3eb1b0_0 112 | - imagecodecs=2021.8.26=py38h4cda21f_0 113 | - imageio=2.9.0=pyhd3eb1b0_0 114 | - imagesize=1.2.0=pyhd3eb1b0_0 115 | - importlib-metadata=4.8.1=py38h06a4308_0 116 | - importlib_metadata=4.8.1=hd3eb1b0_0 117 | - inflection=0.5.1=py38h06a4308_0 118 | - iniconfig=1.1.1=pyhd3eb1b0_0 119 | - intel-openmp=2021.4.0=h06a4308_3561 120 | - intervaltree=3.1.0=pyhd3eb1b0_0 121 | - ipykernel=6.4.1=py38h06a4308_1 122 | - ipython=7.29.0=py38hb070fc8_0 123 | - 
ipython_genutils=0.2.0=pyhd3eb1b0_1 124 | - ipywidgets=7.6.5=pyhd3eb1b0_1 125 | - isort=5.9.3=pyhd3eb1b0_0 126 | - itsdangerous=2.0.1=pyhd3eb1b0_0 127 | - jbig=2.1=hdba287a_0 128 | - jdcal=1.4.1=pyhd3eb1b0_0 129 | - jedi=0.18.0=py38h06a4308_1 130 | - jeepney=0.7.1=pyhd3eb1b0_0 131 | - jinja2=2.11.3=pyhd3eb1b0_0 132 | - jinja2-time=0.2.0=pyhd3eb1b0_2 133 | - joblib=1.1.0=pyhd3eb1b0_0 134 | - jpeg=9d=h7f8727e_0 135 | - json5=0.9.6=pyhd3eb1b0_0 136 | - jsonschema=3.2.0=pyhd3eb1b0_2 137 | - jupyter=1.0.0=py38_7 138 | - jupyter_client=6.1.12=pyhd3eb1b0_0 139 | - jupyter_console=6.4.0=pyhd3eb1b0_0 140 | - jupyter_core=4.8.1=py38h06a4308_0 141 | - jupyter_server=1.4.1=py38h06a4308_0 142 | - jupyterlab=3.2.1=pyhd3eb1b0_1 143 | - jupyterlab_pygments=0.1.2=py_0 144 | - jupyterlab_server=2.8.2=pyhd3eb1b0_0 145 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 146 | - jxrlib=1.1=h7b6447c_2 147 | - keyring=23.1.0=py38h06a4308_0 148 | - kiwisolver=1.3.1=py38h2531618_0 149 | - krb5=1.19.2=hac12032_0 150 | - lazy-object-proxy=1.6.0=py38h27cfd23_0 151 | - lcms2=2.12=h3be6417_0 152 | - ld_impl_linux-64=2.35.1=h7274673_9 153 | - lerc=3.0=h295c915_0 154 | - libaec=1.0.4=he6710b0_1 155 | - libarchive=3.4.2=h62408e4_0 156 | - libcurl=7.78.0=h0b77cf5_0 157 | - libdeflate=1.8=h7f8727e_5 158 | - libedit=3.1.20210910=h7f8727e_0 159 | - libev=4.33=h7f8727e_1 160 | - libffi=3.3=he6710b0_2 161 | - libgcc-ng=9.3.0=h5101ec6_17 162 | - libgfortran-ng=7.5.0=ha8ba4b0_17 163 | - libgfortran4=7.5.0=ha8ba4b0_17 164 | - libgomp=9.3.0=h5101ec6_17 165 | - liblief=0.10.1=he6710b0_0 166 | - libllvm11=11.1.0=h3826bc1_0 167 | - libnghttp2=1.41.0=hf8bcb03_2 168 | - libpng=1.6.37=hbc83047_0 169 | - libsodium=1.0.18=h7b6447c_0 170 | - libspatialindex=1.9.3=h2531618_0 171 | - libssh2=1.9.0=h1ba5d50_1 172 | - libstdcxx-ng=9.3.0=hd4cf53a_17 173 | - libtiff=4.2.0=h85742a9_0 174 | - libtool=2.4.6=h7b6447c_1005 175 | - libuuid=1.0.3=h7f8727e_2 176 | - libuv=1.40.0=h7b6447c_0 177 | - libwebp=1.2.0=h89dd481_0 178 | - libwebp-base=1.2.0=h27cfd23_0 179 | - libxcb=1.14=h7b6447c_0 180 | - libxml2=2.9.12=h03d6c58_0 181 | - libxslt=1.1.34=hc22bd24_0 182 | - libzopfli=1.0.3=he6710b0_0 183 | - llvmlite=0.37.0=py38h295c915_1 184 | - locket=0.2.1=py38h06a4308_1 185 | - lxml=4.6.3=py38h9120a33_0 186 | - lz4-c=1.9.3=h295c915_1 187 | - lzo=2.10=h7b6447c_2 188 | - markupsafe=1.1.1=py38h7b6447c_0 189 | - matplotlib=3.4.3=py38h06a4308_0 190 | - matplotlib-base=3.4.3=py38hbbc1b5f_0 191 | - matplotlib-inline=0.1.2=pyhd3eb1b0_2 192 | - mccabe=0.6.1=py38_1 193 | - mistune=0.8.4=py38h7b6447c_1000 194 | - mkl=2021.4.0=h06a4308_640 195 | - mkl-service=2.4.0=py38h7f8727e_0 196 | - mkl_fft=1.3.1=py38hd3c417c_0 197 | - mkl_random=1.2.2=py38h51133e4_0 198 | - mock=4.0.3=pyhd3eb1b0_0 199 | - more-itertools=8.10.0=pyhd3eb1b0_0 200 | - mpc=1.1.0=h10f8cd9_1 201 | - mpfr=4.0.2=hb69a4c5_1 202 | - mpi=1.0=mpich 203 | - mpich=3.3.2=hc856adb_0 204 | - mpmath=1.2.1=py38h06a4308_0 205 | - msgpack-python=1.0.2=py38hff7bd54_1 206 | - multipledispatch=0.6.0=py38_0 207 | - munkres=1.1.4=py_0 208 | - mypy_extensions=0.4.3=py38_0 209 | - nbclassic=0.2.6=pyhd3eb1b0_0 210 | - nbclient=0.5.3=pyhd3eb1b0_0 211 | - nbconvert=6.1.0=py38h06a4308_0 212 | - nbformat=5.1.3=pyhd3eb1b0_0 213 | - ncurses=6.3=heee7806_1 214 | - nest-asyncio=1.5.1=pyhd3eb1b0_0 215 | - networkx=2.6.3=pyhd3eb1b0_0 216 | - nltk=3.6.5=pyhd3eb1b0_0 217 | - nose=1.3.7=pyhd3eb1b0_1006 218 | - notebook=6.4.5=py38h06a4308_0 219 | - numba=0.54.1=py38h51133e4_0 220 | - numexpr=2.7.3=py38h22e1b3c_1 221 | - numpydoc=1.1.0=pyhd3eb1b0_1 222 
| - olefile=0.46=pyhd3eb1b0_0 223 | - openjpeg=2.4.0=h3ad879b_0 224 | - openpyxl=3.0.9=pyhd3eb1b0_0 225 | - openssl=1.1.1l=h7f8727e_0 226 | - packaging=21.0=pyhd3eb1b0_0 227 | - pandas=1.3.4=py38h8c16a72_0 228 | - pandocfilters=1.4.3=py38h06a4308_1 229 | - pango=1.45.3=hd140c19_0 230 | - parso=0.8.2=pyhd3eb1b0_0 231 | - partd=1.2.0=pyhd3eb1b0_0 232 | - patchelf=0.13=h295c915_0 233 | - path=16.0.0=py38h06a4308_0 234 | - path.py=12.5.0=hd3eb1b0_0 235 | - pathlib2=2.3.6=py38h06a4308_2 236 | - pathspec=0.7.0=py_0 237 | - patsy=0.5.2=py38h06a4308_0 238 | - pcre=8.45=h295c915_0 239 | - pep8=1.7.1=py38_0 240 | - pexpect=4.8.0=pyhd3eb1b0_3 241 | - pickleshare=0.7.5=pyhd3eb1b0_1003 242 | - pillow=8.4.0=py38h5aabda8_0 243 | - pip=21.2.4=py38h06a4308_0 244 | - pixman=0.40.0=h7f8727e_1 245 | - pkginfo=1.7.1=py38h06a4308_0 246 | - pluggy=0.13.1=py38h06a4308_0 247 | - ply=3.11=py38_0 248 | - poyo=0.5.0=pyhd3eb1b0_0 249 | - prometheus_client=0.11.0=pyhd3eb1b0_0 250 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0 251 | - prompt_toolkit=3.0.20=hd3eb1b0_0 252 | - psutil=5.8.0=py38h27cfd23_1 253 | - ptyprocess=0.7.0=pyhd3eb1b0_2 254 | - py=1.10.0=pyhd3eb1b0_0 255 | - py-lief=0.10.1=py38h403a769_0 256 | - pycodestyle=2.7.0=pyhd3eb1b0_0 257 | - pycosat=0.6.3=py38h7b6447c_1 258 | - pycparser=2.20=py_2 259 | - pycurl=7.44.1=py38h8f2d780_1 260 | - pydocstyle=6.1.1=pyhd3eb1b0_0 261 | - pyerfa=2.0.0=py38h27cfd23_0 262 | - pyflakes=2.3.1=pyhd3eb1b0_0 263 | - pygments=2.10.0=pyhd3eb1b0_0 264 | - pylint=2.9.6=py38h06a4308_1 265 | - pyls-spyder=0.4.0=pyhd3eb1b0_0 266 | - pyodbc=4.0.31=py38h295c915_0 267 | - pyopenssl=21.0.0=pyhd3eb1b0_1 268 | - pyparsing=3.0.4=pyhd3eb1b0_0 269 | - pyqt=5.9.2=py38h05f1152_4 270 | - pyrsistent=0.18.0=py38heee7806_0 271 | - pysocks=1.7.1=py38h06a4308_0 272 | - pytables=3.6.1=py38h9fd0a39_0 273 | - pytest=6.2.4=py38h06a4308_2 274 | - python=3.8.12=h12debd9_0 275 | - python-dateutil=2.8.2=pyhd3eb1b0_0 276 | - python-libarchive-c=2.9=pyhd3eb1b0_1 277 | - python-lsp-black=1.0.0=pyhd3eb1b0_0 278 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 279 | - python-lsp-server=1.2.4=pyhd3eb1b0_0 280 | - python-slugify=5.0.2=pyhd3eb1b0_0 281 | - pytz=2021.3=pyhd3eb1b0_0 282 | - pywavelets=1.1.1=py38h7b6447c_2 283 | - pyxdg=0.27=pyhd3eb1b0_0 284 | - pyyaml=6.0=py38h7f8727e_1 285 | - pyzmq=22.2.1=py38h295c915_1 286 | - qdarkstyle=3.0.2=pyhd3eb1b0_0 287 | - qstylizer=0.1.10=pyhd3eb1b0_0 288 | - qt=5.9.7=h5867ecd_1 289 | - qtawesome=1.0.2=pyhd3eb1b0_0 290 | - qtconsole=5.1.1=pyhd3eb1b0_0 291 | - qtpy=1.10.0=pyhd3eb1b0_0 292 | - readline=8.1=h27cfd23_0 293 | - regex=2021.8.3=py38h7f8727e_0 294 | - requests=2.26.0=pyhd3eb1b0_0 295 | - ripgrep=12.1.1=0 296 | - rope=0.19.0=pyhd3eb1b0_0 297 | - rtree=0.9.7=py38h06a4308_1 298 | - ruamel_yaml=0.15.100=py38h27cfd23_0 299 | - scikit-image=0.18.3=py38h51133e4_0 300 | - scikit-learn=0.24.2=py38ha9443f7_0 301 | - scikit-learn-intelex=2021.3.0=py38h06a4308_0 302 | - scipy=1.7.1=py38h292c36d_2 303 | - seaborn=0.11.2=pyhd3eb1b0_0 304 | - secretstorage=3.3.1=py38h06a4308_0 305 | - send2trash=1.8.0=pyhd3eb1b0_1 306 | - setuptools=58.0.4=py38h06a4308_0 307 | - simplegeneric=0.8.1=py38_2 308 | - singledispatch=3.7.0=pyhd3eb1b0_1001 309 | - sip=4.19.13=py38he6710b0_0 310 | - six=1.16.0=pyhd3eb1b0_0 311 | - snappy=1.1.8=he6710b0_0 312 | - sniffio=1.2.0=py38h06a4308_1 313 | - snowballstemmer=2.1.0=pyhd3eb1b0_0 314 | - sortedcollections=2.1.0=pyhd3eb1b0_0 315 | - sortedcontainers=2.4.0=pyhd3eb1b0_0 316 | - soupsieve=2.2.1=pyhd3eb1b0_0 317 | - sphinx=4.2.0=pyhd3eb1b0_1 318 | - 
sphinxcontrib=1.0=py38_1 319 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 320 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 321 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 322 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 323 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 324 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 325 | - sphinxcontrib-websupport=1.2.4=py_0 326 | - spyder=5.1.5=py38h06a4308_1 327 | - spyder-kernels=2.1.3=py38h06a4308_0 328 | - sqlalchemy=1.4.22=py38h7f8727e_0 329 | - sqlite=3.36.0=hc218d9a_0 330 | - statsmodels=0.12.2=py38h27cfd23_0 331 | - sympy=1.9=py38h06a4308_0 332 | - tbb=2021.4.0=hd09550d_0 333 | - tbb4py=2021.4.0=py38hd09550d_0 334 | - tblib=1.7.0=pyhd3eb1b0_0 335 | - terminado=0.9.4=py38h06a4308_0 336 | - testpath=0.5.0=pyhd3eb1b0_0 337 | - text-unidecode=1.3=pyhd3eb1b0_0 338 | - textdistance=4.2.1=pyhd3eb1b0_0 339 | - threadpoolctl=2.2.0=pyh0d69192_0 340 | - three-merge=0.1.1=pyhd3eb1b0_0 341 | - tifffile=2021.7.2=pyhd3eb1b0_2 342 | - tinycss=0.4=pyhd3eb1b0_1002 343 | - tk=8.6.11=h1ccaba5_0 344 | - toml=0.10.2=pyhd3eb1b0_0 345 | - toolz=0.11.1=pyhd3eb1b0_0 346 | - tornado=6.1=py38h27cfd23_0 347 | - tqdm=4.62.3=pyhd3eb1b0_1 348 | - traitlets=5.1.0=pyhd3eb1b0_0 349 | - typed-ast=1.4.3=py38h7f8727e_1 350 | - typing_extensions=3.10.0.2=pyh06a4308_0 351 | - ujson=4.0.2=py38h2531618_0 352 | - unicodecsv=0.14.1=py38_0 353 | - unidecode=1.2.0=pyhd3eb1b0_0 354 | - unixodbc=2.3.9=h7b6447c_0 355 | - urllib3=1.26.7=pyhd3eb1b0_0 356 | - watchdog=2.1.3=py38h06a4308_0 357 | - wcwidth=0.2.5=pyhd3eb1b0_0 358 | - webencodings=0.5.1=py38_1 359 | - werkzeug=2.0.2=pyhd3eb1b0_0 360 | - wheel=0.37.0=pyhd3eb1b0_1 361 | - whichcraft=0.6.1=pyhd3eb1b0_0 362 | - widgetsnbextension=3.5.1=py38_0 363 | - wrapt=1.12.1=py38h7b6447c_1 364 | - wurlitzer=2.1.1=py38h06a4308_0 365 | - xlrd=2.0.1=pyhd3eb1b0_0 366 | - xlsxwriter=3.0.1=pyhd3eb1b0_0 367 | - xlwt=1.3.0=py38_0 368 | - xz=5.2.5=h7b6447c_0 369 | - yaml=0.2.5=h7b6447c_0 370 | - yapf=0.31.0=pyhd3eb1b0_0 371 | - zeromq=4.3.4=h2531618_0 372 | - zfp=0.5.5=h2531618_6 373 | - zict=2.0.0=pyhd3eb1b0_0 374 | - zipp=3.6.0=pyhd3eb1b0_0 375 | - zlib=1.2.11=h7b6447c_3 376 | - zope=1.0=py38_1 377 | - zope.event=4.5.0=py38_0 378 | - zope.interface=5.4.0=py38h7f8727e_0 379 | - zstd=1.4.9=haebb681_0 380 | - pip: 381 | - cityscapesscripts==1.5.0 382 | - flatbuffers==2.0 383 | # - nicr-scene-analysis-datasets==0.3.1 384 | - numpy==1.22.3 385 | - onnx==1.11.0 386 | - onnxruntime-gpu==1.11.0 387 | - opencv-python==4.2.0.34 388 | - protobuf==3.20.1 389 | prefix: /home/user/anaconda3/envs/semantic_mapping 390 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | # E501 line too long (82 > 79 characters) 3 | # E402 module level import not at top of file 4 | # E731 do not assign a lambda expression, use a def 5 | # +pep8 default ignore: E121, E123, E126, E226, E24, E704 6 | ignore = E226, E501, E402, E731, E121, E123, E126, E226, E24, E704, E265 -------------------------------------------------------------------------------- /trained_models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/trained_models/.gitkeep -------------------------------------------------------------------------------- /utils.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import os 6 | 7 | 8 | def _get_default_path(*path_components): 9 | base_path = os.path.dirname(os.path.abspath(__file__)) 10 | return os.path.join(base_path, *path_components) 11 | 12 | 13 | DEFAULT_ONNX_FILEPATH = _get_default_path('trained_models', 14 | 'model_hypersim.onnx') 15 | 16 | DEFAULT_DATASET_PATH = _get_default_path('datasets', 'hypersim') 17 | 18 | DEFAULT_PREDICTIONS_PATH = _get_default_path('datasets', 'hypersim_predictions') 19 | --------------------------------------------------------------------------------
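
A short note on the stored top-k predictions written by `predict.py` above: each value in the saved `.npy` arrays packs `class_idx + score` into a single float, where the integer part is the class index (already shifted by +1 for void) and the fractional part is the clipped softmax score. Below is a minimal decoding sketch; the concrete file path is only a hypothetical example following the scene/camera/frame layout and is not guaranteed to exist.

```python
import numpy as np

# hypothetical example path (scene/camera/frame layout used throughout the repository)
fp = './datasets/hypersim_predictions/test/semantic_40_topk/ai_001_010/cam_00/0000.npy'

arr = np.load(fp)              # shape: (topk, h, w), float values such as 23.87

classes = arr.astype('uint8')  # integer part -> class index (void + 40 classes labeling)
scores = arr - classes         # fractional part -> softmax score, clipped to [0, 0.9999]

top1_classes = classes[0]      # equals what evaluate.py loads via pred[0, ...].astype('uint8')
top1_scores = scores[0]
```

This simply inverts the packing done in `predict.py` (`segmentation = scores_clamped + classes`) and mirrors the top-1 loading of `.npy` predictions in `evaluate.py`.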