├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── datasets └── .gitkeep ├── evaluate.py ├── predict.py ├── semantic_mapping.yaml ├── tox.ini ├── trained_models └── .gitkeep └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .coverage.* 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | *.cover 44 | .hypothesis/ 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | local_settings.py 53 | 54 | # Flask stuff: 55 | instance/ 56 | .webassets-cache 57 | 58 | # Scrapy stuff: 59 | .scrapy 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | 67 | # Jupyter Notebook 68 | .ipynb_checkpoints 69 | 70 | # pyenv 71 | .python-version 72 | 73 | # celery beat schedule file 74 | celerybeat-schedule 75 | 76 | # SageMath parsed files 77 | *.sage.py 78 | 79 | # Environments 80 | .env 81 | .venv 82 | env/ 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | .spyproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # mkdocs documentation 94 | /site 95 | 96 | # mypy 97 | .mypy_cache/ 98 | 99 | # PyCharm 100 | .idea 101 | 102 | # MacOS 103 | .DS_Store 104 | 105 | # Binaries 106 | .npz 107 | .npy 108 | .h5 109 | .hdf5 110 | core 111 | 112 | 113 | # VSCode 114 | # settings.json 115 | 116 | # onnx models 117 | *.onnx 118 | 119 | # data 120 | datasets/hypersim 121 | datasets/hypersim_predictions 122 | trained_models/*.tar.gz 123 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/nicr-scene-analysis-datasets"] 2 | path = lib/nicr-scene-analysis-datasets 3 | url = https://github.com/TUI-NICR/nicr-scene-analysis-datasets 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pycodestyleEnabled": true, 3 | "python.linting.enabled": true, 4 | // disable annoying top-level source code modification indication 5 | "gitlens.codeLens.authors.enabled": false, 6 | "gitlens.codeLens.recentChange.enabled": false, 7 | "files.trimTrailingWhitespace": true, 8 | "[markdown]": { 9 | "files.trimTrailingWhitespace": false 10 | }, 11 | } 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021-2022, Neuroinformatics and Cognitive Robotics Lab (Technische 4 | Universität Ilmenau) 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient and Robust Semantic Mapping for Indoor Environments 2 | 3 | This repository contains the code to our paper "Efficient and Robust Semantic Mapping for Indoor Environments" ([IEEE Xplore](https://ieeexplore.ieee.org/document/9812205), [arXiv](https://arxiv.org/pdf/2203.05836.pdf)). 4 | 5 |
6 | 7 |
(Click on the image to open YouTube video) 8 |

9 |
10 | 11 | > You may also want to have a look at our follow-up work: [**PanopticNDT**](https://github.com/TUI-NICR/panoptic-mapping) 12 | 13 | ## License and Citations 14 | The source code and the network weights are published under BSD 3-Clause license, see [license file](LICENSE) for details. 15 | 16 | If you use the source code or the network weights, please cite the following paper: 17 | >Seichter, D., Langer, P., Wengefeld, T., Lewandowski, B., Höchemer, D., Gross, H.-M. 18 | *Efficient and Robust Semantic Mapping for Indoor Environments* 19 | in IEEE International Conference on Robotics and Automation (ICRA), pp. 9221-9227, 2022. 20 | 21 |
22 | BibTeX 23 | 24 | ```bibtex 25 | @inproceedings{semanticndtmapping2022icra, 26 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}}, 27 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael}, 28 | booktitle = {IEEE International Conference on Robotics and Automation (ICRA)}, 29 | year = {2022}, 30 | volume = {}, 31 | number = {}, 32 | pages = {9221-9227} 33 | } 34 | 35 | @article{semanticndtmapping2022arXiv, 36 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}}, 37 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael}, 38 | journal = {arXiv preprint arXiv:2203.05836}, 39 | year = {2022} 40 | } 41 | ``` 42 | Note that the preprint was accepted to be published in IEEE International Conference on Robotics and Automation (ICRA). 43 | 44 |
45 | 46 | ## Setup 47 | 48 | 1. Clone repository: 49 | ```bash 50 | # do not forget the '--recursive' ;) 51 | git clone --recursive https://github.com/TUI-NICR/semantic-mapping.git 52 | 53 | cd /path/to/this/repository 54 | ``` 55 | 56 | 2. Set up anaconda environment including all dependencies: 57 | ```bash 58 | # option 1: create conda environment from YAML file 59 | conda env create -f semantic_mapping.yaml 60 | conda activate semantic_mapping 61 | 62 | # option 2: create new environment (see last tested versions) 63 | conda create -n semantic_mapping python==3.8.12 anaconda==2021.11 64 | conda activate semantic_mapping 65 | pip install onnx==1.11.0 66 | pip install opencv-python==4.2.0.34 67 | pip install tqdm==4.62.3 68 | # ONNXRuntime with CUDA support 69 | conda install -c conda-forge cudnn==8.2.1.32 70 | pip install onnxruntime-gpu==1.11.0 71 | 72 | 73 | # finally, install our package for preparing and using the Hypersim dataset 74 | pip install ./lib/nicr-scene-analysis-datasets[with_preparation] 75 | ``` 76 | 77 | ## Usage 78 | 79 | 1. Prepare the [Hypersim](https://machinelearning.apple.com/research/hypersim) dataset: 80 | ```bash 81 | # download and extract raw dataset (2x ~1.8TB) 82 | HYPERSIM_DOWNLOAD_PATH='./datasets/hypersim_preparation' 83 | wget https://raw.githubusercontent.com/apple/ml-hypersim/6cbaa80207f44a312654e288cf445016c84658a1/code/python/tools/dataset_download_images.py 84 | python dataset_download_images.py --downloads_dir $HYPERSIM_DOWNLOAD_PATH 85 | 86 | # prepare dataset (~157.5 GB, extract required data, convert to our format, blacklist some scenes/trajectories) 87 | python -m nicr_scene_analysis_datasets.datasets.hypersim.prepare_dataset \ 88 | ./datasets/hypersim \ 89 | $HYPERSIM_DOWNLOAD_PATH \ 90 | --additional-subsamples 2 5 10 20 \ 91 | --multiprocessing 92 | 93 | # just in case you want to delete the downloaded raw data (2x ~1.8TB) 94 | rm -rf $HYPERSIM_DOWNLOAD_PATH 95 | 96 | ``` 97 | For further details, we refer to the documentation of our 98 | [nicr-scene-analysis-datasets python package](https://github.com/TUI-NICR/nicr-scene-analysis-datasets/tree/882276c46ca5864ebb6146afe6bae56d0b1abc11). 99 | 100 | 2. Download pretrained model: 101 | We provide the weights of our selected ESANet-R34-NBt1D (enhanced ResNet34-based encoder utilizing the Non-Bottleneck-1D block) trained on the Hypersim dataset. 102 | To ease both application and deployment, we removed all dependencies (PyTorch, ...) and provide the weights in [ONNX format](https://onnx.ai/). 103 | 104 | Click [here](https://drive.google.com/uc?id=1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg) to download the model and extract it to `./trained_models` or use: 105 | ```bash 106 | pip install gdown # last tested: 4.4.0 107 | gdown 1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg --output ./trained_models/ 108 | tar -xvzf ./trained_models/model_hypersim.tar.gz -C ./trained_models/ 109 | 110 | ``` 111 | 112 | The model was selected based on the mean intersection over union (mIoU) on the validation split: 0.4591184410660463 at epoch 498. 113 | On the test split, the model achieves a mIoU of 0.41168890871760977. 114 | Note, similar to other approaches, we only evaluate up to a reasonable maximum distance of 20m from the camera. For more detail, see `evaluate.py`. 115 | 116 | 117 | 3. 
Extract predicted semantic segmentation: 118 | ```bash 119 | # use default paths (~74.3GB for topk with k=3) 120 | python predict.py \ 121 | --onnx-filepath ./trained_models/model_hypersim.onnx \ 122 | --dataset-path ./datasets/hypersim \ 123 | --dataset-split test \ 124 | --topk 3 \ 125 | --output-path ./datasets/hypersim_predictions 126 | 127 | # for more details, see: 128 | python predict.py --help 129 | ``` 130 | For the example above, the predicted segmentations are stored at `./datasets/hypersim_predictions/test/`. 131 | See the `semantic_40_topk` subfolder for the predicted topK segmentation outputs and `semantic_40/` or `semantic_40_colored/` for the predicted (colored) top1 labels. 132 | 133 | 4. Run your semantic mapping experiments and store the results with the following folder structure: 134 | ```text 135 | path/to/results/ 136 | └── test 137 | ├── results1 138 | │   ├── ai_001_010 139 | │   │   ├── cam_00 140 | │   │   │   ├── 0000.png 141 | │   │   │   ├── ... 142 | ├── results2 143 | │   ├── ai_001_010 144 | │   │   ├── cam_00 145 | │   │   │   ├── 0000.png 146 | │   │   │   ├── ... 147 | ``` 148 | You may have a look at `./lib/nicr-scene-analysis-datasets/nicr_scene_analysis_datasets/mira/_hypersim_reader.py` as a starting point. 149 | This class shows how the Hypersim dataset is processed in our pipelines. 150 | 151 | 5. Run evaluation: 152 | ```bash 153 | # use default paths 154 | python evaluate.py \ 155 | --dataset-path ./datasets/hypersim \ 156 | --dataset-split test \ 157 | --predictions-path ./datasets/hypersim_predictions 158 | [--result-paths path/to/results/test/results1 path/to/results/test/results2] 159 | 160 | # for more details, see: 161 | python evaluate.py --help 162 | ``` 163 | 164 | For the predicted segmentation of our ONNX model, you should obtain measures similar to: 165 | ```text 166 | miou_gt_masked: 0.41168890871760977 167 | mean_pacc_gt_masked: 0.5683601556433829 168 | invalid_ratio: 0.0 169 | invalid_mean_ratio_gt_masked: 0.0 170 | vwmiou_gt_masked: 0.41168890871760977 171 | vwmean_pacc_gt_masked: 0.5683601556433829 172 | ``` 173 | Check the created `results.json` in the predictions folder for more measures (e.g., `./datasets/hypersim_predictions/test/semantic_40/results.json`). 174 | -------------------------------------------------------------------------------- /datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/datasets/.gitkeep -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ..
codeauthor:: Daniel Seichter 4 | """ 5 | import argparse as ap 6 | from functools import partial 7 | import json 8 | import multiprocessing 9 | import os 10 | import warnings 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from tqdm import tqdm 16 | from tqdm.contrib.concurrent import thread_map 17 | 18 | from nicr_scene_analysis_datasets import Hypersim 19 | 20 | from utils import DEFAULT_DATASET_PATH 21 | from utils import DEFAULT_PREDICTIONS_PATH 22 | 23 | 24 | def confusion_matrix_fast(pred, gt, n_classes): 25 | # note: this function is 15x faster than sklearn.metrics.confusion_matrix 26 | 27 | # determine dtype for unique mapping 28 | n_classes_squared = n_classes**2 29 | if n_classes_squared < 2**(8-1)-1: 30 | dtype = np.int8 31 | elif n_classes_squared < 2**(16-1)-1: 32 | dtype = np.int16 33 | else: 34 | dtype = np.int64 # equal to long 35 | 36 | # convert to dtype 37 | pred_ = pred.astype(dtype) 38 | gt_ = gt.astype(dtype) 39 | 40 | # compute confusion matrix 41 | unique_mapping = (gt_.reshape(-1)*n_classes + pred_.reshape(-1)) 42 | cnts = np.bincount(unique_mapping, 43 | minlength=n_classes_squared) 44 | 45 | return cnts.reshape(n_classes, n_classes) 46 | 47 | 48 | def get_confusion_matrix_for_sample( 49 | sample_idx, 50 | dataset, 51 | prediction_basepath, 52 | prediction_extension='.png', 53 | prediction_contains_void=True, 54 | max_depth_in_m=20 # max 20m 55 | ): 56 | n_classes = dataset.semantic_n_classes # with void 57 | 58 | # get sample 59 | sample = dataset[sample_idx] 60 | 61 | # load prediction 62 | fp = os.path.join(prediction_basepath, *sample['identifier']) 63 | fp += prediction_extension 64 | if '.png' == prediction_extension: 65 | # prediction is given as image 66 | pred = cv2.imread(fp, cv2.IMREAD_UNCHANGED) 67 | if pred is None: 68 | raise IOError(f"Cannot load '{fp}'") 69 | if pred.ndim > 2: 70 | warnings.warn(f"Prediction ('{fp}') has more than one channel. "
71 | "Using first channel.") 72 | pred = pred[..., 0] 73 | elif '.npy' == prediction_extension: 74 | # prediction is given as numpy array with shape (topk, h, w) 75 | pred = np.load(fp) 76 | pred = pred[0, ...].astype('uint8') # use top1 only 77 | 78 | if not prediction_contains_void: 79 | pred += 1 80 | 81 | # create flat views 82 | gt = sample['semantic'].reshape(-1) 83 | pred = pred.reshape(-1) 84 | 85 | # mask using max depth 86 | if max_depth_in_m is not None: 87 | depth = sample['depth'].reshape(-1) 88 | mask = depth < (max_depth_in_m*1000) 89 | gt = gt[mask] 90 | pred = pred[mask] 91 | 92 | # move invalid pixels in prediction, i.e., pixels that may indicate free 93 | # space, to class with index i=n_classes 94 | pred[pred > (n_classes-1)] = n_classes 95 | n_classes = n_classes + 1 # +1 = invalid pixels 96 | 97 | return confusion_matrix_fast(pred, gt, n_classes=n_classes) 98 | 99 | 100 | def get_measures(cm, ignore_void=True): 101 | # cm is gt x pred with void + n_classes + invalid (free space) 102 | 103 | tp = np.diag(cm) 104 | sum_gt = cm.sum(axis=1) 105 | sum_pred = cm.sum(axis=0) 106 | invalid_pixels = cm[:, -1] 107 | 108 | if ignore_void: 109 | # void is first class (idx=0) 110 | tp = tp[1:] 111 | sum_pred = sum_pred[1:] 112 | sum_gt = sum_gt[1:] 113 | sum_pred -= cm[0, 1:] # do not count fp for void 114 | invalid_pixels = invalid_pixels[1:] 115 | 116 | n_total_pixels = sum_gt.sum() 117 | 118 | # we do want to ignore classes without gt pixels 119 | gt_mask = sum_gt != 0 120 | 121 | # invalid pixels 122 | invalid_ratio = invalid_pixels.sum() / n_total_pixels 123 | with np.errstate(divide='ignore', invalid='ignore'): 124 | invalid_ratios = invalid_pixels / sum_gt 125 | invalid_mean_ratio_gt_masked = np.mean(invalid_ratios[gt_mask]) 126 | valid_weights = 1 - invalid_ratios 127 | 128 | # intersection over union 129 | intersections = tp 130 | unions = sum_pred + sum_gt - tp 131 | 132 | with np.errstate(divide='ignore', invalid='ignore'): 133 | ious = intersections / unions.astype(np.float32) 134 | 135 | # mean intersection over union and gt masked version 136 | miou = np.mean(np.nan_to_num(ious, nan=0.0)) 137 | miou_gt_masked = np.mean(ious[gt_mask]) 138 | 139 | # frequency weighted intersection over union 140 | # normal fwiou and gt masked version are equal 141 | fwiou_gt_masked = np.sum(ious[gt_mask] * tp[gt_mask]/n_total_pixels) 142 | 143 | # pixel accuracy and mean pixel accuracy 144 | pacc = tp.sum() / sum_gt.sum() 145 | 146 | with np.errstate(divide='ignore', invalid='ignore'): 147 | paccs = tp / sum_gt 148 | 149 | mean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask]) 150 | 151 | # valid weighted mean intersection over union 152 | vwmiou_gt_masked = np.mean(ious[gt_mask]*valid_weights[gt_mask]) 153 | 154 | # valid weighted mean pixel accuracy 155 | vwmean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask] * valid_weights[gt_mask]) 156 | 157 | # build dict of measures 158 | measures = { 159 | 'cm': cm.tolist(), 160 | 'invalid_ratio': invalid_ratio, 161 | 'invalid_ratios': invalid_ratios.tolist(), 162 | 'invalid_mean_ratio_gt_masked': invalid_mean_ratio_gt_masked, 163 | 'ious': ious.tolist(), 164 | 'miou': miou, 165 | 'miou_gt_masked': miou_gt_masked, 166 | 'fwiou_gt_masked': fwiou_gt_masked, 167 | 'pacc': pacc, 168 | 'paccs': paccs.tolist(), 169 | 'mean_pacc_gt_masked': mean_pacc_gt_masked, 170 | 'vwmiou_gt_masked': vwmiou_gt_masked, 171 | 'vwmean_pacc_gt_masked': vwmean_pacc_gt_masked, 172 | } 173 | 174 | return measures 175 | 176 | 177 | def _parse_args(): 178 |
parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter) 179 | parser.add_argument( 180 | '--dataset-path', 181 | type=str, 182 | default=DEFAULT_DATASET_PATH, 183 | help="Path to the dataset." 184 | ) 185 | parser.add_argument( 186 | '--dataset-split', 187 | type=str, 188 | default='test', 189 | help="Dataset split to use." 190 | ) 191 | parser.add_argument( 192 | '--predictions-path', 193 | type=str, 194 | default=DEFAULT_PREDICTIONS_PATH, 195 | help="Path to stored predicted semantic segmentation. Use an empty " 196 | "string to skip evaluating the predicted semantic " 197 | "segmentation." 198 | ) 199 | parser.add_argument( 200 | '--result-paths', 201 | nargs='+', 202 | type=str, 203 | help="Paths to further results.", 204 | default=[] 205 | ) 206 | parser.add_argument( 207 | '--force-recomputing', 208 | action='store_true', 209 | default=False, 210 | help="Force recomputing." 211 | ) 212 | parser.add_argument( 213 | '--n-worker', 214 | type=int, 215 | default=min(multiprocessing.cpu_count(), 48), 216 | help="Number of workers to use." 217 | ) 218 | 219 | return parser.parse_args() 220 | 221 | 222 | def main(): 223 | # args 224 | args = _parse_args() 225 | 226 | # just obtain all sample names 227 | dataset = Hypersim(dataset_path=args.dataset_path, 228 | split=args.dataset_split, 229 | subsample=None, 230 | sample_keys=('identifier',)) 231 | samples = [s['identifier'] for s in dataset] # tuple (scene, cam, id) 232 | scenes = sorted(list(set(s[0] for s in samples))) 233 | 234 | # load dataset 235 | dataset = Hypersim(dataset_path=args.dataset_path, 236 | split=args.dataset_split, 237 | subsample=None, 238 | sample_keys=('identifier', 'depth', 'semantic'), 239 | use_cache=False, 240 | cache_disable_deepcopy=False) 241 | 242 | # get paths to evaluate 243 | paths = [] 244 | if args.predictions_path: 245 | # evaluate the network prediction 246 | paths += [ 247 | os.path.join(args.predictions_path, args.dataset_split, 248 | Hypersim.SEMANTIC_DIR), 249 | ] 250 | paths += [ 251 | os.path.join(path) for path in args.result_paths 252 | ] 253 | 254 | # run evaluation 255 | for path in tqdm(paths): 256 | print(f"Evaluating: '{path}'") 257 | results_fp = os.path.join(path, 'results.json') 258 | 259 | if os.path.exists(results_fp) and not args.force_recomputing: 260 | continue 261 | 262 | # get confusion matrices 263 | if 1 == args.n_worker: 264 | cms = [] 265 | for i in tqdm(range(len(dataset))): 266 | cm = get_confusion_matrix_for_sample( 267 | i, 268 | dataset=dataset, 269 | prediction_basepath=path, 270 | prediction_extension='.png', 271 | prediction_contains_void=True, 272 | max_depth_in_m=20 273 | ) 274 | cms.append(cm) 275 | else: 276 | f = partial(get_confusion_matrix_for_sample, 277 | dataset=dataset, 278 | prediction_basepath=path, 279 | prediction_extension='.png', 280 | prediction_contains_void=True, 281 | max_depth_in_m=20) 282 | cms = thread_map(f, list(range(len(dataset))), 283 | max_workers=args.n_worker, 284 | chunksize=10, 285 | leave=False) 286 | 287 | # get overall measures 288 | assert len(cms) == len(samples) 289 | cm = np.array(cms).sum(axis=0) 290 | 291 | measures = get_measures(cm, ignore_void=True) 292 | for k in ('miou_gt_masked', 'mean_pacc_gt_masked', 293 | 'invalid_ratio', 'invalid_mean_ratio_gt_masked', 294 | 'vwmiou_gt_masked', 'vwmean_pacc_gt_masked'): 295 | print(f"{k}: {measures[k]}") 296 | 297 | # get results for each scene 298 | cms_per_scene = {s: [] for s in scenes} 299 | for cm, sample in zip(cms, samples): 300 | scene = sample[0] 301 |
cms_per_scene[scene].append(cm) 302 | 303 | measures['per_scene'] = {} 304 | for scene, cms_scene in cms_per_scene.items(): 305 | cm = np.array(cms_scene).sum(axis=0) 306 | measures['per_scene'][scene] = get_measures(cm, ignore_void=True) 307 | 308 | # write results to file 309 | with open(results_fp, 'w') as f: 310 | json.dump(measures, f, indent=4) 311 | 312 | 313 | if __name__ == '__main__': 314 | main() 315 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import argparse as ap 6 | import os 7 | 8 | import cv2 9 | from nicr_scene_analysis_datasets import Hypersim 10 | from nicr_scene_analysis_datasets.utils.img import save_indexed_png 11 | import numpy as np 12 | import onnx 13 | import onnxruntime as ort 14 | from tqdm import tqdm 15 | 16 | from utils import DEFAULT_DATASET_PATH 17 | from utils import DEFAULT_ONNX_FILEPATH 18 | from utils import DEFAULT_PREDICTIONS_PATH 19 | 20 | 21 | def _get_ort_session(onnx_filepath, img_hw, topk=3): 22 | model = onnx.load(onnx_filepath) 23 | 24 | # get network output shape (same as input shape) 25 | # note: our optimizations to the resize operations seem to break onnx's 26 | # shape inference with OpSet >= 13 27 | model_output_img_shape = ( 28 | model.graph.input[0].type.tensor_type.shape.dim[2].dim_value, 29 | model.graph.input[0].type.tensor_type.shape.dim[3].dim_value 30 | ) 31 | 32 | # add missing nodes: final upsampling, softmax, and topk 33 | # see: https://github.com/onnx/onnx/blob/main/docs/Operators.md 34 | # -> final upsampling 35 | final_upsampling_node = onnx.helper.make_node( 36 | 'Resize', 37 | # inputs=['output', 'roi', 'scales'], 38 | inputs=['output', '', 'scales'], # '' for 'roi' requires OpSet >= 13 39 | outputs=['final_upsampling_output'], 40 | coordinate_transformation_mode='pytorch_half_pixel', 41 | cubic_coeff_a=-0.75, 42 | mode='linear', 43 | nearest_mode='floor', 44 | ) 45 | # roi = onnx.helper.make_tensor('roi', onnx.TensorProto.FLOAT, [0], []) 46 | scale_h = img_hw[0] / model_output_img_shape[0] 47 | scale_w = img_hw[1] / model_output_img_shape[1] 48 | scales = onnx.helper.make_tensor('scales', 49 | onnx.TensorProto.FLOAT, [4], 50 | [1, 1, scale_h, scale_w]) 51 | # -> softmax (note that softmax op with 4D inputs requires OpSet >= 13) 52 | softmax_node = onnx.helper.make_node( 53 | 'Softmax', 54 | inputs=['final_upsampling_output'], 55 | outputs=['prediction'], 56 | axis=1 57 | ) 58 | # topk 59 | topk_node = onnx.helper.make_node( 60 | 'TopK', 61 | inputs=['prediction', 'k'], 62 | outputs=['scores', 'classes'], 63 | axis=1, 64 | largest=1, 65 | sorted=1 66 | ) 67 | k = onnx.helper.make_tensor('k', onnx.TensorProto.INT64, [1], [int(topk)]) 68 | 69 | # add new nodes and initializers to graph 70 | # model.graph.initializer.append(roi) 71 | model.graph.initializer.append(scales) 72 | model.graph.node.append(final_upsampling_node) 73 | model.graph.node.append(softmax_node) 74 | model.graph.initializer.append(k) 75 | model.graph.node.append(topk_node) 76 | 77 | # replace output information 78 | if model.graph.input[0].type.tensor_type.shape.dim[0].dim_param: 79 | # dynamic batch axis 80 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_param 81 | else: 82 | # fixed batch axis 83 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_value 84 | 85 | scores_info = 
onnx.helper.make_tensor_value_info('scores', 86 | onnx.TensorProto.FLOAT, 87 | shape=[b, topk, *img_hw]) 88 | classes_info = onnx.helper.make_tensor_value_info('classes', 89 | onnx.TensorProto.INT64, 90 | shape=[b, topk, *img_hw]) 91 | model.graph.output.pop(0) 92 | model.graph.output.append(scores_info) 93 | model.graph.output.append(classes_info) 94 | 95 | # perform final check 96 | onnx.checker.check_model(model) 97 | # onnx.save(model, './model.onnx') 98 | 99 | # create onnxruntime session 100 | ort_session = ort.InferenceSession( 101 | model.SerializeToString(), 102 | providers=[ 103 | # 'TensorrtExecutionProvider', 104 | 'CUDAExecutionProvider', 105 | 'CPUExecutionProvider' 106 | ] 107 | ) 108 | return ort_session 109 | 110 | 111 | def _parse_args(): 112 | parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter) 113 | parser.add_argument( 114 | '--onnx-filepath', 115 | type=str, 116 | default=DEFAULT_ONNX_FILEPATH, 117 | help="Path to ONNX model to use." 118 | ) 119 | parser.add_argument( 120 | '--dataset-path', 121 | type=str, 122 | default=DEFAULT_DATASET_PATH, 123 | help="Path to the dataset." 124 | ) 125 | parser.add_argument( 126 | '--dataset-split', 127 | type=str, 128 | default='test', 129 | help="Dataset split to use." 130 | ) 131 | parser.add_argument( 132 | '--output-path', 133 | type=str, 134 | default=DEFAULT_PREDICTIONS_PATH, 135 | help="Path where to store predicted semantic segmentation." 136 | ) 137 | parser.add_argument( 138 | '--topk', 139 | type=int, 140 | default=3, 141 | help="TopK classes to consider." 142 | ) 143 | return parser.parse_args() 144 | 145 | 146 | def main(): 147 | # args 148 | args = _parse_args() 149 | 150 | # load data 151 | dataset = Hypersim( 152 | dataset_path=args.dataset_path, 153 | split=args.dataset_split, 154 | subsample=None, 155 | sample_keys=('identifier', 'rgb', 'depth'), 156 | depth_mode='raw' 157 | ) 158 | 159 | RGB_MEAN = np.array((0.485, 0.456, 0.406), dtype='float32') * 255 160 | RGB_STD = np.array((0.229, 0.224, 0.225), dtype='float32') * 255 161 | 162 | # ensure that the used depth stats are valid for this model (there was a 163 | # copy and paste issue that we fixed in future versions of 164 | # nicr_scene_analysis_datasets) 165 | assert dataset.depth_mean == 6249.621001070915 166 | assert dataset.depth_std == 6249.621001070915 # <- c&p ^^ 167 | 168 | # process files (for simplicity, with batch size 1) 169 | ort_session = None 170 | for sample in tqdm(dataset, desc='Processing files'): 171 | # load model lazily (we need a sample to get the spatial dimensions) 172 | if ort_session is None: 173 | ort_session = _get_ort_session( 174 | onnx_filepath=args.onnx_filepath, 175 | img_hw=sample['rgb'].shape[:2], 176 | topk=args.topk 177 | ) 178 | 179 | # get network input shape (from rgb input) 180 | h, w = ort_session.get_inputs()[0].shape[-2:] 181 | 182 | # rgb preprocessing 183 | # -> resize 184 | rgb = cv2.resize(sample['rgb'], (w, h), 185 | interpolation=cv2.INTER_LINEAR) 186 | # -> normalize 187 | rgb = rgb.astype('float32') 188 | rgb -= RGB_MEAN[None, None, ...] 189 | rgb /= RGB_STD[None, None, ...] 190 | # -> create tensor (add batch axis, channels first) 191 | rgb = rgb.transpose(2, 0, 1)[None, ...]
192 | 193 | # depth preprocessing 194 | # -> resize 195 | depth = cv2.resize(sample['depth'], (w, h), 196 | interpolation=cv2.INTER_NEAREST) 197 | # -> normalize 198 | mask_invalid = depth == 0 # mask for invalid depth values 199 | depth = depth.astype('float32') 200 | depth -= dataset.depth_mean 201 | depth /= dataset.depth_std 202 | # reset invalid values (the network should not be able to learn from 203 | # these pixels) 204 | depth[mask_invalid] = 0 205 | # -> create tensor (add batch and channel axes) 206 | depth = depth[None, None, ...] 207 | 208 | # apply model 209 | scores, classes = ort_session.run(None, {'rgb': rgb, 'depth': depth}) 210 | 211 | # remove batch axis 212 | scores = scores[0] 213 | classes = classes[0] 214 | 215 | # cast classes to uint8 (< 255 classes) 216 | classes = classes.astype('uint8') 217 | 218 | # create predicted segmentation 219 | # note that we store the topk predictions as class_idx + score (to 220 | # save some space), you may further think about using float16 221 | scores_clamped = np.clip(scores, a_min=0, a_max=0.9999) 222 | classes = classes + 1 # add void class (void + 40 classes) 223 | segmentation = scores_clamped + classes 224 | 225 | # ensure that class is still correct (top0 only) 226 | assert (segmentation[0].astype('uint8') == classes[0]).all() 227 | 228 | # store predicted segmentation 229 | # -> topk prediction (for mapping later) 230 | fp = os.path.join(args.output_path, args.dataset_split, 231 | f'{Hypersim.SEMANTIC_DIR}_topk', 232 | *sample['identifier']) 233 | os.makedirs(os.path.dirname(fp), exist_ok=True) 234 | np.save(f'{fp}.npy', segmentation) 235 | 236 | # -> predicted classes 237 | for i in range(args.topk): 238 | dirname = Hypersim.SEMANTIC_DIR 239 | if i > 0: 240 | dirname += f'_topk_{i}' 241 | fp = os.path.join(args.output_path, args.dataset_split, 242 | dirname, *sample['identifier']) 243 | os.makedirs(os.path.dirname(fp), exist_ok=True) 244 | cv2.imwrite(f'{fp}.png', segmentation[i].astype('uint8')) 245 | 246 | # -> predicted classes as colored images (with color palette, do not 247 | # load these images later on with OpenCV, PIL is fine) 248 | for i in range(args.topk): 249 | dirname = Hypersim.SEMANTIC_COLORED_DIR 250 | if i > 0: 251 | dirname += f'_topk_{i}' 252 | fp = os.path.join(args.output_path, args.dataset_split, 253 | dirname, *sample['identifier']) 254 | os.makedirs(os.path.dirname(fp), exist_ok=True) 255 | save_indexed_png(f'{fp}.png', segmentation[i].astype('uint8'), 256 | colormap=dataset.semantic_class_colors) 257 | 258 | 259 | if __name__ == '__main__': 260 | main() 261 | -------------------------------------------------------------------------------- /semantic_mapping.yaml: -------------------------------------------------------------------------------- 1 | name: semantic_mapping 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=4.5=1_gnu 8 | - alabaster=0.7.12=pyhd3eb1b0_0 9 | - anaconda=2021.11=py38_0 10 | - anaconda-client=1.9.0=py38h06a4308_0 11 | - anaconda-project=0.10.1=pyhd3eb1b0_0 12 | - anyio=2.2.0=py38h06a4308_1 13 | - appdirs=1.4.4=pyhd3eb1b0_0 14 | - argh=0.26.2=py38_0 15 | - argon2-cffi=20.1.0=py38h27cfd23_1 16 | - arrow=0.13.1=py38_0 17 | - asn1crypto=1.4.0=py_0 18 | - astroid=2.6.6=py38h06a4308_0 19 | - astropy=4.3.1=py38h09021b7_0 20 | - async_generator=1.10=pyhd3eb1b0_0 21 | - atomicwrites=1.4.0=py_0 22 | - attrs=21.2.0=pyhd3eb1b0_0 23 | - autopep8=1.5.7=pyhd3eb1b0_0 24 | - babel=2.9.1=pyhd3eb1b0_0 25 | -
backcall=0.2.0=pyhd3eb1b0_0 26 | - backports=1.0=pyhd3eb1b0_2 27 | - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3 28 | - beautifulsoup4=4.10.0=pyh06a4308_0 29 | - binaryornot=0.4.4=pyhd3eb1b0_1 30 | - bitarray=2.3.0=py38h7f8727e_1 31 | - bkcharts=0.2=py38_0 32 | - black=19.10b0=py_0 33 | - blas=1.0=mkl 34 | - bleach=4.0.0=pyhd3eb1b0_0 35 | - blosc=1.21.0=h8c45485_0 36 | - bokeh=2.4.1=py38h06a4308_0 37 | - boto=2.49.0=py38_0 38 | - bottleneck=1.3.2=py38heb32a55_1 39 | - brotli=1.0.9=he6710b0_2 40 | - brotlipy=0.7.0=py38h27cfd23_1003 41 | - brunsli=0.1=h2531618_0 42 | - bzip2=1.0.8=h7b6447c_0 43 | - c-ares=1.17.1=h27cfd23_0 44 | - ca-certificates=2021.10.26=h06a4308_2 45 | - cairo=1.16.0=hf32fb01_1 46 | - certifi=2021.10.8=py38h06a4308_0 47 | - cffi=1.14.6=py38h400218f_0 48 | - cfitsio=3.470=hf0d0db6_6 49 | - chardet=4.0.0=py38h06a4308_1003 50 | - charls=2.2.0=h2531618_0 51 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 52 | - click=8.0.3=pyhd3eb1b0_0 53 | - cloudpickle=2.0.0=pyhd3eb1b0_0 54 | - clyent=1.2.2=py38_1 55 | - colorama=0.4.4=pyhd3eb1b0_0 56 | - conda=4.12.0=py38h06a4308_0 57 | - conda-content-trust=0.1.1=pyhd3eb1b0_0 58 | - conda-pack=0.6.0=pyhd3eb1b0_0 59 | - conda-package-handling=1.8.1=py38h7f8727e_0 60 | - conda-token=0.3.0=pyhd3eb1b0_0 61 | - contextlib2=0.6.0.post1=pyhd3eb1b0_0 62 | - cookiecutter=1.7.2=pyhd3eb1b0_0 63 | - cryptography=3.4.8=py38hd23ed53_0 64 | - cudatoolkit=11.2.2=he111cf0_8 65 | - cudnn=8.2.1.32=h86fa8c9_0 66 | - curl=7.78.0=h1ccaba5_0 67 | - cycler=0.10.0=py38_0 68 | - cython=0.29.24=py38hdbfa776_0 69 | - cytoolz=0.11.0=py38h7b6447c_0 70 | - daal4py=2021.3.0=py38hae6d005_0 71 | - dal=2021.3.0=h06a4308_557 72 | - dask=2021.10.0=pyhd3eb1b0_0 73 | - dask-core=2021.10.0=pyhd3eb1b0_0 74 | - dataclasses=0.8=pyh6d0b6a4_7 75 | - dbus=1.13.18=hb2f20db_0 76 | - debugpy=1.4.1=py38h295c915_0 77 | - decorator=5.1.0=pyhd3eb1b0_0 78 | - defusedxml=0.7.1=pyhd3eb1b0_0 79 | - diff-match-patch=20200713=pyhd3eb1b0_0 80 | - distributed=2021.10.0=py38h06a4308_0 81 | - docutils=0.17.1=py38h06a4308_1 82 | - entrypoints=0.3=py38_0 83 | - et_xmlfile=1.1.0=py38h06a4308_0 84 | - expat=2.4.1=h2531618_2 85 | - fastcache=1.1.0=py38h7b6447c_0 86 | - filelock=3.3.1=pyhd3eb1b0_1 87 | - flake8=3.9.2=pyhd3eb1b0_0 88 | - flask=1.1.2=pyhd3eb1b0_0 89 | - fontconfig=2.13.1=h6c09931_0 90 | - fonttools=4.25.0=pyhd3eb1b0_0 91 | - freetype=2.10.4=h5ab3b9f_0 92 | - fribidi=1.0.10=h7b6447c_0 93 | - fsspec=2021.8.1=pyhd3eb1b0_0 94 | - get_terminal_size=1.0.0=haa9412d_0 95 | - gevent=21.8.0=py38h7f8727e_1 96 | - giflib=5.2.1=h7b6447c_0 97 | - glib=2.69.1=h5202010_0 98 | - glob2=0.7=pyhd3eb1b0_0 99 | - gmp=6.2.1=h2531618_2 100 | - gmpy2=2.0.8=py38hd5f6e3b_3 101 | - graphite2=1.3.14=h23475e2_0 102 | - greenlet=1.1.1=py38h295c915_0 103 | - gst-plugins-base=1.14.0=h8213a91_2 104 | - gstreamer=1.14.0=h28cd5cc_2 105 | - h5py=2.10.0=py38h7918eee_0 106 | - harfbuzz=2.8.1=h6f93f22_0 107 | - hdf5=1.10.4=hb1b8bf9_0 108 | - heapdict=1.0.1=pyhd3eb1b0_0 109 | - html5lib=1.1=pyhd3eb1b0_0 110 | - icu=58.2=he6710b0_3 111 | - idna=3.2=pyhd3eb1b0_0 112 | - imagecodecs=2021.8.26=py38h4cda21f_0 113 | - imageio=2.9.0=pyhd3eb1b0_0 114 | - imagesize=1.2.0=pyhd3eb1b0_0 115 | - importlib-metadata=4.8.1=py38h06a4308_0 116 | - importlib_metadata=4.8.1=hd3eb1b0_0 117 | - inflection=0.5.1=py38h06a4308_0 118 | - iniconfig=1.1.1=pyhd3eb1b0_0 119 | - intel-openmp=2021.4.0=h06a4308_3561 120 | - intervaltree=3.1.0=pyhd3eb1b0_0 121 | - ipykernel=6.4.1=py38h06a4308_1 122 | - ipython=7.29.0=py38hb070fc8_0 123 | - 
ipython_genutils=0.2.0=pyhd3eb1b0_1 124 | - ipywidgets=7.6.5=pyhd3eb1b0_1 125 | - isort=5.9.3=pyhd3eb1b0_0 126 | - itsdangerous=2.0.1=pyhd3eb1b0_0 127 | - jbig=2.1=hdba287a_0 128 | - jdcal=1.4.1=pyhd3eb1b0_0 129 | - jedi=0.18.0=py38h06a4308_1 130 | - jeepney=0.7.1=pyhd3eb1b0_0 131 | - jinja2=2.11.3=pyhd3eb1b0_0 132 | - jinja2-time=0.2.0=pyhd3eb1b0_2 133 | - joblib=1.1.0=pyhd3eb1b0_0 134 | - jpeg=9d=h7f8727e_0 135 | - json5=0.9.6=pyhd3eb1b0_0 136 | - jsonschema=3.2.0=pyhd3eb1b0_2 137 | - jupyter=1.0.0=py38_7 138 | - jupyter_client=6.1.12=pyhd3eb1b0_0 139 | - jupyter_console=6.4.0=pyhd3eb1b0_0 140 | - jupyter_core=4.8.1=py38h06a4308_0 141 | - jupyter_server=1.4.1=py38h06a4308_0 142 | - jupyterlab=3.2.1=pyhd3eb1b0_1 143 | - jupyterlab_pygments=0.1.2=py_0 144 | - jupyterlab_server=2.8.2=pyhd3eb1b0_0 145 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 146 | - jxrlib=1.1=h7b6447c_2 147 | - keyring=23.1.0=py38h06a4308_0 148 | - kiwisolver=1.3.1=py38h2531618_0 149 | - krb5=1.19.2=hac12032_0 150 | - lazy-object-proxy=1.6.0=py38h27cfd23_0 151 | - lcms2=2.12=h3be6417_0 152 | - ld_impl_linux-64=2.35.1=h7274673_9 153 | - lerc=3.0=h295c915_0 154 | - libaec=1.0.4=he6710b0_1 155 | - libarchive=3.4.2=h62408e4_0 156 | - libcurl=7.78.0=h0b77cf5_0 157 | - libdeflate=1.8=h7f8727e_5 158 | - libedit=3.1.20210910=h7f8727e_0 159 | - libev=4.33=h7f8727e_1 160 | - libffi=3.3=he6710b0_2 161 | - libgcc-ng=9.3.0=h5101ec6_17 162 | - libgfortran-ng=7.5.0=ha8ba4b0_17 163 | - libgfortran4=7.5.0=ha8ba4b0_17 164 | - libgomp=9.3.0=h5101ec6_17 165 | - liblief=0.10.1=he6710b0_0 166 | - libllvm11=11.1.0=h3826bc1_0 167 | - libnghttp2=1.41.0=hf8bcb03_2 168 | - libpng=1.6.37=hbc83047_0 169 | - libsodium=1.0.18=h7b6447c_0 170 | - libspatialindex=1.9.3=h2531618_0 171 | - libssh2=1.9.0=h1ba5d50_1 172 | - libstdcxx-ng=9.3.0=hd4cf53a_17 173 | - libtiff=4.2.0=h85742a9_0 174 | - libtool=2.4.6=h7b6447c_1005 175 | - libuuid=1.0.3=h7f8727e_2 176 | - libuv=1.40.0=h7b6447c_0 177 | - libwebp=1.2.0=h89dd481_0 178 | - libwebp-base=1.2.0=h27cfd23_0 179 | - libxcb=1.14=h7b6447c_0 180 | - libxml2=2.9.12=h03d6c58_0 181 | - libxslt=1.1.34=hc22bd24_0 182 | - libzopfli=1.0.3=he6710b0_0 183 | - llvmlite=0.37.0=py38h295c915_1 184 | - locket=0.2.1=py38h06a4308_1 185 | - lxml=4.6.3=py38h9120a33_0 186 | - lz4-c=1.9.3=h295c915_1 187 | - lzo=2.10=h7b6447c_2 188 | - markupsafe=1.1.1=py38h7b6447c_0 189 | - matplotlib=3.4.3=py38h06a4308_0 190 | - matplotlib-base=3.4.3=py38hbbc1b5f_0 191 | - matplotlib-inline=0.1.2=pyhd3eb1b0_2 192 | - mccabe=0.6.1=py38_1 193 | - mistune=0.8.4=py38h7b6447c_1000 194 | - mkl=2021.4.0=h06a4308_640 195 | - mkl-service=2.4.0=py38h7f8727e_0 196 | - mkl_fft=1.3.1=py38hd3c417c_0 197 | - mkl_random=1.2.2=py38h51133e4_0 198 | - mock=4.0.3=pyhd3eb1b0_0 199 | - more-itertools=8.10.0=pyhd3eb1b0_0 200 | - mpc=1.1.0=h10f8cd9_1 201 | - mpfr=4.0.2=hb69a4c5_1 202 | - mpi=1.0=mpich 203 | - mpich=3.3.2=hc856adb_0 204 | - mpmath=1.2.1=py38h06a4308_0 205 | - msgpack-python=1.0.2=py38hff7bd54_1 206 | - multipledispatch=0.6.0=py38_0 207 | - munkres=1.1.4=py_0 208 | - mypy_extensions=0.4.3=py38_0 209 | - nbclassic=0.2.6=pyhd3eb1b0_0 210 | - nbclient=0.5.3=pyhd3eb1b0_0 211 | - nbconvert=6.1.0=py38h06a4308_0 212 | - nbformat=5.1.3=pyhd3eb1b0_0 213 | - ncurses=6.3=heee7806_1 214 | - nest-asyncio=1.5.1=pyhd3eb1b0_0 215 | - networkx=2.6.3=pyhd3eb1b0_0 216 | - nltk=3.6.5=pyhd3eb1b0_0 217 | - nose=1.3.7=pyhd3eb1b0_1006 218 | - notebook=6.4.5=py38h06a4308_0 219 | - numba=0.54.1=py38h51133e4_0 220 | - numexpr=2.7.3=py38h22e1b3c_1 221 | - numpydoc=1.1.0=pyhd3eb1b0_1 222 
| - olefile=0.46=pyhd3eb1b0_0 223 | - openjpeg=2.4.0=h3ad879b_0 224 | - openpyxl=3.0.9=pyhd3eb1b0_0 225 | - openssl=1.1.1l=h7f8727e_0 226 | - packaging=21.0=pyhd3eb1b0_0 227 | - pandas=1.3.4=py38h8c16a72_0 228 | - pandocfilters=1.4.3=py38h06a4308_1 229 | - pango=1.45.3=hd140c19_0 230 | - parso=0.8.2=pyhd3eb1b0_0 231 | - partd=1.2.0=pyhd3eb1b0_0 232 | - patchelf=0.13=h295c915_0 233 | - path=16.0.0=py38h06a4308_0 234 | - path.py=12.5.0=hd3eb1b0_0 235 | - pathlib2=2.3.6=py38h06a4308_2 236 | - pathspec=0.7.0=py_0 237 | - patsy=0.5.2=py38h06a4308_0 238 | - pcre=8.45=h295c915_0 239 | - pep8=1.7.1=py38_0 240 | - pexpect=4.8.0=pyhd3eb1b0_3 241 | - pickleshare=0.7.5=pyhd3eb1b0_1003 242 | - pillow=8.4.0=py38h5aabda8_0 243 | - pip=21.2.4=py38h06a4308_0 244 | - pixman=0.40.0=h7f8727e_1 245 | - pkginfo=1.7.1=py38h06a4308_0 246 | - pluggy=0.13.1=py38h06a4308_0 247 | - ply=3.11=py38_0 248 | - poyo=0.5.0=pyhd3eb1b0_0 249 | - prometheus_client=0.11.0=pyhd3eb1b0_0 250 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0 251 | - prompt_toolkit=3.0.20=hd3eb1b0_0 252 | - psutil=5.8.0=py38h27cfd23_1 253 | - ptyprocess=0.7.0=pyhd3eb1b0_2 254 | - py=1.10.0=pyhd3eb1b0_0 255 | - py-lief=0.10.1=py38h403a769_0 256 | - pycodestyle=2.7.0=pyhd3eb1b0_0 257 | - pycosat=0.6.3=py38h7b6447c_1 258 | - pycparser=2.20=py_2 259 | - pycurl=7.44.1=py38h8f2d780_1 260 | - pydocstyle=6.1.1=pyhd3eb1b0_0 261 | - pyerfa=2.0.0=py38h27cfd23_0 262 | - pyflakes=2.3.1=pyhd3eb1b0_0 263 | - pygments=2.10.0=pyhd3eb1b0_0 264 | - pylint=2.9.6=py38h06a4308_1 265 | - pyls-spyder=0.4.0=pyhd3eb1b0_0 266 | - pyodbc=4.0.31=py38h295c915_0 267 | - pyopenssl=21.0.0=pyhd3eb1b0_1 268 | - pyparsing=3.0.4=pyhd3eb1b0_0 269 | - pyqt=5.9.2=py38h05f1152_4 270 | - pyrsistent=0.18.0=py38heee7806_0 271 | - pysocks=1.7.1=py38h06a4308_0 272 | - pytables=3.6.1=py38h9fd0a39_0 273 | - pytest=6.2.4=py38h06a4308_2 274 | - python=3.8.12=h12debd9_0 275 | - python-dateutil=2.8.2=pyhd3eb1b0_0 276 | - python-libarchive-c=2.9=pyhd3eb1b0_1 277 | - python-lsp-black=1.0.0=pyhd3eb1b0_0 278 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 279 | - python-lsp-server=1.2.4=pyhd3eb1b0_0 280 | - python-slugify=5.0.2=pyhd3eb1b0_0 281 | - pytz=2021.3=pyhd3eb1b0_0 282 | - pywavelets=1.1.1=py38h7b6447c_2 283 | - pyxdg=0.27=pyhd3eb1b0_0 284 | - pyyaml=6.0=py38h7f8727e_1 285 | - pyzmq=22.2.1=py38h295c915_1 286 | - qdarkstyle=3.0.2=pyhd3eb1b0_0 287 | - qstylizer=0.1.10=pyhd3eb1b0_0 288 | - qt=5.9.7=h5867ecd_1 289 | - qtawesome=1.0.2=pyhd3eb1b0_0 290 | - qtconsole=5.1.1=pyhd3eb1b0_0 291 | - qtpy=1.10.0=pyhd3eb1b0_0 292 | - readline=8.1=h27cfd23_0 293 | - regex=2021.8.3=py38h7f8727e_0 294 | - requests=2.26.0=pyhd3eb1b0_0 295 | - ripgrep=12.1.1=0 296 | - rope=0.19.0=pyhd3eb1b0_0 297 | - rtree=0.9.7=py38h06a4308_1 298 | - ruamel_yaml=0.15.100=py38h27cfd23_0 299 | - scikit-image=0.18.3=py38h51133e4_0 300 | - scikit-learn=0.24.2=py38ha9443f7_0 301 | - scikit-learn-intelex=2021.3.0=py38h06a4308_0 302 | - scipy=1.7.1=py38h292c36d_2 303 | - seaborn=0.11.2=pyhd3eb1b0_0 304 | - secretstorage=3.3.1=py38h06a4308_0 305 | - send2trash=1.8.0=pyhd3eb1b0_1 306 | - setuptools=58.0.4=py38h06a4308_0 307 | - simplegeneric=0.8.1=py38_2 308 | - singledispatch=3.7.0=pyhd3eb1b0_1001 309 | - sip=4.19.13=py38he6710b0_0 310 | - six=1.16.0=pyhd3eb1b0_0 311 | - snappy=1.1.8=he6710b0_0 312 | - sniffio=1.2.0=py38h06a4308_1 313 | - snowballstemmer=2.1.0=pyhd3eb1b0_0 314 | - sortedcollections=2.1.0=pyhd3eb1b0_0 315 | - sortedcontainers=2.4.0=pyhd3eb1b0_0 316 | - soupsieve=2.2.1=pyhd3eb1b0_0 317 | - sphinx=4.2.0=pyhd3eb1b0_1 318 | - 
sphinxcontrib=1.0=py38_1 319 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 320 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 321 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 322 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 323 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 324 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 325 | - sphinxcontrib-websupport=1.2.4=py_0 326 | - spyder=5.1.5=py38h06a4308_1 327 | - spyder-kernels=2.1.3=py38h06a4308_0 328 | - sqlalchemy=1.4.22=py38h7f8727e_0 329 | - sqlite=3.36.0=hc218d9a_0 330 | - statsmodels=0.12.2=py38h27cfd23_0 331 | - sympy=1.9=py38h06a4308_0 332 | - tbb=2021.4.0=hd09550d_0 333 | - tbb4py=2021.4.0=py38hd09550d_0 334 | - tblib=1.7.0=pyhd3eb1b0_0 335 | - terminado=0.9.4=py38h06a4308_0 336 | - testpath=0.5.0=pyhd3eb1b0_0 337 | - text-unidecode=1.3=pyhd3eb1b0_0 338 | - textdistance=4.2.1=pyhd3eb1b0_0 339 | - threadpoolctl=2.2.0=pyh0d69192_0 340 | - three-merge=0.1.1=pyhd3eb1b0_0 341 | - tifffile=2021.7.2=pyhd3eb1b0_2 342 | - tinycss=0.4=pyhd3eb1b0_1002 343 | - tk=8.6.11=h1ccaba5_0 344 | - toml=0.10.2=pyhd3eb1b0_0 345 | - toolz=0.11.1=pyhd3eb1b0_0 346 | - tornado=6.1=py38h27cfd23_0 347 | - tqdm=4.62.3=pyhd3eb1b0_1 348 | - traitlets=5.1.0=pyhd3eb1b0_0 349 | - typed-ast=1.4.3=py38h7f8727e_1 350 | - typing_extensions=3.10.0.2=pyh06a4308_0 351 | - ujson=4.0.2=py38h2531618_0 352 | - unicodecsv=0.14.1=py38_0 353 | - unidecode=1.2.0=pyhd3eb1b0_0 354 | - unixodbc=2.3.9=h7b6447c_0 355 | - urllib3=1.26.7=pyhd3eb1b0_0 356 | - watchdog=2.1.3=py38h06a4308_0 357 | - wcwidth=0.2.5=pyhd3eb1b0_0 358 | - webencodings=0.5.1=py38_1 359 | - werkzeug=2.0.2=pyhd3eb1b0_0 360 | - wheel=0.37.0=pyhd3eb1b0_1 361 | - whichcraft=0.6.1=pyhd3eb1b0_0 362 | - widgetsnbextension=3.5.1=py38_0 363 | - wrapt=1.12.1=py38h7b6447c_1 364 | - wurlitzer=2.1.1=py38h06a4308_0 365 | - xlrd=2.0.1=pyhd3eb1b0_0 366 | - xlsxwriter=3.0.1=pyhd3eb1b0_0 367 | - xlwt=1.3.0=py38_0 368 | - xz=5.2.5=h7b6447c_0 369 | - yaml=0.2.5=h7b6447c_0 370 | - yapf=0.31.0=pyhd3eb1b0_0 371 | - zeromq=4.3.4=h2531618_0 372 | - zfp=0.5.5=h2531618_6 373 | - zict=2.0.0=pyhd3eb1b0_0 374 | - zipp=3.6.0=pyhd3eb1b0_0 375 | - zlib=1.2.11=h7b6447c_3 376 | - zope=1.0=py38_1 377 | - zope.event=4.5.0=py38_0 378 | - zope.interface=5.4.0=py38h7f8727e_0 379 | - zstd=1.4.9=haebb681_0 380 | - pip: 381 | - cityscapesscripts==1.5.0 382 | - flatbuffers==2.0 383 | # - nicr-scene-analysis-datasets==0.3.1 384 | - numpy==1.22.3 385 | - onnx==1.11.0 386 | - onnxruntime-gpu==1.11.0 387 | - opencv-python==4.2.0.34 388 | - protobuf==3.20.1 389 | prefix: /home/user/anaconda3/envs/semantic_mapping 390 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | # E501 line too long (82 > 79 characters) 3 | # E402 module level import not at top of file 4 | # E731 do not assign a lambda expression, use a def 5 | # +pep8 default ignore: E121, E123, E126, E226, E24, E704 6 | ignore = E226, E501, E402, E731, E121, E123, E126, E226, E24, E704, E265 -------------------------------------------------------------------------------- /trained_models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/trained_models/.gitkeep -------------------------------------------------------------------------------- /utils.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. codeauthor:: Daniel Seichter 4 | """ 5 | import os 6 | 7 | 8 | def _get_default_path(*path_components): 9 | base_path = os.path.dirname(os.path.abspath(__file__)) 10 | return os.path.join(base_path, *path_components) 11 | 12 | 13 | DEFAULT_ONNX_FILEPATH = _get_default_path('trained_models', 14 | 'model_hypersim.onnx') 15 | 16 | DEFAULT_DATASET_PATH = _get_default_path('datasets', 'hypersim') 17 | 18 | DEFAULT_PREDICTIONS_PATH = _get_default_path('datasets', 'hypersim_predictions') 19 | --------------------------------------------------------------------------------
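
A short note on the stored top-k predictions written by `predict.py` above: each value in the saved `.npy` arrays packs `class_idx + score` into a single float, where the integer part is the class index (already shifted by +1 for void) and the fractional part is the clipped softmax score. Below is a minimal decoding sketch; the concrete file path is only a hypothetical example following the scene/camera/frame layout and is not guaranteed to exist.

```python
import numpy as np

# hypothetical example path (scene/camera/frame layout used throughout the repository)
fp = './datasets/hypersim_predictions/test/semantic_40_topk/ai_001_010/cam_00/0000.npy'

arr = np.load(fp)              # shape: (topk, h, w), float values such as 23.87

classes = arr.astype('uint8')  # integer part -> class index (void + 40 classes labeling)
scores = arr - classes         # fractional part -> softmax score, clipped to [0, 0.9999]

top1_classes = classes[0]      # equals what evaluate.py loads via pred[0, ...].astype('uint8')
top1_scores = scores[0]
```

This simply inverts the packing done in `predict.py` (`segmentation = scores_clamped + classes`) and mirrors the top-1 loading of `.npy` predictions in `evaluate.py`.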