├── .gitignore
├── .gitmodules
├── .vscode
│   └── settings.json
├── LICENSE
├── README.md
├── datasets
│   └── .gitkeep
├── evaluate.py
├── predict.py
├── semantic_mapping.yaml
├── tox.ini
├── trained_models
│   └── .gitkeep
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | parts/
18 | sdist/
19 | var/
20 | wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .coverage.*
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 | *.cover
44 | .hypothesis/
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 | local_settings.py
53 |
54 | # Flask stuff:
55 | instance/
56 | .webassets-cache
57 |
58 | # Scrapy stuff:
59 | .scrapy
60 |
61 | # Sphinx documentation
62 | docs/_build/
63 |
64 | # PyBuilder
65 | target/
66 |
67 | # Jupyter Notebook
68 | .ipynb_checkpoints
69 |
70 | # pyenv
71 | .python-version
72 |
73 | # celery beat schedule file
74 | celerybeat-schedule
75 |
76 | # SageMath parsed files
77 | *.sage.py
78 |
79 | # Environments
80 | .env
81 | .venv
82 | env/
83 | venv/
84 | ENV/
85 |
86 | # Spyder project settings
87 | .spyderproject
88 | .spyproject
89 |
90 | # Rope project settings
91 | .ropeproject
92 |
93 | # mkdocs documentation
94 | /site
95 |
96 | # mypy
97 | .mypy_cache/
98 |
99 | # PyCharm
100 | .idea
101 |
102 | # MacOS
103 | .DS_Store
104 |
105 | # Binaries
106 | *.npz
107 | *.npy
108 | *.h5
109 | *.hdf5
110 | core
111 |
112 |
113 | # VSCode
114 | # settings.json
115 |
116 | # onnx models
117 | *.onnx
118 |
119 | # data
120 | datasets/hypersim
121 | datasets/hypersim_predictions
122 | trained_models/*.tar.gz
123 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/nicr-scene-analysis-datasets"]
2 | path = lib/nicr-scene-analysis-datasets
3 | url = https://github.com/TUI-NICR/nicr-scene-analysis-datasets
4 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.linting.pycodestyleEnabled": true,
3 | "python.linting.enabled": true,
4 | // disable annoying top-level source code modification indication
5 | "gitlens.codeLens.authors.enabled": false,
6 | "gitlens.codeLens.recentChange.enabled": false,
7 | "files.trimTrailingWhitespace": true,
8 | "[markdown]": {
9 | "files.trimTrailingWhitespace": false
10 | },
11 | }
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2021-2022, Neuroinformatics and Cognitive Robotics Lab (Technische
4 | Universität Ilmenau)
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Efficient and Robust Semantic Mapping for Indoor Environments
2 |
3 | This repository contains the code to our paper "Efficient and Robust Semantic Mapping for Indoor Environments" ([IEEE Xplore](https://ieeexplore.ieee.org/document/9812205), [arXiv](https://arxiv.org/pdf/2203.05836.pdf)).
4 |
5 |
6 | [Teaser image linking to the YouTube video]
7 |
8 | (Click on the image to open YouTube video)
9 |
10 |
11 | > You may also want to have a look at our follow-up work: [**PanopticNDT**](https://github.com/TUI-NICR/panoptic-mapping)
12 |
13 | ## License and Citations
14 | The source code and the network weights are published under BSD 3-Clause license, see [license file](LICENSE) for details.
15 |
16 | If you use the source code or the network weights, please cite the following paper:
17 | >Seichter, D., Langer, P., Wengefeld, T., Lewandowski, B., Höchemer, D., Gross, H.-M.
18 | *Efficient and Robust Semantic Mapping for Indoor Environments*
19 | in IEEE International Conference on Robotics and Automation (ICRA), pp. 9221-9227, 2022.
20 |
21 |
22 | BibTeX
23 |
24 | ```bibtex
25 | @inproceedings{semanticndtmapping2022icra,
26 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}},
27 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael},
28 | booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
29 | year = {2022},
30 | volume = {},
31 | number = {},
32 | pages = {9221-9227}
33 | }
34 |
35 | @article{semanticndtmapping2022arXiv,
36 | title = {{Efficient and Robust Semantic Mapping for Indoor Environments}},
37 | author = {Seichter, Daniel and Langer, Patrick and Wengefeld, Tim and Lewandowski, Benjamin and H{\"o}chemer, Dominik and Gross, Horst-Michael},
38 | journal = {arXiv preprint arXiv:2203.05836},
39 | year = {2022}
40 | }
41 | ```
42 | Note that the preprint has been accepted for publication at the IEEE International Conference on Robotics and Automation (ICRA).
43 |
44 |
45 |
46 | ## Setup
47 |
48 | 1. Clone repository:
49 | ```bash
50 | # do not forget the '--recursive' ;)
51 | git clone --recursive https://github.com/TUI-NICR/semantic-mapping.git
52 |
53 | cd /path/to/this/repository
54 | ```
55 |
56 | 2. Set up anaconda environment including all dependencies:
57 | ```bash
58 | # option 1: create conda environment from YAML file
59 | conda env create -f semantic_mapping.yaml
60 | conda activate semantic_mapping
61 |
62 | # option 2: create new environment (see last tested versions)
63 | conda create -n semantic_mapping python==3.8.12 anaconda==2021.11
64 | conda activate semantic_mapping
65 | pip install onnx==1.11.0
66 | pip install opencv-python==4.2.0.34
67 | pip install tqdm==4.62.3
68 | # ONNXRuntime with CUDA support
69 | conda install -c conda-forge cudnn==8.2.1.32
70 | pip install onnxruntime-gpu==1.11.0
71 |
72 |
73 | # finally, install our package for preparing and using the Hypersim dataset
74 | pip install ./lib/nicr-scene-analysis-datasets[with_preparation]
75 | ```
76 |
77 | ## Usage
78 |
79 | 1. Prepare the [Hypersim](https://machinelearning.apple.com/research/hypersim) dataset:
80 | ```bash
81 | # download and extract raw dataset (2x ~1.8TB)
82 | HYPERSIM_DOWNLOAD_PATH='./datasets/hypersim_preparation'
83 | wget https://raw.githubusercontent.com/apple/ml-hypersim/6cbaa80207f44a312654e288cf445016c84658a1/code/python/tools/dataset_download_images.py
84 | python dataset_download_images.py --downloads_dir $HYPERSIM_DOWNLOAD_PATH
85 |
86 | # prepare dataset (~157.5 GB, extract required data, convert to our format, blacklist some scenes/trajectories)
87 | python -m nicr_scene_analysis_datasets.datasets.hypersim.prepare_dataset \
88 | ./datasets/hypersim \
89 | $HYPERSIM_DOWNLOAD_PATH \
90 | --additional-subsamples 2 5 10 20 \
91 | --multiprocessing
92 |
93 | # just in case you want to delete the downloaded raw data (2x ~1.8TB)
94 | rm -rf $HYPERSIM_DOWNLOAD_PATH
95 |
96 | ```
97 | For further details, we refer to the documentation of our
98 | [nicr-scene-analysis-datasets python package](https://github.com/TUI-NICR/nicr-scene-analysis-datasets/tree/882276c46ca5864ebb6146afe6bae56d0b1abc11).
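
    To quickly check the prepared dataset, you can load it directly with the installed package. A minimal sketch, assuming the dataset was prepared to `./datasets/hypersim` as above (the constructor arguments are the same ones used in `predict.py`/`evaluate.py`):
    ```python
    from nicr_scene_analysis_datasets import Hypersim

    # load the prepared test split with a few sample keys
    dataset = Hypersim(dataset_path='./datasets/hypersim',
                       split='test',
                       subsample=None,
                       sample_keys=('identifier', 'rgb', 'depth', 'semantic'))

    print(len(dataset))            # number of samples in the split
    sample = dataset[0]            # dict with the requested sample keys
    print(sample['identifier'])    # (scene, camera, frame id)
    ```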
99 |
100 | 2. Download pretrained model:
101 | We provide the weights of our selected ESANet-R34-NBt1D (enhanced ResNet34-based encoder utilizing the Non-Bottleneck-1D block) trained on the Hypersim dataset.
102 | To ease both application and deployment, we removed all dependencies (PyTorch, ...) and provide the weights in [ONNX format](https://onnx.ai/).
103 |
104 | Click [here](https://drive.google.com/uc?id=1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg) to download the model and extract it to `./trained_models` or use:
105 | ```bash
106 | pip install gdown # last tested: 4.4.0
107 | gdown 1zUxSqq4zdC3yQ4RxiHvTh8CX7-115KUg --output ./trained_models/
108 | tar -xvzf ./trained_models/model_hypersim.tar.gz -C ./trained_models/
109 |
110 | ```
111 |
112 | The model was selected based on the mean intersection over union (mIoU) on the validation split: 0.4591184410660463 at epoch 498.
113 | On the test split, the model achieves a mIoU of 0.41168890871760977.
114 | Note that, similar to other approaches, we only evaluate up to a reasonable maximum distance of 20 m from the camera. For more details, see `evaluate.py`.
115 |
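To verify the downloaded model, you can open it with ONNX Runtime and inspect its inputs. A minimal sketch (CPU provider only; the input names `rgb` and `depth` are the ones fed in `predict.py`):
```python
import onnxruntime as ort

# a CPU-only session is sufficient for a quick inspection
sess = ort.InferenceSession('./trained_models/model_hypersim.onnx',
                            providers=['CPUExecutionProvider'])

# the network expects an RGB and a depth tensor in NCHW layout
for inp in sess.get_inputs():
    print(inp.name, inp.shape, inp.type)
```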
116 |
117 | 3. Extract predicted semantic segmentation:
118 | ```bash
119 | # use default paths (~74.3GB for topk with k=3)
120 | python predict.py \
121 | --onnx-filepath ./trained_models/model_hypersim.onnx \
122 | --dataset-path ./datasets/hypersim \
123 | --dataset-split test \
124 | --topk 3 \
125 | --output-path ./datasets/hypersim_predictions
126 |
127 | # for more details, see:
128 | python predict.py --help
129 | ```
130 | For the example above, the predicted segmentations are stored at `./datasets/hypersim_predictions/test/`.
131 | See the `semantic_40_topk` subfolder for the predicted topk segmentation outputs, and `semantic_40/` or `semantic_40_colored/` for the predicted top1 labels (plain or colored, respectively).
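
The topk `.npy` files encode class index and softmax score in a single float per pixel (see `predict.py`): the integer part is the class index in the void+40 label set and the fractional part is the score (clipped below 1). A minimal sketch for decoding such a file (the file path is just an example):
```python
import numpy as np

# shape: (topk, height, width); values are class_idx + score with score < 1
seg = np.load('./datasets/hypersim_predictions/test/semantic_40_topk/'
              'ai_001_010/cam_00/0000.npy')

classes = seg.astype('uint8')   # integer part: class index (0 = void, 1..40 = classes)
scores = seg - classes          # fractional part: softmax score

top1 = classes[0]               # same labels as stored in semantic_40/
```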
132 |
133 | 4. Run your semantic mapping experiments and store the results with the following folder structure:
134 | ```text
135 | path/to/results/
136 | └── test
137 | ├── results1
138 | │ ├── ai_001_010
139 | │ │ ├── cam_00
140 | │ │ │ ├── 0000.png
141 | │ │ │ ├── ...
142 | ├── results2
143 | │ ├── ai_001_010
144 | │ │ ├── cam_00
145 | │ │ │ ├── 0000.png
146 | │ │ │ ├── ...
147 | ```
148 | You may have a look at `./lib/nicr-scene-analysis-datasets/nicr_scene_analysis_datasets/mira/_hypersim_reader.py` as a starting point.
149 | This class shows how the Hypersim dataset is processed in our pipelines.
150 |
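If you store your per-frame results as 8-bit label images (class indices in the same void+40 set as `semantic_40/`; values above 40 are counted as invalid/free space by `evaluate.py`), a minimal sketch for writing one frame into the expected layout (scene/camera/frame names and the image size are placeholders):
```python
import os
import cv2
import numpy as np

results_path = 'path/to/results/test/results1'      # one folder per experiment

scene, cam, frame = 'ai_001_010', 'cam_00', '0000'  # placeholder identifiers
label_img = np.zeros((768, 1024), dtype='uint8')    # placeholder label image

out_dir = os.path.join(results_path, scene, cam)
os.makedirs(out_dir, exist_ok=True)
cv2.imwrite(os.path.join(out_dir, f'{frame}.png'), label_img)
```
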
151 | 5. Run evaluation:
152 | ```bash
153 | # use default paths
154 | python evaluate.py \
155 | --dataset-path ./datasets/hypersim \
156 | --dataset-split test \
157 | --predictions-path ./datasets/hypersim_predictions
158 | [--result-paths path/to/results/test/results1 path/to/results/test/results2]
159 |
160 | # for more details, see:
161 | python evaluate.py --help
162 | ```
163 |
164 | For the predicted segmentation of our ONNX model, you should obtain measures similar to:
165 | ```text
166 | miou_gt_masked: 0.41168890871760977
167 | mean_pacc_gt_masked: 0.5683601556433829
168 | invalid_ratio: 0.0
169 | invalid_mean_ratio_gt_masked: 0.0
170 | vwmiou_gt_masked: 0.41168890871760977
171 | vwmean_pacc_gt_masked: 0.5683601556433829
172 | ```
173 | Check the created `results.json` in the predictions folder for more measures (e.g., `./datasets/hypersim_predictions/test/semantic_40/results.json`).
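
The `results.json` written by `evaluate.py` is plain JSON containing the overall measures as well as a `per_scene` dictionary. A minimal sketch for listing the per-scene mIoU (path as in the example above):
```python
import json

with open('./datasets/hypersim_predictions/test/semantic_40/results.json') as f:
    results = json.load(f)

print('overall miou_gt_masked:', results['miou_gt_masked'])
for scene, measures in results['per_scene'].items():
    print(scene, measures['miou_gt_masked'])
```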
174 |
--------------------------------------------------------------------------------
/datasets/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/datasets/.gitkeep
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | .. codeauthor:: Daniel Seichter
4 | """
5 | import argparse as ap
6 | from functools import partial
7 | import json
8 | import multiprocessing
9 | import os
10 | import warnings
11 |
12 | import cv2
13 | import numpy as np
14 |
15 | from tqdm import tqdm
16 | from tqdm.contrib.concurrent import thread_map
17 |
18 | from nicr_scene_analysis_datasets import Hypersim
19 |
20 | from utils import DEFAULT_DATASET_PATH
21 | from utils import DEFAULT_PREDICTIONS_PATH
22 |
23 |
24 | def confusion_matrix_fast(pred, gt, n_classes):
25 | # note: this function is 15x faster than sklearn.metrics.confusion_matrix
26 |
27 | # determine dtype for unique mapping
28 | n_classes_squared = n_classes**2
29 | if n_classes_squared < 2**(8-1)-1:
30 | dtype = np.int8
31 | elif n_classes_squared < 2**(16-1)-1:
32 | dtype = np.int16
33 | else:
34 | dtype = np.int64 # equal to long
35 |
36 | # convert to dtype
37 | pred_ = pred.astype(dtype)
38 | gt_ = gt.astype(dtype)
39 |
40 | # compute confusion matrix
41 | unique_mapping = (gt_.reshape(-1)*n_classes + pred_.reshape(-1))
42 | cnts = np.bincount(unique_mapping,
43 | minlength=n_classes_squared)
44 |
45 | return cnts.reshape(n_classes, n_classes)
46 |
47 |
48 | def get_confusion_matrix_for_sample(
49 | sample_idx,
50 | dataset,
51 | prediction_basepath,
52 | prediction_extension='.png',
53 | prediction_contains_void=True,
54 | max_depth_in_m=20 # max 20m
55 | ):
56 | n_classes = dataset.semantic_n_classes # with void
57 |
58 | # get sample
59 | sample = dataset[sample_idx]
60 |
61 | # load prediction
62 | fp = os.path.join(prediction_basepath, *sample['identifier'])
63 | fp += prediction_extension
64 | if '.png' == prediction_extension:
65 | # prediction is given as image
66 | pred = cv2.imread(fp, cv2.IMREAD_UNCHANGED)
67 | if pred is None:
68 | raise IOError(f"Cannot load '{fp}'")
69 | if pred.ndim > 2:
70 | warnings.warn(f"Prediction ('{fp}') has more than one channel. "
71 | "Using first channel.")
72 | pred = pred[..., 0]
73 | elif '.npy' == prediction_extension:
74 | # prediction is given as numpy array with shape (h, w, topk)
75 | pred = np.load(fp)
76 | pred = pred[0, ...].astype('uint8') # use top1 only
77 |
78 | if not prediction_contains_void:
79 | pred += 1
80 |
81 | # create flat views
82 | gt = sample['semantic'].reshape(-1)
83 | pred = pred.reshape(-1)
84 |
85 | # mask using max depth
86 | if max_depth_in_m is not None:
87 | depth = sample['depth'].reshape(-1)
88 | mask = depth < (max_depth_in_m*1000)
89 | gt = gt[mask]
90 | pred = pred[mask]
91 |
92 | # move invalid pixels in prediction, i.e., pixels that may indicate free
93 | # space, to class with index i=n_classes
94 | pred[pred > (n_classes-1)] = n_classes
95 | n_classes = n_classes + 1 # +1 = invalid pixels
96 |
97 | return confusion_matrix_fast(pred, gt, n_classes=n_classes)
98 |
99 |
100 | def get_measures(cm, ignore_void=True):
101 | # cm is gt x pred with void + n_classes + invalid (free space)
102 |
103 | tp = np.diag(cm)
104 | sum_gt = cm.sum(axis=1)
105 | sum_pred = cm.sum(axis=0)
106 | invalid_pixels = cm[:, -1]
107 |
108 | if ignore_void:
109 | # void is first class (idx=0)
110 | tp = tp[1:]
111 | sum_pred = sum_pred[1:]
112 | sum_gt = sum_gt[1:]
113 | sum_pred -= cm[0, 1:] # do not count fp for void
114 | invalid_pixels = invalid_pixels[1:]
115 |
116 | n_total_pixels = sum_gt.sum()
117 |
118 |     # we do want to ignore classes without gt pixels
119 | gt_mask = sum_gt != 0
120 |
121 | # invalid pixels
122 | invalid_ratio = invalid_pixels.sum() / n_total_pixels
123 | with np.errstate(divide='ignore', invalid='ignore'):
124 | invalid_ratios = invalid_pixels / sum_gt
125 | invalid_mean_ratio_gt_masked = np.mean(invalid_ratios[gt_mask])
126 | valid_weights = 1 - invalid_ratios
127 |
128 | # intersection over union
129 | intersections = tp
130 | unions = sum_pred + sum_gt - tp
131 |
132 | with np.errstate(divide='ignore', invalid='ignore'):
133 | ious = intersections / unions.astype(np.float32)
134 |
135 | # mean intersection over union and gt masked version
136 | miou = np.mean(np.nan_to_num(ious, nan=0.0))
137 | miou_gt_masked = np.mean(ious[gt_mask])
138 |
139 | # frequency weighted intersection over union
140 | # normal fwiou and gt masked version are equal
141 | fwiou_gt_masked = np.sum(ious[gt_mask] * tp[gt_mask]/n_total_pixels)
142 |
143 | # pixel accuracy and mean pixel accuracy
144 | pacc = tp.sum() / sum_gt.sum()
145 |
146 | with np.errstate(divide='ignore', invalid='ignore'):
147 | paccs = tp / sum_gt
148 |
149 | mean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask])
150 |
151 | # valid weighted mean intersection over union
152 | vwmiou_gt_masked = np.mean(ious[gt_mask]*valid_weights[gt_mask])
153 |
154 | # valid weighted mean pixel accuracy
155 | vwmean_pacc_gt_masked = np.mean(tp[gt_mask] / sum_gt[gt_mask] * valid_weights[gt_mask])
156 |
157 | # build dict of measures
158 | measures = {
159 | 'cm': cm.tolist(),
160 | 'invalid_ratio': invalid_ratio,
161 | 'invalid_ratios': invalid_ratios.tolist(),
162 | 'invalid_mean_ratio_gt_masked': invalid_mean_ratio_gt_masked,
163 | 'ious': ious.tolist(),
164 | 'miou': miou,
165 | 'miou_gt_masked': miou_gt_masked,
166 | 'fwiou_gt_masked': fwiou_gt_masked,
167 | 'pacc': pacc,
168 | 'paccs': paccs.tolist(),
169 | 'mean_pacc_gt_masked': mean_pacc_gt_masked,
170 | 'vwmiou_gt_masked': vwmiou_gt_masked,
171 | 'vwmean_pacc_gt_masked': vwmean_pacc_gt_masked,
172 | }
173 |
174 | return measures
175 |
176 |
177 | def _parse_args():
178 | parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter)
179 | parser.add_argument(
180 | '--dataset-path',
181 | type=str,
182 | default=DEFAULT_DATASET_PATH,
183 | help="Path to the dataset."
184 | )
185 | parser.add_argument(
186 | '--dataset-split',
187 | type=str,
188 | default='test',
189 | help="Dataset split to use."
190 | )
191 | parser.add_argument(
192 | '--predictions-path',
193 | type=str,
194 | default=DEFAULT_PREDICTIONS_PATH,
195 | help="Path to stored predicted semantic segmentation. Use an empty "
196 |              "string to skip evaluating the predicted semantic "
197 | "segmentation."
198 | )
199 | parser.add_argument(
200 | '--result-paths',
201 | nargs='+',
202 | type=str,
203 | help="Paths to further results.",
204 | default=[]
205 | )
206 | parser.add_argument(
207 | '--force-recomputing',
208 | action='store_true',
209 | default=False,
210 | help="Force recomputing."
211 | )
212 | parser.add_argument(
213 | '--n-worker',
214 | type=int,
215 | default=min(multiprocessing.cpu_count(), 48),
216 | help="Number of workers to use."
217 | )
218 |
219 | return parser.parse_args()
220 |
221 |
222 | def main():
223 | # args
224 | args = _parse_args()
225 |
226 | # just obtain all sample names
227 | dataset = Hypersim(dataset_path=args.dataset_path,
228 | split=args.dataset_split,
229 | subsample=None,
230 | sample_keys=('identifier',))
231 | samples = [s['identifier'] for s in dataset] # tuple (scene, cam, id)
232 | scenes = sorted(list(set(s[0] for s in samples)))
233 |
234 | # load dataset
235 | dataset = Hypersim(dataset_path=args.dataset_path,
236 | split=args.dataset_split,
237 | subsample=None,
238 | sample_keys=('identifier', 'depth', 'semantic'),
239 | use_cache=False,
240 | cache_disable_deepcopy=False)
241 |
242 | # get paths to evaluate
243 | paths = []
244 | if args.predictions_path:
245 | # evaluate the network prediction
246 | paths += [
247 | os.path.join(args.predictions_path, args.dataset_split,
248 | Hypersim.SEMANTIC_DIR),
249 | ]
250 | paths += [
251 | os.path.join(path) for path in args.result_paths
252 | ]
253 |
254 | # run evaluation
255 | for path in tqdm(paths):
256 | print(f"Evaluating: '{path}'")
257 | results_fp = os.path.join(path, 'results.json')
258 |
259 | if os.path.exists(results_fp) and not args.force_recomputing:
260 | continue
261 |
262 | # get confusion matrices
263 | if 1 == args.n_worker:
264 | cms = []
265 | for i in tqdm(range(len(dataset))):
266 | cm = get_confusion_matrix_for_sample(
267 | i,
268 | dataset=dataset,
269 | prediction_basepath=path,
270 | prediction_extension='.png',
271 | prediction_contains_void=True,
272 | max_depth_in_m=20
273 | )
274 | cms.append(cm)
275 | else:
276 | f = partial(get_confusion_matrix_for_sample,
277 | dataset=dataset,
278 | prediction_basepath=path,
279 | prediction_extension='.png',
280 | prediction_contains_void=True,
281 | max_depth_in_m=20)
282 | cms = thread_map(f, list(range(len(dataset))),
283 | max_workers=args.n_worker,
284 | chunksize=10,
285 | leave=False)
286 |
287 | # get overall measures
288 | assert len(cms) == len(samples)
289 | cm = np.array(cms).sum(axis=0)
290 |
291 | measures = get_measures(cm, ignore_void=True)
292 | for k in ('miou_gt_masked', 'mean_pacc_gt_masked',
293 | 'invalid_ratio', 'invalid_mean_ratio_gt_masked',
294 | 'vwmiou_gt_masked', 'vwmean_pacc_gt_masked'):
295 | print(f"{k}: {measures[k]}")
296 |
297 | # get results for each scene
298 | cms_per_scene = {s: [] for s in scenes}
299 | for cm, sample in zip(cms, samples):
300 | scene = sample[0]
301 | cms_per_scene[scene].append(cm)
302 |
303 | measures['per_scene'] = {}
304 | for scene, cms_scene in cms_per_scene.items():
305 | cm = np.array(cms_scene).sum(axis=0)
306 | measures['per_scene'][scene] = get_measures(cm, ignore_void=True)
307 |
308 | # write results to file
309 | with open(results_fp, 'w') as f:
310 | json.dump(measures, f, indent=4)
311 |
312 |
313 | if __name__ == '__main__':
314 | main()
315 |
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | .. codeauthor:: Daniel Seichter
4 | """
5 | import argparse as ap
6 | import os
7 |
8 | import cv2
9 | from nicr_scene_analysis_datasets import Hypersim
10 | from nicr_scene_analysis_datasets.utils.img import save_indexed_png
11 | import numpy as np
12 | import onnx
13 | import onnxruntime as ort
14 | from tqdm import tqdm
15 |
16 | from utils import DEFAULT_DATASET_PATH
17 | from utils import DEFAULT_ONNX_FILEPATH
18 | from utils import DEFAULT_PREDICTIONS_PATH
19 |
20 |
21 | def _get_ort_session(onnx_filepath, img_hw, topk=3):
22 | model = onnx.load(onnx_filepath)
23 |
24 | # get network output shape (same as input shape)
25 | # note: our optimizations to the resize operations seem to break onnx's
26 | # shape inference with OpSet >= 13
27 | model_output_img_shape = (
28 | model.graph.input[0].type.tensor_type.shape.dim[2].dim_value,
29 | model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
30 | )
31 |
32 | # add missing nodes: final upsampling, softmax, and topk
33 | # see: https://github.com/onnx/onnx/blob/main/docs/Operators.md
34 | # -> final upsampling
35 | final_upsampling_node = onnx.helper.make_node(
36 | 'Resize',
37 | # inputs=['output', 'roi', 'scales'],
38 | inputs=['output', '', 'scales'], # '' for 'roi' requires OpSet >= 13
39 | outputs=['final_upsampling_output'],
40 | coordinate_transformation_mode='pytorch_half_pixel',
41 | cubic_coeff_a=-0.75,
42 | mode='linear',
43 | nearest_mode='floor',
44 | )
45 | # roi = onnx.helper.make_tensor('roi', onnx.TensorProto.FLOAT, [0], [])
46 | scale_h = img_hw[0] / model_output_img_shape[0]
47 | scale_w = img_hw[1] / model_output_img_shape[1]
48 | scales = onnx.helper.make_tensor('scales',
49 | onnx.TensorProto.FLOAT, [4],
50 | [1, 1, scale_h, scale_w])
51 | # -> softmax (note that softmax op with 4D inputs requires OpSet >= 13)
52 | softmax_node = onnx.helper.make_node(
53 | 'Softmax',
54 | inputs=['final_upsampling_output'],
55 | outputs=['prediction'],
56 | axis=1
57 | )
58 | # topk
59 | topk_node = onnx.helper.make_node(
60 | 'TopK',
61 | inputs=['prediction', 'k'],
62 | outputs=['scores', 'classes'],
63 | axis=1,
64 | largest=1,
65 | sorted=1
66 | )
67 | k = onnx.helper.make_tensor('k', onnx.TensorProto.INT64, [1], [int(topk)])
68 |
69 | # add new nodes and initializers to graph
70 | # model.graph.initializer.append(roi)
71 | model.graph.initializer.append(scales)
72 | model.graph.node.append(final_upsampling_node)
73 | model.graph.node.append(softmax_node)
74 | model.graph.initializer.append(k)
75 | model.graph.node.append(topk_node)
76 |
77 | # replace output information
78 | if model.graph.input[0].type.tensor_type.shape.dim[0].dim_param:
79 | # dynamic batch axis
80 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_param
81 | else:
82 | # fixed batch axis
83 | b = model.graph.input[0].type.tensor_type.shape.dim[0].dim_value
84 |
85 | scores_info = onnx.helper.make_tensor_value_info('scores',
86 | onnx.TensorProto.FLOAT,
87 | shape=[b, topk, *img_hw])
88 | classes_info = onnx.helper.make_tensor_value_info('classes',
89 | onnx.TensorProto.INT64,
90 | shape=[b, topk, *img_hw])
91 | model.graph.output.pop(0)
92 | model.graph.output.append(scores_info)
93 | model.graph.output.append(classes_info)
94 |
95 | # perform final check
96 | onnx.checker.check_model(model)
97 | # onnx.save(model, './model.onnx')
98 |
99 |     # create onnxruntime session
100 | ort_session = ort.InferenceSession(
101 | model.SerializeToString(),
102 | providers=[
103 | # 'TensorrtExecutionProvider',
104 | 'CUDAExecutionProvider',
105 | 'CPUExecutionProvider'
106 | ]
107 | )
108 | return ort_session
109 |
110 |
111 | def _parse_args():
112 | parser = ap.ArgumentParser(formatter_class=ap.ArgumentDefaultsHelpFormatter)
113 | parser.add_argument(
114 | '--onnx-filepath',
115 | type=str,
116 | default=DEFAULT_ONNX_FILEPATH,
117 | help="Path to ONNX model to use."
118 | )
119 | parser.add_argument(
120 | '--dataset-path',
121 | type=str,
122 | default=DEFAULT_DATASET_PATH,
123 | help="Path to the dataset."
124 | )
125 | parser.add_argument(
126 | '--dataset-split',
127 | type=str,
128 | default='test',
129 | help="Dataset split to use."
130 | )
131 | parser.add_argument(
132 | '--output-path',
133 | type=str,
134 | default=DEFAULT_PREDICTIONS_PATH,
135 | help="Path where to store predicted semantic segmentation."
136 | )
137 | parser.add_argument(
138 | '--topk',
139 | type=int,
140 | default=3,
141 | help="TopK classes to consider."
142 | )
143 | return parser.parse_args()
144 |
145 |
146 | def main():
147 | # args
148 | args = _parse_args()
149 |
150 | # load data
151 | dataset = Hypersim(
152 | dataset_path=args.dataset_path,
153 | split=args.dataset_split,
154 | subsample=None,
155 | sample_keys=('identifier', 'rgb', 'depth'),
156 | depth_mode='raw'
157 | )
158 |
159 | RGB_MEAN = np.array((0.485, 0.456, 0.406), dtype='float32') * 255
160 | RGB_STD = np.array((0.229, 0.224, 0.225), dtype='float32') * 255
161 |
162 | # ensure that the used depth stats are valid for this model (there was a
163 | # copy and paste issue that we fixed in future versions of
164 | # nicr_scene_analysis_datasets)
165 | assert dataset.depth_mean == 6249.621001070915
166 | assert dataset.depth_std == 6249.621001070915 # <- c&p ^^
167 |
168 | # process files (for simplification with batch size 1)
169 | ort_session = None
170 | for sample in tqdm(dataset, desc='Processing files'):
171 | # load model lazily (we need a sample to get the spatial dimensions)
172 | if ort_session is None:
173 | ort_session = _get_ort_session(
174 | onnx_filepath=args.onnx_filepath,
175 | img_hw=sample['rgb'].shape[:2],
176 | topk=args.topk
177 | )
178 |
179 | # get network input shape (from rgb input)
180 | h, w = ort_session.get_inputs()[0].shape[-2:]
181 |
182 | # rgb preprocessing
183 | # -> resize
184 | rgb = cv2.resize(sample['rgb'], (w, h),
185 | interpolation=cv2.INTER_LINEAR)
186 | # -> normalize
187 | rgb = rgb.astype('float32')
188 | rgb -= RGB_MEAN[None, None, ...]
189 | rgb /= RGB_STD[None, None, ...]
190 | # -> create tensor (add batch axis, channels first)
191 | rgb = rgb.transpose(2, 0, 1)[None, ...]
192 |
193 | # depth preprocessing
194 | # -> resize
195 | depth = cv2.resize(sample['depth'], (w, h),
196 | interpolation=cv2.INTER_NEAREST)
197 | # -> normalize
198 | mask_invalid = depth == 0 # mask for invalid depth values
199 | depth = depth.astype('float32')
200 | depth -= dataset.depth_mean
201 | depth /= dataset.depth_std
202 | # reset invalid values (the network should not be able to learn from
203 | # these pixels)
204 | depth[mask_invalid] = 0
205 | # -> create tensor (add batch and channel axes)
206 | depth = depth[None, None, ...]
207 |
208 | # apply model
209 | scores, classes = ort_session.run(None, {'rgb': rgb, 'depth': depth})
210 |
211 | # remove batch axis
212 | scores = scores[0]
213 | classes = classes[0]
214 |
215 | # cast classes to uint8 (< 255 classes)
216 | classes = classes.astype('uint8')
217 |
218 | # create predicted segmentation
219 |         # note that we store the topk predictions as class_idx + score (to
220 |         # save some space); you could further think about using float16
221 | scores_clamped = np.clip(scores, a_min=0, a_max=0.9999)
222 | classes = classes + 1 # add void class (void + 40 classes)
223 | segmentation = scores_clamped + classes
224 |
225 |         # ensure that the class is still correct (top1 only)
226 | assert (segmentation[0].astype('uint8') == classes[0]).all()
227 |
228 | # store predicted segmentation
229 | # -> topk prediction (for mapping later)
230 | fp = os.path.join(args.output_path, args.dataset_split,
231 | f'{Hypersim.SEMANTIC_DIR}_topk',
232 | *sample['identifier'])
233 | os.makedirs(os.path.dirname(fp), exist_ok=True)
234 | np.save(f'{fp}.npy', segmentation)
235 |
236 | # -> predicted classes
237 | for i in range(args.topk):
238 | dirname = Hypersim.SEMANTIC_DIR
239 | if i > 0:
240 | dirname += f'_topk_{i}'
241 | fp = os.path.join(args.output_path, args.dataset_split,
242 | dirname, *sample['identifier'])
243 | os.makedirs(os.path.dirname(fp), exist_ok=True)
244 | cv2.imwrite(f'{fp}.png', segmentation[i].astype('uint8'))
245 |
246 | # -> predicted classes as colored images (with color palette, do not
247 | # load these images later on with OpenCV, PIL is fine)
248 | for i in range(args.topk):
249 | dirname = Hypersim.SEMANTIC_COLORED_DIR
250 | if i > 0:
251 | dirname += f'_topk_{i}'
252 | fp = os.path.join(args.output_path, args.dataset_split,
253 | dirname, *sample['identifier'])
254 | os.makedirs(os.path.dirname(fp), exist_ok=True)
255 | save_indexed_png(f'{fp}.png', segmentation[i].astype('uint8'),
256 | colormap=dataset.semantic_class_colors)
257 |
258 |
259 | if __name__ == '__main__':
260 | main()
261 |
--------------------------------------------------------------------------------
/semantic_mapping.yaml:
--------------------------------------------------------------------------------
1 | name: semantic_mapping
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - _libgcc_mutex=0.1=main
7 | - _openmp_mutex=4.5=1_gnu
8 | - alabaster=0.7.12=pyhd3eb1b0_0
9 | - anaconda=2021.11=py38_0
10 | - anaconda-client=1.9.0=py38h06a4308_0
11 | - anaconda-project=0.10.1=pyhd3eb1b0_0
12 | - anyio=2.2.0=py38h06a4308_1
13 | - appdirs=1.4.4=pyhd3eb1b0_0
14 | - argh=0.26.2=py38_0
15 | - argon2-cffi=20.1.0=py38h27cfd23_1
16 | - arrow=0.13.1=py38_0
17 | - asn1crypto=1.4.0=py_0
18 | - astroid=2.6.6=py38h06a4308_0
19 | - astropy=4.3.1=py38h09021b7_0
20 | - async_generator=1.10=pyhd3eb1b0_0
21 | - atomicwrites=1.4.0=py_0
22 | - attrs=21.2.0=pyhd3eb1b0_0
23 | - autopep8=1.5.7=pyhd3eb1b0_0
24 | - babel=2.9.1=pyhd3eb1b0_0
25 | - backcall=0.2.0=pyhd3eb1b0_0
26 | - backports=1.0=pyhd3eb1b0_2
27 | - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3
28 | - beautifulsoup4=4.10.0=pyh06a4308_0
29 | - binaryornot=0.4.4=pyhd3eb1b0_1
30 | - bitarray=2.3.0=py38h7f8727e_1
31 | - bkcharts=0.2=py38_0
32 | - black=19.10b0=py_0
33 | - blas=1.0=mkl
34 | - bleach=4.0.0=pyhd3eb1b0_0
35 | - blosc=1.21.0=h8c45485_0
36 | - bokeh=2.4.1=py38h06a4308_0
37 | - boto=2.49.0=py38_0
38 | - bottleneck=1.3.2=py38heb32a55_1
39 | - brotli=1.0.9=he6710b0_2
40 | - brotlipy=0.7.0=py38h27cfd23_1003
41 | - brunsli=0.1=h2531618_0
42 | - bzip2=1.0.8=h7b6447c_0
43 | - c-ares=1.17.1=h27cfd23_0
44 | - ca-certificates=2021.10.26=h06a4308_2
45 | - cairo=1.16.0=hf32fb01_1
46 | - certifi=2021.10.8=py38h06a4308_0
47 | - cffi=1.14.6=py38h400218f_0
48 | - cfitsio=3.470=hf0d0db6_6
49 | - chardet=4.0.0=py38h06a4308_1003
50 | - charls=2.2.0=h2531618_0
51 | - charset-normalizer=2.0.4=pyhd3eb1b0_0
52 | - click=8.0.3=pyhd3eb1b0_0
53 | - cloudpickle=2.0.0=pyhd3eb1b0_0
54 | - clyent=1.2.2=py38_1
55 | - colorama=0.4.4=pyhd3eb1b0_0
56 | - conda=4.12.0=py38h06a4308_0
57 | - conda-content-trust=0.1.1=pyhd3eb1b0_0
58 | - conda-pack=0.6.0=pyhd3eb1b0_0
59 | - conda-package-handling=1.8.1=py38h7f8727e_0
60 | - conda-token=0.3.0=pyhd3eb1b0_0
61 | - contextlib2=0.6.0.post1=pyhd3eb1b0_0
62 | - cookiecutter=1.7.2=pyhd3eb1b0_0
63 | - cryptography=3.4.8=py38hd23ed53_0
64 | - cudatoolkit=11.2.2=he111cf0_8
65 | - cudnn=8.2.1.32=h86fa8c9_0
66 | - curl=7.78.0=h1ccaba5_0
67 | - cycler=0.10.0=py38_0
68 | - cython=0.29.24=py38hdbfa776_0
69 | - cytoolz=0.11.0=py38h7b6447c_0
70 | - daal4py=2021.3.0=py38hae6d005_0
71 | - dal=2021.3.0=h06a4308_557
72 | - dask=2021.10.0=pyhd3eb1b0_0
73 | - dask-core=2021.10.0=pyhd3eb1b0_0
74 | - dataclasses=0.8=pyh6d0b6a4_7
75 | - dbus=1.13.18=hb2f20db_0
76 | - debugpy=1.4.1=py38h295c915_0
77 | - decorator=5.1.0=pyhd3eb1b0_0
78 | - defusedxml=0.7.1=pyhd3eb1b0_0
79 | - diff-match-patch=20200713=pyhd3eb1b0_0
80 | - distributed=2021.10.0=py38h06a4308_0
81 | - docutils=0.17.1=py38h06a4308_1
82 | - entrypoints=0.3=py38_0
83 | - et_xmlfile=1.1.0=py38h06a4308_0
84 | - expat=2.4.1=h2531618_2
85 | - fastcache=1.1.0=py38h7b6447c_0
86 | - filelock=3.3.1=pyhd3eb1b0_1
87 | - flake8=3.9.2=pyhd3eb1b0_0
88 | - flask=1.1.2=pyhd3eb1b0_0
89 | - fontconfig=2.13.1=h6c09931_0
90 | - fonttools=4.25.0=pyhd3eb1b0_0
91 | - freetype=2.10.4=h5ab3b9f_0
92 | - fribidi=1.0.10=h7b6447c_0
93 | - fsspec=2021.8.1=pyhd3eb1b0_0
94 | - get_terminal_size=1.0.0=haa9412d_0
95 | - gevent=21.8.0=py38h7f8727e_1
96 | - giflib=5.2.1=h7b6447c_0
97 | - glib=2.69.1=h5202010_0
98 | - glob2=0.7=pyhd3eb1b0_0
99 | - gmp=6.2.1=h2531618_2
100 | - gmpy2=2.0.8=py38hd5f6e3b_3
101 | - graphite2=1.3.14=h23475e2_0
102 | - greenlet=1.1.1=py38h295c915_0
103 | - gst-plugins-base=1.14.0=h8213a91_2
104 | - gstreamer=1.14.0=h28cd5cc_2
105 | - h5py=2.10.0=py38h7918eee_0
106 | - harfbuzz=2.8.1=h6f93f22_0
107 | - hdf5=1.10.4=hb1b8bf9_0
108 | - heapdict=1.0.1=pyhd3eb1b0_0
109 | - html5lib=1.1=pyhd3eb1b0_0
110 | - icu=58.2=he6710b0_3
111 | - idna=3.2=pyhd3eb1b0_0
112 | - imagecodecs=2021.8.26=py38h4cda21f_0
113 | - imageio=2.9.0=pyhd3eb1b0_0
114 | - imagesize=1.2.0=pyhd3eb1b0_0
115 | - importlib-metadata=4.8.1=py38h06a4308_0
116 | - importlib_metadata=4.8.1=hd3eb1b0_0
117 | - inflection=0.5.1=py38h06a4308_0
118 | - iniconfig=1.1.1=pyhd3eb1b0_0
119 | - intel-openmp=2021.4.0=h06a4308_3561
120 | - intervaltree=3.1.0=pyhd3eb1b0_0
121 | - ipykernel=6.4.1=py38h06a4308_1
122 | - ipython=7.29.0=py38hb070fc8_0
123 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
124 | - ipywidgets=7.6.5=pyhd3eb1b0_1
125 | - isort=5.9.3=pyhd3eb1b0_0
126 | - itsdangerous=2.0.1=pyhd3eb1b0_0
127 | - jbig=2.1=hdba287a_0
128 | - jdcal=1.4.1=pyhd3eb1b0_0
129 | - jedi=0.18.0=py38h06a4308_1
130 | - jeepney=0.7.1=pyhd3eb1b0_0
131 | - jinja2=2.11.3=pyhd3eb1b0_0
132 | - jinja2-time=0.2.0=pyhd3eb1b0_2
133 | - joblib=1.1.0=pyhd3eb1b0_0
134 | - jpeg=9d=h7f8727e_0
135 | - json5=0.9.6=pyhd3eb1b0_0
136 | - jsonschema=3.2.0=pyhd3eb1b0_2
137 | - jupyter=1.0.0=py38_7
138 | - jupyter_client=6.1.12=pyhd3eb1b0_0
139 | - jupyter_console=6.4.0=pyhd3eb1b0_0
140 | - jupyter_core=4.8.1=py38h06a4308_0
141 | - jupyter_server=1.4.1=py38h06a4308_0
142 | - jupyterlab=3.2.1=pyhd3eb1b0_1
143 | - jupyterlab_pygments=0.1.2=py_0
144 | - jupyterlab_server=2.8.2=pyhd3eb1b0_0
145 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
146 | - jxrlib=1.1=h7b6447c_2
147 | - keyring=23.1.0=py38h06a4308_0
148 | - kiwisolver=1.3.1=py38h2531618_0
149 | - krb5=1.19.2=hac12032_0
150 | - lazy-object-proxy=1.6.0=py38h27cfd23_0
151 | - lcms2=2.12=h3be6417_0
152 | - ld_impl_linux-64=2.35.1=h7274673_9
153 | - lerc=3.0=h295c915_0
154 | - libaec=1.0.4=he6710b0_1
155 | - libarchive=3.4.2=h62408e4_0
156 | - libcurl=7.78.0=h0b77cf5_0
157 | - libdeflate=1.8=h7f8727e_5
158 | - libedit=3.1.20210910=h7f8727e_0
159 | - libev=4.33=h7f8727e_1
160 | - libffi=3.3=he6710b0_2
161 | - libgcc-ng=9.3.0=h5101ec6_17
162 | - libgfortran-ng=7.5.0=ha8ba4b0_17
163 | - libgfortran4=7.5.0=ha8ba4b0_17
164 | - libgomp=9.3.0=h5101ec6_17
165 | - liblief=0.10.1=he6710b0_0
166 | - libllvm11=11.1.0=h3826bc1_0
167 | - libnghttp2=1.41.0=hf8bcb03_2
168 | - libpng=1.6.37=hbc83047_0
169 | - libsodium=1.0.18=h7b6447c_0
170 | - libspatialindex=1.9.3=h2531618_0
171 | - libssh2=1.9.0=h1ba5d50_1
172 | - libstdcxx-ng=9.3.0=hd4cf53a_17
173 | - libtiff=4.2.0=h85742a9_0
174 | - libtool=2.4.6=h7b6447c_1005
175 | - libuuid=1.0.3=h7f8727e_2
176 | - libuv=1.40.0=h7b6447c_0
177 | - libwebp=1.2.0=h89dd481_0
178 | - libwebp-base=1.2.0=h27cfd23_0
179 | - libxcb=1.14=h7b6447c_0
180 | - libxml2=2.9.12=h03d6c58_0
181 | - libxslt=1.1.34=hc22bd24_0
182 | - libzopfli=1.0.3=he6710b0_0
183 | - llvmlite=0.37.0=py38h295c915_1
184 | - locket=0.2.1=py38h06a4308_1
185 | - lxml=4.6.3=py38h9120a33_0
186 | - lz4-c=1.9.3=h295c915_1
187 | - lzo=2.10=h7b6447c_2
188 | - markupsafe=1.1.1=py38h7b6447c_0
189 | - matplotlib=3.4.3=py38h06a4308_0
190 | - matplotlib-base=3.4.3=py38hbbc1b5f_0
191 | - matplotlib-inline=0.1.2=pyhd3eb1b0_2
192 | - mccabe=0.6.1=py38_1
193 | - mistune=0.8.4=py38h7b6447c_1000
194 | - mkl=2021.4.0=h06a4308_640
195 | - mkl-service=2.4.0=py38h7f8727e_0
196 | - mkl_fft=1.3.1=py38hd3c417c_0
197 | - mkl_random=1.2.2=py38h51133e4_0
198 | - mock=4.0.3=pyhd3eb1b0_0
199 | - more-itertools=8.10.0=pyhd3eb1b0_0
200 | - mpc=1.1.0=h10f8cd9_1
201 | - mpfr=4.0.2=hb69a4c5_1
202 | - mpi=1.0=mpich
203 | - mpich=3.3.2=hc856adb_0
204 | - mpmath=1.2.1=py38h06a4308_0
205 | - msgpack-python=1.0.2=py38hff7bd54_1
206 | - multipledispatch=0.6.0=py38_0
207 | - munkres=1.1.4=py_0
208 | - mypy_extensions=0.4.3=py38_0
209 | - nbclassic=0.2.6=pyhd3eb1b0_0
210 | - nbclient=0.5.3=pyhd3eb1b0_0
211 | - nbconvert=6.1.0=py38h06a4308_0
212 | - nbformat=5.1.3=pyhd3eb1b0_0
213 | - ncurses=6.3=heee7806_1
214 | - nest-asyncio=1.5.1=pyhd3eb1b0_0
215 | - networkx=2.6.3=pyhd3eb1b0_0
216 | - nltk=3.6.5=pyhd3eb1b0_0
217 | - nose=1.3.7=pyhd3eb1b0_1006
218 | - notebook=6.4.5=py38h06a4308_0
219 | - numba=0.54.1=py38h51133e4_0
220 | - numexpr=2.7.3=py38h22e1b3c_1
221 | - numpydoc=1.1.0=pyhd3eb1b0_1
222 | - olefile=0.46=pyhd3eb1b0_0
223 | - openjpeg=2.4.0=h3ad879b_0
224 | - openpyxl=3.0.9=pyhd3eb1b0_0
225 | - openssl=1.1.1l=h7f8727e_0
226 | - packaging=21.0=pyhd3eb1b0_0
227 | - pandas=1.3.4=py38h8c16a72_0
228 | - pandocfilters=1.4.3=py38h06a4308_1
229 | - pango=1.45.3=hd140c19_0
230 | - parso=0.8.2=pyhd3eb1b0_0
231 | - partd=1.2.0=pyhd3eb1b0_0
232 | - patchelf=0.13=h295c915_0
233 | - path=16.0.0=py38h06a4308_0
234 | - path.py=12.5.0=hd3eb1b0_0
235 | - pathlib2=2.3.6=py38h06a4308_2
236 | - pathspec=0.7.0=py_0
237 | - patsy=0.5.2=py38h06a4308_0
238 | - pcre=8.45=h295c915_0
239 | - pep8=1.7.1=py38_0
240 | - pexpect=4.8.0=pyhd3eb1b0_3
241 | - pickleshare=0.7.5=pyhd3eb1b0_1003
242 | - pillow=8.4.0=py38h5aabda8_0
243 | - pip=21.2.4=py38h06a4308_0
244 | - pixman=0.40.0=h7f8727e_1
245 | - pkginfo=1.7.1=py38h06a4308_0
246 | - pluggy=0.13.1=py38h06a4308_0
247 | - ply=3.11=py38_0
248 | - poyo=0.5.0=pyhd3eb1b0_0
249 | - prometheus_client=0.11.0=pyhd3eb1b0_0
250 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0
251 | - prompt_toolkit=3.0.20=hd3eb1b0_0
252 | - psutil=5.8.0=py38h27cfd23_1
253 | - ptyprocess=0.7.0=pyhd3eb1b0_2
254 | - py=1.10.0=pyhd3eb1b0_0
255 | - py-lief=0.10.1=py38h403a769_0
256 | - pycodestyle=2.7.0=pyhd3eb1b0_0
257 | - pycosat=0.6.3=py38h7b6447c_1
258 | - pycparser=2.20=py_2
259 | - pycurl=7.44.1=py38h8f2d780_1
260 | - pydocstyle=6.1.1=pyhd3eb1b0_0
261 | - pyerfa=2.0.0=py38h27cfd23_0
262 | - pyflakes=2.3.1=pyhd3eb1b0_0
263 | - pygments=2.10.0=pyhd3eb1b0_0
264 | - pylint=2.9.6=py38h06a4308_1
265 | - pyls-spyder=0.4.0=pyhd3eb1b0_0
266 | - pyodbc=4.0.31=py38h295c915_0
267 | - pyopenssl=21.0.0=pyhd3eb1b0_1
268 | - pyparsing=3.0.4=pyhd3eb1b0_0
269 | - pyqt=5.9.2=py38h05f1152_4
270 | - pyrsistent=0.18.0=py38heee7806_0
271 | - pysocks=1.7.1=py38h06a4308_0
272 | - pytables=3.6.1=py38h9fd0a39_0
273 | - pytest=6.2.4=py38h06a4308_2
274 | - python=3.8.12=h12debd9_0
275 | - python-dateutil=2.8.2=pyhd3eb1b0_0
276 | - python-libarchive-c=2.9=pyhd3eb1b0_1
277 | - python-lsp-black=1.0.0=pyhd3eb1b0_0
278 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0
279 | - python-lsp-server=1.2.4=pyhd3eb1b0_0
280 | - python-slugify=5.0.2=pyhd3eb1b0_0
281 | - pytz=2021.3=pyhd3eb1b0_0
282 | - pywavelets=1.1.1=py38h7b6447c_2
283 | - pyxdg=0.27=pyhd3eb1b0_0
284 | - pyyaml=6.0=py38h7f8727e_1
285 | - pyzmq=22.2.1=py38h295c915_1
286 | - qdarkstyle=3.0.2=pyhd3eb1b0_0
287 | - qstylizer=0.1.10=pyhd3eb1b0_0
288 | - qt=5.9.7=h5867ecd_1
289 | - qtawesome=1.0.2=pyhd3eb1b0_0
290 | - qtconsole=5.1.1=pyhd3eb1b0_0
291 | - qtpy=1.10.0=pyhd3eb1b0_0
292 | - readline=8.1=h27cfd23_0
293 | - regex=2021.8.3=py38h7f8727e_0
294 | - requests=2.26.0=pyhd3eb1b0_0
295 | - ripgrep=12.1.1=0
296 | - rope=0.19.0=pyhd3eb1b0_0
297 | - rtree=0.9.7=py38h06a4308_1
298 | - ruamel_yaml=0.15.100=py38h27cfd23_0
299 | - scikit-image=0.18.3=py38h51133e4_0
300 | - scikit-learn=0.24.2=py38ha9443f7_0
301 | - scikit-learn-intelex=2021.3.0=py38h06a4308_0
302 | - scipy=1.7.1=py38h292c36d_2
303 | - seaborn=0.11.2=pyhd3eb1b0_0
304 | - secretstorage=3.3.1=py38h06a4308_0
305 | - send2trash=1.8.0=pyhd3eb1b0_1
306 | - setuptools=58.0.4=py38h06a4308_0
307 | - simplegeneric=0.8.1=py38_2
308 | - singledispatch=3.7.0=pyhd3eb1b0_1001
309 | - sip=4.19.13=py38he6710b0_0
310 | - six=1.16.0=pyhd3eb1b0_0
311 | - snappy=1.1.8=he6710b0_0
312 | - sniffio=1.2.0=py38h06a4308_1
313 | - snowballstemmer=2.1.0=pyhd3eb1b0_0
314 | - sortedcollections=2.1.0=pyhd3eb1b0_0
315 | - sortedcontainers=2.4.0=pyhd3eb1b0_0
316 | - soupsieve=2.2.1=pyhd3eb1b0_0
317 | - sphinx=4.2.0=pyhd3eb1b0_1
318 | - sphinxcontrib=1.0=py38_1
319 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0
320 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0
321 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0
322 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
323 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0
324 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0
325 | - sphinxcontrib-websupport=1.2.4=py_0
326 | - spyder=5.1.5=py38h06a4308_1
327 | - spyder-kernels=2.1.3=py38h06a4308_0
328 | - sqlalchemy=1.4.22=py38h7f8727e_0
329 | - sqlite=3.36.0=hc218d9a_0
330 | - statsmodels=0.12.2=py38h27cfd23_0
331 | - sympy=1.9=py38h06a4308_0
332 | - tbb=2021.4.0=hd09550d_0
333 | - tbb4py=2021.4.0=py38hd09550d_0
334 | - tblib=1.7.0=pyhd3eb1b0_0
335 | - terminado=0.9.4=py38h06a4308_0
336 | - testpath=0.5.0=pyhd3eb1b0_0
337 | - text-unidecode=1.3=pyhd3eb1b0_0
338 | - textdistance=4.2.1=pyhd3eb1b0_0
339 | - threadpoolctl=2.2.0=pyh0d69192_0
340 | - three-merge=0.1.1=pyhd3eb1b0_0
341 | - tifffile=2021.7.2=pyhd3eb1b0_2
342 | - tinycss=0.4=pyhd3eb1b0_1002
343 | - tk=8.6.11=h1ccaba5_0
344 | - toml=0.10.2=pyhd3eb1b0_0
345 | - toolz=0.11.1=pyhd3eb1b0_0
346 | - tornado=6.1=py38h27cfd23_0
347 | - tqdm=4.62.3=pyhd3eb1b0_1
348 | - traitlets=5.1.0=pyhd3eb1b0_0
349 | - typed-ast=1.4.3=py38h7f8727e_1
350 | - typing_extensions=3.10.0.2=pyh06a4308_0
351 | - ujson=4.0.2=py38h2531618_0
352 | - unicodecsv=0.14.1=py38_0
353 | - unidecode=1.2.0=pyhd3eb1b0_0
354 | - unixodbc=2.3.9=h7b6447c_0
355 | - urllib3=1.26.7=pyhd3eb1b0_0
356 | - watchdog=2.1.3=py38h06a4308_0
357 | - wcwidth=0.2.5=pyhd3eb1b0_0
358 | - webencodings=0.5.1=py38_1
359 | - werkzeug=2.0.2=pyhd3eb1b0_0
360 | - wheel=0.37.0=pyhd3eb1b0_1
361 | - whichcraft=0.6.1=pyhd3eb1b0_0
362 | - widgetsnbextension=3.5.1=py38_0
363 | - wrapt=1.12.1=py38h7b6447c_1
364 | - wurlitzer=2.1.1=py38h06a4308_0
365 | - xlrd=2.0.1=pyhd3eb1b0_0
366 | - xlsxwriter=3.0.1=pyhd3eb1b0_0
367 | - xlwt=1.3.0=py38_0
368 | - xz=5.2.5=h7b6447c_0
369 | - yaml=0.2.5=h7b6447c_0
370 | - yapf=0.31.0=pyhd3eb1b0_0
371 | - zeromq=4.3.4=h2531618_0
372 | - zfp=0.5.5=h2531618_6
373 | - zict=2.0.0=pyhd3eb1b0_0
374 | - zipp=3.6.0=pyhd3eb1b0_0
375 | - zlib=1.2.11=h7b6447c_3
376 | - zope=1.0=py38_1
377 | - zope.event=4.5.0=py38_0
378 | - zope.interface=5.4.0=py38h7f8727e_0
379 | - zstd=1.4.9=haebb681_0
380 | - pip:
381 | - cityscapesscripts==1.5.0
382 | - flatbuffers==2.0
383 | # - nicr-scene-analysis-datasets==0.3.1
384 | - numpy==1.22.3
385 | - onnx==1.11.0
386 | - onnxruntime-gpu==1.11.0
387 | - opencv-python==4.2.0.34
388 | - protobuf==3.20.1
389 | prefix: /home/user/anaconda3/envs/semantic_mapping
390 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | # E501 line too long (82 > 79 characters)
3 | # E402 module level import not at top of file
4 | # E731 do not assign a lambda expression, use a def
5 | # +pep8 default ignore: E121, E123, E126, E226, E24, E704
6 | ignore = E226, E501, E402, E731, E121, E123, E126, E226, E24, E704, E265
--------------------------------------------------------------------------------
/trained_models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TUI-NICR/semantic-mapping/c592804e27e83dd96d476593e2528ae29c084eea/trained_models/.gitkeep
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | .. codeauthor:: Daniel Seichter
4 | """
5 | import os
6 |
7 |
8 | def _get_default_path(*path_components):
9 | base_path = os.path.dirname(os.path.abspath(__file__))
10 | return os.path.join(base_path, *path_components)
11 |
12 |
13 | DEFAULT_ONNX_FILEPATH = _get_default_path('trained_models',
14 | 'model_hypersim.onnx')
15 |
16 | DEFAULT_DATASET_PATH = _get_default_path('datasets', 'hypersim')
17 |
18 | DEFAULT_PREDICTIONS_PATH = _get_default_path('datasets', 'hypersim_predictions')
19 |
--------------------------------------------------------------------------------