├── .gitignore ├── LICENCE ├── README.md ├── README_prepare_data_mp3d_layout.md ├── README_prepare_data_s2d3d.md ├── README_reproduction.md ├── assets ├── label13_weight.pth ├── pano_asmasuxybohhcj.depth.png ├── pano_asmasuxybohhcj.layout.txt ├── pano_asmasuxybohhcj.png ├── repo_teaser.jpg ├── snapshot_depth.jpg └── snapshot_layout.jpg ├── config ├── mp3d_depth │ ├── HOHO_depth_dct_efficienthc_TransEn1.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_hardnet.yaml │ └── ablation │ │ ├── tuning___HOHO_depth_dct128_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct256_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct32_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct512_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct_LSTM.yaml │ │ ├── tuning___HOHO_depth_dct_Linear.yaml │ │ ├── tuning___HOHO_depth_dct_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct_efficienthc_LSTM.yaml │ │ ├── tuning___HOHO_depth_dct_efficienthc_Linear.yaml │ │ ├── tuning___HOHO_depth_dct_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_dct_efficienthc_TransEn1_resnet34.yaml │ │ ├── tuning___HOHO_depth_lin128_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_lin256_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_lin32_efficienthc_TransEn1.yaml │ │ ├── tuning___HOHO_depth_lin512_efficienthc_TransEn1.yaml │ │ └── tuning___HOHO_depth_lin64_efficienthc_TransEn1.yaml ├── mp3d_layout │ └── HOHO_layout_aug_efficienthc_Transen1_resnet34.yaml ├── s2d3d_depth │ ├── HOHO_depthS_SGD_dct_efficienthc_TransEn1.yaml │ ├── HOHO_depthS_dct_efficienthc_TransEn1.yaml │ └── HOHO_depth_dct_efficienthc_TransEn1.yaml └── s2d3d_sem │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101rgb.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101rgb.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101rgb.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h256_fold1_simple.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h256_fold2_simple.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h256_fold3_simple.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h64_fold1_simple.yaml │ ├── HOHO_depth_dct_efficienthc_TransEn1_h64_fold2_simple.yaml │ └── HOHO_depth_dct_efficienthc_TransEn1_h64_fold3_simple.yaml ├── count_params_flops.py ├── eval_layout.py ├── infer_depth.ipynb ├── infer_depth.py ├── infer_layout.ipynb ├── infer_layout.py ├── infer_sem.ipynb ├── lib ├── config.py ├── dataset │ ├── __init__.py │ ├── dataset_depth.py │ ├── dataset_layout.py │ └── dataset_s2d3d_sem.py ├── misc │ ├── __init__.py │ ├── gen_txt_structured3d.py │ ├── pano_lsd_align.py │ ├── panostretch.py │ ├── post_proc.py │ ├── structured3d_extract_zip.py │ ├── structured3d_prepare_dataset.py │ └── utils.py └── model │ ├── backbone │ ├── __init__.py │ ├── hardnet.py │ ├── resnet.py │ └── simple.py │ ├── hohonet.py │ ├── horizon_compression │ ├── __init__.py │ ├── ehc.py │ ├── hc.py │ └── simple.py │ ├── horizon_refinement │ ├── __init__.py │ ├── attention.py │ ├── identity.py │ ├── linear.py │ └── rnn.py │ ├── horizon_upsample │ ├── __init__.py │ └── upsample1d.py │ ├── modality │ ├── __init__.py │ ├── bases.py │ ├── depth.py │ ├── layout.py │ └── semantic.py │ └── utils.py ├── test_depth.py ├── test_layout.py ├── test_sem.py ├── train.py ├── vis_depth.py └── vis_layout.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | .DS_Store 2 | output 3 | ckpt 4 | data 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 sunset 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HoHoNet 2 | 3 | Code for our paper in CVPR 2021: **HoHoNet: 360 Indoor Holistic Understanding with Latent Horizontal Features** ([paper](https://arxiv.org/abs/2011.11498), [video](https://www.youtube.com/watch?v=xXtRaRKmMpA)). 4 | 5 | ![teaser](./assets/repo_teaser.jpg) 6 | 7 | #### News 8 | - **April 3, 2021**: Release inference code, jupyter notebook and visualization tools. Guide for reproduction is also finished. 9 | - **March 4, 2021**: A new backbone **[HarDNet](https://github.com/PingoLH/Pytorch-HarDNet)** is included, which shows better speed and depth accuracy. 10 | 11 | 12 | ## Pretrained weight 13 | Links to trained weights `ckpt/`: [download on Google drive](https://drive.google.com/drive/folders/1raT3vRXnQXRAQuYq36dE-93xFc_hgkTQ?usp=sharing) or [download on Dropbox](https://www.dropbox.com/sh/b014nop5jrehpoq/AACWNTMMHEAbaKOO1drqGio4a?dl=0). 14 | 15 | 16 | ## Inference 17 | In below, we use an out-of-training-distribution 360 image from PanoContext as an example. 18 | 19 | ### Jupyter notebook 20 | See [infer_depth.ipynb](infer_depth.ipynb), [infer_layout.ipynb](infer_layout.ipynb), and [infer_sem.ipynb](infer_sem.ipynb) for interactive demo and visualization. 21 | 22 | ### Batch inference 23 | Run `infer_depth.py`/`infer_layout.py` to inference depth/layout. 24 | Use `--cfg` and `--pth` to specify the path to config file and pretrained weight. 25 | Specify input path with `--inp`. Glob pattern for a batch of files is avaiable. 26 | The results are stored into `--out` directory with the same filename with extention set ot `.depth.png` and `.layout.txt`. 27 | 28 | Example for depth: 29 | ``` 30 | python infer_depth.py --cfg config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1_hardnet.yaml --pth ckpt/mp3d_depth_HOHO_depth_dct_efficienthc_TransEn1_hardnet/ep60.pth --out assets/ --inp assets/pano_asmasuxybohhcj.png 31 | ``` 32 | 33 | Example for layout: 34 | ``` 35 | python infer_layout.py --cfg config/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34.yaml --pth ckpt/mp3d_layout_HOHO_layout_aug_efficienthc_Transen1_resnet34/ep300.pth --out assets/ --inp assets/pano_asmasuxybohhcj.png 36 | ``` 37 | 38 | ### Visualization tools 39 | To visualize layout as 3D mesh, run: 40 | ``` 41 | python vis_layout.py --img assets/pano_asmasuxybohhcj.png --layout assets/pano_asmasuxybohhcj.layout.txt 42 | ``` 43 | Rendering options: `--show_ceiling`, `--ignore_floor`, `--ignore_wall`, `--ignore_wireframe` are available. 44 | Set `--out` to export the mesh to `ply` file. 45 | Set `--no_vis` to disable the visualization. 46 |

47 | ![layout visualization snapshot](assets/snapshot_layout.jpg) 48 |

49 | 50 | 51 | To visualize depth as a point cloud, run: 52 | ``` 53 | python vis_depth.py --img assets/pano_asmasuxybohhcj.png --depth assets/pano_asmasuxybohhcj.depth.png 54 | ``` 55 | Rendering options: `--crop_ratio` and `--crop_z_above` are available. 56 |
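For intuition, the point cloud is simply the equirectangular depth map unprojected along each pixel's viewing ray. Below is a minimal sketch of that unprojection, not the actual `vis_depth.py` implementation; the 16-bit depth PNG is assumed to store metric depth under a fixed scale, so `DEPTH_SCALE` and the axis convention are placeholders to be matched against `vis_depth.py`.
```python
import numpy as np
from imageio import imread

DEPTH_SCALE = 512.0  # assumption: PNG units per meter; check vis_depth.py for the real factor

def pano_depth_to_points(depth_path):
    depth = imread(depth_path).astype(np.float32) / DEPTH_SCALE   # HxW distances in meters
    H, W = depth.shape
    # Longitude/latitude of each pixel center on the equirectangular grid
    lon = ((np.arange(W) + 0.5) / W - 0.5) * 2 * np.pi
    lat = -((np.arange(H) + 0.5) / H - 0.5) * np.pi
    lon, lat = np.meshgrid(lon, lat)
    # Unit viewing ray per pixel, scaled by its distance
    xyz = np.stack([depth * np.cos(lat) * np.sin(lon),
                    depth * np.cos(lat) * np.cos(lon),
                    depth * np.sin(lat)], -1).reshape(-1, 3)
    return xyz[depth.reshape(-1) > 0]

points = pano_depth_to_points('assets/pano_asmasuxybohhcj.depth.png')
```
Per-point colors can then be taken from the aligned RGB panorama at the same resolution.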

57 | ![depth visualization snapshot](assets/snapshot_depth.jpg) 58 |

59 | 60 | 61 | 62 | ## Reproduction 63 | Please see [README_reproduction.md](README_reproduction.md) for the guide to: 64 | 1. prepare the datasets for each task in our paper 65 | 2. reproduce the training for each task 66 | 3. reproduce the numerical results in our paper with the provided pretrained weights 67 | 68 | 69 | ## Citation 70 | ``` 71 | @inproceedings{SunSC21, 72 | author = {Cheng Sun and 73 | Min Sun and 74 | Hwann{-}Tzong Chen}, 75 | title = {HoHoNet: 360 Indoor Holistic Understanding With Latent Horizontal 76 | Features}, 77 | booktitle = {CVPR}, 78 | year = {2021}, 79 | } 80 | ``` 81 | -------------------------------------------------------------------------------- /README_prepare_data_mp3d_layout.md: -------------------------------------------------------------------------------- 1 | # Prepare MatterportLayout dataset 2 | 3 | References: 4 | - [3D Manhattan Room Layout Reconstruction from a Single 360 Image](https://arxiv.org/abs/1910.04099) 5 | - [PanoAnnotator](https://github.com/SunDaDenny/PanoAnnotator) 6 | - [LayoutMP3D: Layout Annotation of Matterport3D](https://arxiv.org/abs/2003.13516) 7 | - [Matterport3DLayoutAnnotation github](https://github.com/ericsujw/Matterport3DLayoutAnnotation) (we use the annotation provided by LayoutNetv2) 8 | 9 | ## Dataset preparation 10 | ### Step 1: download source 11 | Please refer to [Matterport3DLayoutAnnotation](https://github.com/ericsujw/Matterport3DLayoutAnnotation) to download the source datas. 12 | - Put all the rgb under `{ROOT}/image_up/`. 13 | - Download the annotation to `{ROOT}/label_data/` (originally json format). 14 | - Download the data split into `{ROOT}/mp3d_[train|val|test].txt`. 15 | 16 | ### Step 2: convert json annotation to corners in txt format 17 | Use below code to convert original ground-truth json into txt. **(Remember to update the uppercase variables)** 18 | ```python 19 | import os 20 | import glob 21 | import json 22 | import numpy as np 23 | 24 | IN_GLOB = 'label_data/*json' 25 | OUT_DIR = 'label_cor' 26 | os.makedirs(OUT_DIR, exist_ok=True) 27 | 28 | for p in glob.glob(IN_GLOB): 29 | gt = json.load(open(p)) 30 | assert gt['cameraHeight'] == 1.6 31 | us = np.array([pts['coords'][0] for pts in gt['layoutPoints']['points']]) 32 | us = us * 1024 33 | cs = np.array([pts['xyz'] for pts in gt['layoutPoints']['points']]) 34 | cs = np.sqrt((cs**2)[:, [0, 2]].sum(1)) 35 | 36 | vf = np.arctan2(-1.6, cs) 37 | vc = np.arctan2(-1.6 + gt['layoutHeight'], cs) 38 | vf = (-vf / np.pi + 0.5) * 512 39 | vc = (-vc / np.pi + 0.5) * 512 40 | 41 | cor_x = np.repeat(us, 2) 42 | cor_y = np.stack([vc, vf], -1).reshape(-1) 43 | cor_xy = np.stack([cor_x, cor_y], -1) 44 | 45 | out_path = os.path.join(OUT_DIR, os.path.split(p)[-1][:-4] + 'txt') 46 | with open(out_path, 'w') as f: 47 | for x, y in cor_xy: 48 | f.write('%.2f %.2f\n' % (x, y)) 49 | ``` 50 | 51 | ### Step 3: data split 52 | Use below code to organize the data split for training and evaluation. 
**(Remember to update the uppercase variables)** 53 | ```python 54 | import os 55 | from shutil import copy2 56 | 57 | IMG_ROOT = 'image_up' 58 | TXT_ROOT = 'label_cor' 59 | OUT_ROOT = 'mp3d_layout' 60 | TRAIN_TXT = 'mp3d_train.txt' 61 | VALID_TXT = 'mp3d_val.txt' 62 | TEST_TXT = 'mp3d_test.txt' 63 | 64 | def go(txt, split): 65 | out_img_root = os.path.join(OUT_ROOT, split, 'img') 66 | out_txt_root = os.path.join(OUT_ROOT, split, 'label_cor') 67 | os.makedirs(out_img_root, exist_ok=True) 68 | os.makedirs(out_txt_root, exist_ok=True) 69 | 70 | with open(txt) as f: 71 | ks = ['_'.join(l.strip().split()) for l in f] 72 | 73 | for k in ks: 74 | copy2(os.path.join(IMG_ROOT, k + '.png'), out_img_root) 75 | copy2(os.path.join(TXT_ROOT, k + '_label.txt'), out_txt_root) 76 | os.rename(os.path.join(out_txt_root, k + '_label.txt'), os.path.join(out_txt_root, k + '.txt')) 77 | 78 | 79 | go(TRAIN_TXT, 'train') 80 | go(VALID_TXT, 'valid') 81 | go(TEST_TXT, 'test') 82 | ``` 83 | 84 | ### Step 4: clamp occlusion 85 | We assume only visible corners in txt annotation (which is the same as [Holistic 3D Vision Challenge, ECCV2020](https://competitions.codalab.org/competitions/24183#learn_the_details-evaluation)'s format). 86 | For MatterportLayout dataset, please copy&paste below script to `clamp_occ_corners.py` and run: 87 | - `python clamp_occ_corners.py --ori_glob "data/mp3d_layout/train/label_cor/*txt" --output_dir data/mp3d_layout/train_no_occ/label_cor/*txt` 88 | - `python clamp_occ_corners.py --ori_glob "data/mp3d_layout/valid/label_cor/*txt" --output_dir data/mp3d_layout/valid_no_occ/label_cor/*txt` 89 | - `python clamp_occ_corners.py --ori_glob "data/mp3d_layout/test/label_cor/*txt" --output_dir data/mp3d_layout/test_no_occ/label_cor/*txt` 90 | ```python 91 | import os 92 | import json 93 | import glob 94 | import numpy as np 95 | from shapely.geometry import LineString 96 | 97 | from misc import panostretch 98 | 99 | def cor_2_1d(cor, H=512, W=1024): 100 | bon_ceil_x, bon_ceil_y = [], [] 101 | bon_floor_x, bon_floor_y = [], [] 102 | n_cor = len(cor) 103 | for i in range(n_cor // 2): 104 | xys = panostretch.pano_connect_points(cor[i*2], 105 | cor[(i*2+2) % n_cor], 106 | z=-50, w=W, h=H) 107 | bon_ceil_x.extend(xys[:, 0]) 108 | bon_ceil_y.extend(xys[:, 1]) 109 | for i in range(n_cor // 2): 110 | xys = panostretch.pano_connect_points(cor[i*2+1], 111 | cor[(i*2+3) % n_cor], 112 | z=50, w=W, h=H) 113 | bon_floor_x.extend(xys[:, 0]) 114 | bon_floor_y.extend(xys[:, 1]) 115 | bon_ceil_x, bon_ceil_y = sort_xy_filter_unique(bon_ceil_x, bon_ceil_y, y_small_first=True) 116 | bon_floor_x, bon_floor_y = sort_xy_filter_unique(bon_floor_x, bon_floor_y, y_small_first=False) 117 | bon = np.zeros((2, W)) 118 | bon[0] = np.interp(np.arange(W), bon_ceil_x, bon_ceil_y, period=W) 119 | bon[1] = np.interp(np.arange(W), bon_floor_x, bon_floor_y, period=W) 120 | #bon = ((bon + 0.5) / H - 0.5) * np.pi 121 | return bon 122 | 123 | def sort_xy_filter_unique(xs, ys, y_small_first=True): 124 | xs, ys = np.array(xs), np.array(ys) 125 | idx_sort = np.argsort(xs + ys / ys.max() * (int(y_small_first)*2-1)) 126 | xs, ys = xs[idx_sort], ys[idx_sort] 127 | _, idx_unique = np.unique(xs, return_index=True) 128 | xs, ys = xs[idx_unique], ys[idx_unique] 129 | assert np.all(np.diff(xs) > 0) 130 | return xs, ys 131 | 132 | def find_occlusion(coor): 133 | u = panostretch.coorx2u(coor[:, 0]) 134 | v = panostretch.coory2v(coor[:, 1]) 135 | x, y = panostretch.uv2xy(u, v, z=-50) 136 | occlusion = [] 137 | for i in range(len(x)): 138 | raycast = 
LineString([(0, 0), (x[i], y[i])]) 139 | other_layout = [] 140 | for j in range(i+1, len(x)): 141 | other_layout.append((x[j], y[j])) 142 | for j in range(0, i): 143 | other_layout.append((x[j], y[j])) 144 | other_layout = LineString(other_layout) 145 | occlusion.append(raycast.intersects(other_layout)) 146 | return np.array(occlusion) 147 | 148 | 149 | 150 | if __name__ == '__main__': 151 | 152 | import argparse 153 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 154 | parser.add_argument('--ori_glob', required=True) 155 | parser.add_argument('--output_dir', required=True) 156 | args = parser.parse_args() 157 | 158 | os.makedirs(args.output_dir, exist_ok=True) 159 | 160 | paths = glob.glob(args.ori_glob) 161 | for path in paths: 162 | if path.endswith('json'): 163 | with open(path) as f: 164 | dt = json.load(f) 165 | cor = np.array(dt['uv'], np.float32) 166 | cor[:, 0] *= 1024 167 | cor[:, 1] *= 512 168 | else: 169 | with open(path) as f: 170 | cor = np.array([l.strip().split() for l in f]).astype(np.float32) 171 | cor = cor.reshape(-1, 4) 172 | duplicated = [False] * len(cor) 173 | for i in range(len(duplicated)): 174 | for j in range(i+1, len(duplicated)): 175 | if (cor[j] == cor[i]).sum() == 4: 176 | duplicated[j] = True 177 | cor = cor[~np.array(duplicated)].reshape(-1, 2) 178 | cor[:, 0] = cor[:, 0] % 1024 179 | cor = np.roll(cor[:, :2], -2 * np.argmin(cor[::2, 0]), 0) 180 | occlusion = find_occlusion(cor[::2].copy()).repeat(2) 181 | 182 | bon = cor_2_1d(cor) 183 | 184 | cor_v1 = [] 185 | for i in range(0, len(cor), 2): 186 | if occlusion[i] & ~occlusion[(i+2) % len(cor)]: 187 | cur_x = cor[i, 0] 188 | next_x = cor[(i+2) % len(cor), 0] 189 | prev_x, j = None, i-2 190 | while prev_x is None: 191 | if j < 0: 192 | j += len(cor) 193 | if ~occlusion[j]: 194 | prev_x = cor[j, 0] 195 | break 196 | j -= 2 197 | dist2next = min(abs(next_x-cur_x), abs(next_x+1024-cur_x), abs(next_x-1024-cur_x)) 198 | dist2prev = min(abs(prev_x-cur_x), abs(prev_x+1024-cur_x), abs(prev_x-1024-cur_x)) 199 | # print(cor[i], prev_x, next_x, dist2next, dist2prev) 200 | if dist2prev < dist2next: 201 | cor_v1.append([prev_x, bon[0, (int(prev_x)+1) % 1024]]) 202 | cor_v1.append([prev_x, bon[1, (int(prev_x)+1) % 1024]]) 203 | else: 204 | cor_v1.append([next_x, bon[0, (int(next_x)-1) % 1024]]) 205 | cor_v1.append([next_x, bon[1, (int(next_x)-1) % 1024]]) 206 | elif ~occlusion[i]: 207 | cor_v1.extend(cor[i:i+2]) 208 | 209 | cor_v1 = np.stack(cor_v1, 0) 210 | for _ in range(len(cor_v1)): 211 | if np.alltrue(cor_v1[::2, 0][1:] - cor_v1[::2, 0][:-1] >= 0): 212 | break 213 | cor_v1 = np.roll(cor_v1, 2, axis=0) 214 | if not np.alltrue(cor_v1[::2, 0][1:] - cor_v1[::2, 0][:-1] >= 0): 215 | cor_v1[2::2] = np.flip(cor_v1[2::2], 0) 216 | cor_v1[3::2] = np.flip(cor_v1[3::2], 0) 217 | for _ in range(len(cor_v1)): 218 | if np.alltrue(cor_v1[::2, 0][1:] - cor_v1[::2, 0][:-1] >= 0): 219 | break 220 | cor_v1 = np.roll(cor_v1, 2, axis=0) 221 | with open(os.path.join(args.output_dir, f'{os.path.split(path)[1].replace("json", "txt")}'), 'w') as f: 222 | for u, v in cor_v1: 223 | f.write(f'{u:.0f} {v:.0f}\n') 224 | ``` 225 | 226 | 227 | 228 | ### Final file structure 229 | So now, you should have a `mp3d_layout` directory with below structure for HoHoNet to train. 
230 | 231 | data 232 | └── mp3d_layout 233 | ├── train 234 | │ ├── img/*png 235 | │ └── label_cor/*txt 236 | ├── train_no_occ 237 | │ ├── img/*png 238 | │ └── label_cor/*txt 239 | ├── valid 240 | │ ├── img/*png 241 | │ └── label_cor/*txt 242 | ├── valid_no_occ 243 | │ ├── img/*png 244 | │ └── label_cor/*txt 245 | ├── test 246 | │ ├── img/*png 247 | │ └── label_cor/*txt 248 | └── test_no_occ 249 | ├── img/*png 250 | └── label_cor/*txt 251 | -------------------------------------------------------------------------------- /README_prepare_data_s2d3d.md: -------------------------------------------------------------------------------- 1 | # Prepare Stanford2d3d dataset 2 | 3 | ## Dataset preparation 4 | ### Step 1: download source 5 | Please refer to [2D-3D-Semantics](https://github.com/alexsax/2D-3D-Semantics) to download the source datas. 6 | Make sure `"$S2D3D_ROOT"/area_[1|2|3|4|5a|5b|6]/pano/[depth|rgb|semantic]` existed. 7 | 8 | 9 | ### Step 2: resize and copy into `data/stanford2D3D/` for depth modality 10 | The source data are in high resolution (`2048x4096`). 11 | To reduce data loading time during training, we resize them to `512x1024` and copy into HoHoNet's `data/`. 12 | Copy below code and paste into `prepare_S2D3D_d.py`. 13 | Run `python prepare_S2D3D_d.py --ori_root "$S2D3D_ROOT" --new_root "$HOHO_ROOT/data/stanford2D3D/"`. 14 | ```python 15 | import os 16 | import glob 17 | import argparse 18 | from tqdm import tqdm 19 | 20 | import numpy as np 21 | from imageio import imread, imwrite 22 | from skimage.transform import rescale 23 | 24 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 25 | parser.add_argument('--ori_root', required=True) 26 | parser.add_argument('--new_root', required=True) 27 | args = parser.parse_args() 28 | 29 | areas = ['area_1', 'area_2', 'area_3', 'area_4', 'area_5a', 'area_5b', 'area_6'] 30 | 31 | for area in areas: 32 | print('Processing:', area) 33 | os.makedirs(os.path.join(args.new_root, area, 'rgb'), exist_ok=True) 34 | os.makedirs(os.path.join(args.new_root, area, 'depth'), exist_ok=True) 35 | for fname in tqdm(os.listdir(os.path.join(args.ori_root, area, 'pano', 'rgb'))): 36 | if fname[0] == '.' or not fname.endswith('png'): 37 | continue 38 | rgb_path = os.path.join(args.ori_root, area, 'pano', 'rgb', fname) 39 | d_path = os.path.join(args.ori_root, area, 'pano', 'depth', fname[:-7] + 'depth.png') 40 | assert os.path.isfile(d_path) 41 | 42 | rgb = imread(rgb_path)[..., :3] 43 | depth = imread(d_path) 44 | rgb = rescale(rgb, 0.25, order=0, mode='wrap', anti_aliasing=False, preserve_range=True) 45 | depth = rescale(depth, 0.25, order=0, mode='wrap', anti_aliasing=False, preserve_range=True) 46 | 47 | imwrite(os.path.join(args.new_root, area, 'rgb', fname), rgb.astype(np.uint8)) 48 | imwrite(os.path.join(args.new_root, area, 'depth', fname[:-7] + 'depth.png'), depth.astype(np.uint16)) 49 | ``` 50 | 51 | ### Step 3: resize and copy into `data/s2d3d_sem` for semantic modality 52 | Please download `semantic_labels.json`, `name2label.json`, and `colors.npy` on [Google drive](https://drive.google.com/drive/folders/1raT3vRXnQXRAQuYq36dE-93xFc_hgkTQ?usp=sharing) or [Dropbox](https://www.dropbox.com/sh/b014nop5jrehpoq/AACWNTMMHEAbaKOO1drqGio4a?dl=0). 53 | Put these files under your `$S2D3D_ROOT/`. 54 | Copy below code and paste into `prepare_S2D3D_sem.py`. 55 | Run `python prepare_S2D3D_sem.py --ori_root "$S2D3D_ROOT" --new_root "$HOHO_ROOT/data/s2d3d_sem/"`. 
56 | ```python 57 | import os 58 | import json 59 | import glob 60 | from PIL import Image 61 | from tqdm import trange 62 | import numpy as np 63 | from shutil import copyfile 64 | 65 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 66 | parser.add_argument('--ori_root', required=True) 67 | parser.add_argument('--new_root', required=True) 68 | args = parser.parse_args() 69 | 70 | areas = ['area_1', 'area_2', 'area_3', 'area_4', 'area_5a', 'area_5b', 'area_6'] 71 | 72 | with open(os.path.join(args.ori_root, 'semantic_labels.json')) as f: 73 | id2name = [name.split('_')[0] for name in json.load(f)] + [''] 74 | 75 | with open(os.path.join(args.ori_root, 'name2label.json')) as f: 76 | name2id = json.load(f) 77 | 78 | colors = np.load(os.path.join(args.ori_root, 'colors.npy')) 79 | 80 | id2label = np.array([name2id[name] for name in id2name], np.uint8) 81 | 82 | for area in areas: 83 | rgb_paths = sorted(glob.glob(os.path.join(args.ori_root, area, 'pano', 'rgb', '*png'))) 84 | sem_paths = sorted(glob.glob(os.path.join(args.ori_root, area, 'pano', 'semantic', '*png'))) 85 | os.makedirs(os.path.join(args.new_root, area, 'rgb'), exist_ok=True) 86 | os.makedirs(os.path.join(args.new_root, area, 'semantic'), exist_ok=True) 87 | os.makedirs(os.path.join(args.new_root, area, 'semantic_visualize'), exist_ok=True) 88 | for i in trange(len(rgb_paths)): 89 | rgb_k = os.path.split(rgb_paths[i])[-1] 90 | sem_k = os.path.split(sem_paths[i])[-1] 91 | 92 | # RGB 93 | rgb = Image.open(rgb_paths[i]).convert('RGB').resize((1024, 512), Image.LANCZOS) 94 | rgb.save(os.path.join(args.new_root, area, 'rgb', rgb_k)) 95 | vis = np.array(rgb) 96 | # Semantic 97 | sem = np.array(Image.open(sem_paths[i]).resize((1024, 512), Image.NEAREST), np.int32) 98 | unk = (sem[..., 0] != 0) 99 | sem = id2label[sem[..., 1] * 256 + sem[..., 2]] 100 | sem[unk] = 0 101 | Image.fromarray(sem).save(os.path.join(args.new_root, area, 'semantic', rgb_k)) 102 | # Visualization 103 | vis = vis // 2 + colors[sem] // 2 104 | Image.fromarray(vis).save(os.path.join(args.new_root, area, 'semantic_visualize', rgb_k)) 105 | ``` 106 | 107 | ### Step 4: prepare data split 108 | Download data split `fold[1|2|3]_[train|valid].txt` and `small_[train|valid|test].txt` on [Google drive](https://drive.google.com/drive/folders/1raT3vRXnQXRAQuYq36dE-93xFc_hgkTQ?usp=sharing) or [Dropbox](https://www.dropbox.com/sh/b014nop5jrehpoq/AACWNTMMHEAbaKOO1drqGio4a?dl=0). 109 | Put these `txt` files under `data/stanford2D3D`. 110 | 111 | 112 | 113 | ### Final file structure 114 | So now, you should have a `stanford2D3D` and `s2d3d_sem` directories with below structure for HoHoNet to train. 115 | 116 | data 117 | ├── stanford2D3D 118 | │ ├── area_[1|2|3|4|5a|5b|6] 119 | │ │ ├── img/*png 120 | │ │ └── depth/*png 121 | │ ├── small_[train|valid|test].txt 122 | │ └── fold[1|2|3]_[train|valid].txt 123 | │ 124 | └── s2d3d_sem 125 | └── area_[1|2|3|4|5a|5b|6] 126 | ├── rgb/*png 127 | └── semantic/*png 128 | -------------------------------------------------------------------------------- /README_reproduction.md: -------------------------------------------------------------------------------- 1 | # Reproduction 2 | 3 | Below provides: 4 | 1. guide to prepare the datasets for each task in our paper 5 | 2. 
reproduce the training and numerical results in our paper 6 | 7 | ## Dataset 8 | Detail instruction for preparing the datas for each dataset and task: 9 | - `Matterport3d` x `Layout` 10 | - see [Prepare MatterportLayout dataset](README_prepare_data_mp3d_layout.md) 11 | - `Matterport3d` x `Depth (BiFuse's stitching)` 12 | - We use the rgb-d stitching provided by [BiFuse](https://github.com/Yeh-yu-hsuan/BiFuse) 13 | - Put their `mp3d_align/` under `data/` 14 | - Download data split via [Google drive](https://drive.google.com/drive/folders/1raT3vRXnQXRAQuYq36dE-93xFc_hgkTQ?usp=sharing) or via [Dropbox](https://www.dropbox.com/sh/b014nop5jrehpoq/AACWNTMMHEAbaKOO1drqGio4a?dl=0) and put them under `data/matterport3d/`. 15 | - `Matterport3d` x `Depth (our new stitching)` 16 | - We remove the depth noise in BiFuse's stitching 17 | - This is not the version we use in our paper 18 | - **TODO:** release new stiching code with experiment results on it 19 | - `Stanford2d3d` x `Depth`: 20 | - see [Prepare Stanford2d3d dataset](README_prepare_data_s2d3d.md) 21 | - `Stanford2d3d` x `Semantic segmentation`: 22 | - see [Prepare Stanford2d3d dataset](README_prepare_data_s2d3d.md) 23 | 24 | The overall file strucure of the datasets is depicted as follow: 25 | 26 | data 27 | ├── mp3d_align # Stitching provided by BiFuse (https://github.com/Yeh-yu-hsuan/BiFuse) 28 | │ ├── 17DRP5sb8fy 29 | │ │ ├── 00ebbf3782c64d74aaf7dd39cd561175 30 | │ │ │ ├── color.jpg 31 | │ │ │ └── depth.npy 32 | │ │ └── ... 33 | │ └── ... 34 | │ 35 | ├── matterport3d 36 | │ ├── scenes_abla_train.txt # 41 house id for ablation training 37 | │ ├── scenes_abla_valid.txt # 20 house id for ablation evaluation 38 | │ ├── scenes_train.txt # 61 house id for training following BiFuse 39 | │ ├── mp3d_scenes_test.txt # 28 house id for testing following BiFuse 40 | │ └── mp3d_rgbd/ # Our new stitching which fixs the depth noise in BiFuse's version 41 | │ # Release new stitching code with new experiments later. 42 | │ 43 | ├── mp3d_layout # Please follow README_prepare_data_mp3d_layout.md 44 | │ ├── train_no_occ 45 | │ │ ├── img/*png 46 | │ │ └── label_cor/*txt 47 | │ ├── valid_no_occ 48 | │ │ ├── img/*png 49 | │ │ └── label_cor/*txt 50 | │ └── test_no_occ 51 | │ ├── img/*png 52 | │ └── label_cor/*txt 53 | │ 54 | ├── stanford2D3D # Please follow README_prepare_data_s2d3d.md 55 | │ ├── area_[1|2|3|4|5a|5b|6] 56 | │ │ ├── img/*png 57 | │ │ └── depth/*png 58 | │ ├── small_[train|valid|test].txt 59 | │ └── fold[1|2|3]_[train|valid].txt 60 | │ 61 | └── s2d3d_sem # Please follow README_prepare_data_s2d3d.md 62 | └── area_[1|2|3|4|5a|5b|6] 63 | ├── rgb/*png 64 | └── semantic/*png 65 | 66 | 67 | ## Reproduction: training 68 | The configs for reproducing the experiments are all in `config/`. 69 | 70 | Just run: 71 | ``` 72 | python train.py --cfg {PATH_TO_CONFIG} 73 | ``` 74 | to train the same setting as experiments in our paper. 75 | Note that the results with same config but different runs could be different as the random seed is not fixed. 
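If you need repeatable runs, you can fix the seeds yourself. The sketch below is not part of this repo and `seed_everything` is an illustrative name; fully deterministic cuDNN kernels usually cost some training speed.
```python
import random
import numpy as np
import torch

def seed_everything(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade speed for determinism in cuDNN
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
```
Dataloader worker processes would also need their own seeding for full determinism.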
76 | 77 | Some examples: 78 | ``` 79 | python train.py --cfg config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1_hardnet.yaml 80 | python train.py --cfg config/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34.yaml 81 | python train.py --cfg config/s2d3d_depth/HOHO_depth_dct_efficienthc_TransEn1.yaml 82 | python train.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101.yaml 83 | ``` 84 | 85 | ## Reproduction: measuring FPS 86 | Just run: 87 | ``` 88 | python count_params_flops.py --cfg {PATH_TO_CONFIG} 89 | ``` 90 | It measures the model's averaged feed-forward time. 91 | The results reported in our paper are obtained on a GeForce RTX 2080. 92 | 93 | ## Reproduction: quantitative evaluation 94 | Please make sure the dataset and the trained weights are organized as instructed above. 95 | If not, update the config accordingly, or pass the path to the trained weight directly to the testing script via `--pth`. 96 | 97 | 98 |
99 | 100 | ### `Matterport3D` x `depth` (BiFuse's stitching and setting) 101 | Assume the pretrained weights are located at: 102 | - `ckpt/mp3d_depth_HOHO_depth_dct_efficienthc_TransEn1_hardnet/ep60.pth` 103 | - `ckpt/mp3d_depth_HOHO_depth_dct_efficienthc_TransEn1/ep60.pth` 104 | 105 | Run: 106 | ``` 107 | python test_depth.py --cfg config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1.yaml 108 | python test_depth.py --cfg config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1_hardnet.yaml 109 | ``` 110 | 111 | Results: 112 | | Exp | fps | mre | mae | rmse | rmse_log | log10 | delta_1 | delta_2 | delta_3 | 113 | | :-- | :-- | :-- | :-- | :--- | :------- | :---- | :------ | :------ | :------ | 114 | | HOHO_depth_dct_efficienthc_TransEn1 | 52 | 0.1488 | 0.2862 | 0.5138 | 0.0871 | 0.0505 | 0.8786 | 0.9519 | 0.9771 | 115 | | HOHO_depth_dct_efficienthc_TransEn1_hardnet | 67 | 0.1482 | 0.2761 | 0.4968 | 0.0857 | 0.0494 | 0.8830 | 0.9547 | 0.9797 | 116 | 117 | 118 |
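For reference, the columns follow the conventional depth-estimation metrics, where delta_k is the fraction of pixels with max(pred/gt, gt/pred) below 1.25^k. A sketch of these standard definitions is given below; the exact valid-pixel masking and clipping behind the numbers above are defined in `test_depth.py`.
```python
import numpy as np

def depth_metrics(pred, gt, eps=1e-8):
    # Conventional definitions; test_depth.py defines the exact valid-pixel mask and clipping.
    mask = gt > eps
    pred, gt = np.clip(pred[mask], eps, None), gt[mask]
    ratio = np.maximum(pred / gt, gt / pred)
    return {
        'mre': np.mean(np.abs(pred - gt) / gt),
        'mae': np.mean(np.abs(pred - gt)),
        'rmse': np.sqrt(np.mean((pred - gt) ** 2)),
        'rmse_log': np.sqrt(np.mean((np.log(pred) - np.log(gt)) ** 2)),
        'log10': np.mean(np.abs(np.log10(pred) - np.log10(gt))),
        'delta_1': np.mean(ratio < 1.25),
        'delta_2': np.mean(ratio < 1.25 ** 2),
        'delta_3': np.mean(ratio < 1.25 ** 3),
    }
```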
119 | 120 | ### `Matterport3D` x `depth` (our new stitching and setting) 121 | **TODO** 122 | 123 | 124 |
125 | 126 | ### `Matterport3D` x `layout` (LayoutNetv2's setting) 127 | Assume the pretrained weights are located at: 128 | - `ckpt/mp3d_layout_HOHO_layout_aug_efficienthc_Transen1_resnet34/ep300.pth` 129 | 130 | Run the following to predict layouts and store the results in txt files: 131 | ``` 132 | python test_layout.py --cfg config/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34.yaml --img_glob "data/mp3d_layout/test/img/*" --output_dir output/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34/ 133 | ``` 134 | 135 | Run the following to evaluate the predictions: 136 | ``` 137 | python eval_layout.py --gt_glob "data/mp3d_layout/test/label_cor/*" --dt_glob "output/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34/*" 138 | ``` 139 | 140 | Results: 141 | | Exp | fps | 2DIoU | 3DIoU | RMSE | delta_1 | 142 | | :-- | :-- | :---- | :---- | :--- | :------ | 143 | | HOHO_layout_aug_efficienthc_Transen1_resnet34 | 111 | 82.32 | 79.88 | 0.22 | 0.95 | 144 | 145 | **[Note]** Our implementation of the depth-based evaluation (i.e., RMSE, delta_1) differs substantially from LayoutNetv2's, so the results from the two repos are not directly comparable. 146 | 147 | 148 |
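The predicted txt files follow the same corner format as the ground truth prepared in [README_prepare_data_mp3d_layout.md](README_prepare_data_mp3d_layout.md) and as `assets/pano_asmasuxybohhcj.layout.txt`: one `x y` pixel coordinate per line on the 1024x512 equirectangular image, listed left to right as (ceiling, floor) pairs that share the same column. A minimal loader sketch (not taken from `eval_layout.py`):
```python
import numpy as np

def load_layout_corners(path):
    # One 'x y' pixel coordinate per line; even rows are ceiling corners, odd rows floor corners.
    cor = np.loadtxt(path, dtype=np.float32).reshape(-1, 2)
    ceiling, floor = cor[0::2], cor[1::2]
    assert np.allclose(ceiling[:, 0], floor[:, 0]), 'each ceiling/floor pair shares a column'
    return ceiling, floor

ceiling, floor = load_layout_corners('assets/pano_asmasuxybohhcj.layout.txt')
```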
149 | 150 | ### `Stanford2d3d` x `depth` (BiFuse's setting) 151 | Assume the pretrained weights are located at: 152 | - `ckpt/s2d3d_depth_HOHO_depth_dct_efficienthc_TransEn1/ep60.pth` 153 | 154 | Run: 155 | ``` 156 | python test_depth.py --cfg config/s2d3d_depth/HOHO_depth_dct_efficienthc_TransEn1.yaml 157 | ``` 158 | 159 | Results: 160 | | Exp | fps | mre | mae | rmse | rmse_log | log10 | delta_1 | delta_2 | delta_3 | 161 | | :-- | :-- | :-- | :-- | :--- | :------- | :---- | :------ | :------ | :------ | 162 | | HOHO_depth_dct_efficienthc_TransEn1 | 52 | 0.1014 | 0.2027 | 0.3834 | 0.0668 | 0.0438 | 0.9054 | 0.9693 | 0.9886 | 163 | 164 | 165 |
166 | 167 | ### `Stanford2d3d` x `depth` (GeoReg360's setting) 168 | Assume the pretrained weights are located at: 169 | - `ckpt/s2d3d_depth_HOHO_depthS_dct_efficienthc_TransEn1/ep60.pth` 170 | - `ckpt/s2d3d_depth_HOHO_depthS_SGD_dct_efficienthc_TransEn1/ep60.pth` 171 | 172 | Run: 173 | ``` 174 | python test_depth.py --cfg config/s2d3d_depth/HOHO_depthS_SGD_dct_efficienthc_TransEn1.yaml --clip 100 175 | python test_depth.py --cfg config/s2d3d_depth/HOHO_depthS_dct_efficienthc_TransEn1.yaml --clip 100 176 | ``` 177 | 178 | **[Note]** Remember to add `--clip 100` to disable depth clipping for a fair comparison with GeoReg360's setting. 179 | 180 | Results: 181 | | Exp | fps | mre | mae | rmse | rmse_log | log10 | delta_1 | delta_2 | delta_3 | 182 | | :-- | :-- | :-- | :-- | :--- | :------- | :---- | :------ | :------ | :------ | 183 | | HOHO_depthS_SGD_dct_efficienthc_TransEn1 | 106 | 0.1114 | 0.2197 | 0.4083 | 0.0737 | 0.0502 | 0.8671 | 0.9694 | 0.9916 | 184 | | HOHO_depthS_dct_efficienthc_TransEn1 | 104 | 0.1040 | 0.2134 | 0.3940 | 0.0678 | 0.0475 | 0.8955 | 0.9749 | 0.9933 | 185 | 186 | 187 |
188 | 189 | ### `Stanford2d3d` x `semantic segmentation` 190 | Run: 191 | ``` 192 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold1_simple.yaml 193 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold2_simple.yaml 194 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold3_simple.yaml 195 | 196 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold1_simple.yaml 197 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold2_simple.yaml 198 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold3_simple.yaml 199 | 200 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101rgb.yaml 201 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101rgb.yaml 202 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101rgb.yaml 203 | 204 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101.yaml 205 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101.yaml 206 | python test_sem.py --cfg config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101.yaml 207 | ``` 208 | 209 | Results: 210 | | Exp | fps | iou | acc | 211 | | :-- | :-- | :-- | :-- | 212 | | HOHO_depth_dct_efficienthc_TransEn1_h64_fold1_simple | 202 | 43.04 | 53.06 | 213 | | HOHO_depth_dct_efficienthc_TransEn1_h64_fold2_simple | 204 | 36.27 | 48.45 | 214 | | HOHO_depth_dct_efficienthc_TransEn1_h64_fold3_simple | 202 | 43.14 | 54.81 | 215 | 216 | | Exp | fps | iou | acc | 217 | | :-- | :-- | :-- | :-- | 218 | | HOHO_depth_dct_efficienthc_TransEn1_h256_fold1_simple | 135 | 46.49 | 56.33 | 219 | | HOHO_depth_dct_efficienthc_TransEn1_h256_fold2_simple | 135 | 37.18 | 48.60 | 220 | | HOHO_depth_dct_efficienthc_TransEn1_h256_fold3_simple | 135 | 46.09 | 56.81 | 221 | 222 | | Exp | fps | iou | acc | 223 | | :-- | :-- | :-- | :-- | 224 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101rgb | 10 | 53.94 | 64.30 | 225 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101rgb | 10 | 45.03 | 61.70 | 226 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101rgb | 10 | 56.87 | 68.94 | 227 | 228 | | Exp | fps | iou | acc | 229 | | :-- | :-- | :-- | :-- | 230 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101 | 10 | 59.05 | 68.91 | 231 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101 | 10 | 49.70 | 65.86 | 232 | | HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101 | 10 | 60.28 | 71.85 | 233 | -------------------------------------------------------------------------------- /assets/label13_weight.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/label13_weight.pth -------------------------------------------------------------------------------- /assets/pano_asmasuxybohhcj.depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/pano_asmasuxybohhcj.depth.png -------------------------------------------------------------------------------- /assets/pano_asmasuxybohhcj.layout.txt: 
-------------------------------------------------------------------------------- 1 | 83.7 161.1 2 | 83.7 332.7 3 | 126.6 133.3 4 | 126.6 358.2 5 | 181.2 170.3 6 | 181.2 324.7 7 | 354.4 176.4 8 | 354.4 319.4 9 | 609.0 149.1 10 | 609.0 343.6 11 | 941.1 160.6 12 | 941.1 333.1 13 | -------------------------------------------------------------------------------- /assets/pano_asmasuxybohhcj.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/pano_asmasuxybohhcj.png -------------------------------------------------------------------------------- /assets/repo_teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/repo_teaser.jpg -------------------------------------------------------------------------------- /assets/snapshot_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/snapshot_depth.jpg -------------------------------------------------------------------------------- /assets/snapshot_layout.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/assets/snapshot_layout.jpg -------------------------------------------------------------------------------- /config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/mp3d_scenes_test.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 60 23 | batch_size: 4 24 | save_every: 60 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 64 49 | loss: l1 50 | -------------------------------------------------------------------------------- /config/mp3d_depth/HOHO_depth_dct_efficienthc_TransEn1_hardnet.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/mp3d_scenes_test.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 60 23 | batch_size: 4 24 | save_every: 60 25 | 
optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: HarDNet 36 | kwargs: 37 | depth_wise: False 38 | arch: 68 39 | pretrained: True 40 | decode_config: 41 | module: EfficientHeightReduction 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 256 46 | num_layers: 1 47 | modalities_config: 48 | DepthEstimator: 49 | basis: dct 50 | n_components: 64 51 | loss: l1 52 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct128_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 128 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct256_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 256 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct32_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: 
(512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 32 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct512_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 512 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_LSTM.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: GlobalHeightStage 40 | refine_config: 41 | module: LSTM 42 | modalities_config: 43 | DepthEstimator: 44 | basis: dct 45 | n_components: 64 46 | loss: l1 47 | 48 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_Linear.yaml: 
-------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: GlobalHeightStage 40 | refine_config: 41 | module: Linear 42 | modalities_config: 43 | DepthEstimator: 44 | basis: dct 45 | n_components: 64 46 | loss: l1 47 | 48 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: GlobalHeightStage 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 64 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_efficienthc_LSTM.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: LSTM 42 | modalities_config: 43 | DepthEstimator: 44 | basis: dct 45 | n_components: 64 46 
| loss: l1 47 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_efficienthc_Linear.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: Linear 42 | modalities_config: 43 | DepthEstimator: 44 | basis: dct 45 | n_components: 64 46 | loss: l1 47 | 48 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 64 49 | loss: l1 50 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_dct_efficienthc_TransEn1_resnet34.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | 
kwargs: 37 | backbone: resnet34 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 64 49 | loss: l1 50 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_lin128_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: linear 48 | n_components: 128 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_lin256_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: linear 48 | n_components: 256 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_lin32_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | 
rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: linear 48 | n_components: 32 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_lin512_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: linear 48 | n_components: 512 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_depth/ablation/tuning___HOHO_depth_lin64_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: CorruptMP3dDepthDataset 7 | common_kwargs: 8 | root: data/mp3d_align 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/matterport3d/scenes_abla_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/matterport3d/scenes_abla_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 40 23 | batch_size: 4 24 | save_every: 40 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: linear 48 | n_components: 64 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/mp3d_layout/HOHO_layout_aug_efficienthc_Transen1_resnet34.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | 
name: PanoCorBonDataset 7 | train_kwargs: 8 | root_dir: data/mp3d_layout/train_no_occ 9 | flip: True 10 | rotate: True 11 | gamma: True 12 | stretch: True 13 | valid_kwargs: 14 | root_dir: data/mp3d_layout/valid_no_occ 15 | 16 | training: 17 | epoch: 300 18 | batch_size: 4 19 | save_every: 300 20 | optim_lr: 0.0001 21 | optim_poly_gamma: 0.9 22 | 23 | model: 24 | file: lib.model.hohonet 25 | modelclass: HoHoNet 26 | kwargs: 27 | emb_dim: 256 28 | backbone_config: 29 | module: Resnet 30 | kwargs: 31 | backbone: resnet34 32 | decode_config: 33 | module: EfficientHeightReduction 34 | refine_config: 35 | module: TransEn 36 | kwargs: 37 | position_encode: 256 38 | nhead: 8 39 | num_layers: 1 40 | dim_feedforward: 2048 41 | modalities_config: 42 | LayoutEstimator: 43 | cor_weight: 1. 44 | bon_weight: 1. 45 | last_bias: False 46 | last_ks: 1 47 | -------------------------------------------------------------------------------- /config/s2d3d_depth/HOHO_depthS_SGD_dct_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dDepthDataset 7 | common_kwargs: 8 | root: data/stanford2D3D 9 | hw: (256, 512) 10 | dmax: 100. 11 | train_kwargs: 12 | scene_txt: data/stanford2D3D/small_train.txt 13 | rand_rotate: True 14 | rand_flip: True 15 | rand_gamma: True 16 | valid_kwargs: 17 | scene_txt: data/stanford2D3D/small_test.txt 18 | rand_rotate: False 19 | rand_flip: False 20 | rand_gamma: False 21 | 22 | training: 23 | optim: SGD 24 | epoch: 60 25 | batch_size: 8 26 | save_every: 60 27 | optim_lr: 0.01 28 | weight_decay: 0.0005 29 | optim_poly_gamma: 0.9 30 | optim_betas: (0.9, 0.999) 31 | 32 | model: 33 | file: lib.model.hohonet 34 | modelclass: HoHoNet 35 | kwargs: 36 | emb_dim: 256 37 | backbone_config: 38 | module: Resnet 39 | kwargs: 40 | backbone: resnet50 41 | input_height: 256 42 | decode_config: 43 | module: EfficientHeightReduction 44 | refine_config: 45 | module: TransEn 46 | kwargs: 47 | position_encode: 128 48 | num_layers: 1 49 | modalities_config: 50 | DepthEstimator: 51 | basis: dct 52 | n_components: 64 53 | loss: l1 54 | output_height: 256 55 | 56 | -------------------------------------------------------------------------------- /config/s2d3d_depth/HOHO_depthS_dct_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dDepthDataset 7 | common_kwargs: 8 | root: data/stanford2D3D 9 | hw: (256, 512) 10 | dmax: 100. 
11 | train_kwargs: 12 | scene_txt: data/stanford2D3D/small_train.txt 13 | rand_rotate: True 14 | rand_flip: True 15 | rand_gamma: True 16 | valid_kwargs: 17 | scene_txt: data/stanford2D3D/small_test.txt 18 | rand_rotate: False 19 | rand_flip: False 20 | rand_gamma: False 21 | 22 | training: 23 | epoch: 60 24 | batch_size: 4 25 | save_every: 60 26 | optim_lr: 0.0001 27 | optim_poly_gamma: 0.9 28 | optim_betas: (0.9, 0.999) 29 | 30 | model: 31 | file: lib.model.hohonet 32 | modelclass: HoHoNet 33 | kwargs: 34 | emb_dim: 256 35 | backbone_config: 36 | module: Resnet 37 | kwargs: 38 | backbone: resnet50 39 | input_height: 256 40 | decode_config: 41 | module: EfficientHeightReduction 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 128 46 | num_layers: 1 47 | modalities_config: 48 | DepthEstimator: 49 | basis: dct 50 | n_components: 64 51 | loss: l1 52 | output_height: 256 53 | 54 | -------------------------------------------------------------------------------- /config/s2d3d_depth/HOHO_depth_dct_efficienthc_TransEn1.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dDepthDataset 7 | common_kwargs: 8 | root: data/stanford2D3D 9 | hw: (512, 1024) 10 | train_kwargs: 11 | scene_txt: data/stanford2D3D/fold1_train.txt 12 | rand_rotate: True 13 | rand_flip: True 14 | rand_gamma: True 15 | valid_kwargs: 16 | scene_txt: data/stanford2D3D/fold1_valid.txt 17 | rand_rotate: False 18 | rand_flip: False 19 | rand_gamma: False 20 | 21 | training: 22 | epoch: 60 23 | batch_size: 4 24 | save_every: 60 25 | optim_lr: 0.0001 26 | optim_poly_gamma: 0.9 27 | optim_betas: (0.9, 0.999) 28 | 29 | model: 30 | file: lib.model.hohonet 31 | modelclass: HoHoNet 32 | kwargs: 33 | emb_dim: 256 34 | backbone_config: 35 | module: Resnet 36 | kwargs: 37 | backbone: resnet50 38 | decode_config: 39 | module: EfficientHeightReduction 40 | refine_config: 41 | module: TransEn 42 | kwargs: 43 | position_encode: 256 44 | num_layers: 1 45 | modalities_config: 46 | DepthEstimator: 47 | basis: dct 48 | n_components: 64 49 | loss: l1 50 | 51 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | train_kwargs: 11 | fold: 1_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 1_valid 16 | 17 | training: 18 | epoch: 60 19 | batch_size: 4 20 | save_every: 60 21 | optim_lr: 0.0001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | input_extra: 1 35 | backbone: resnet101 36 | input_height: 1024 37 | decode_config: 38 | module: EfficientHeightReduction 39 | refine_config: 40 | module: TransEn 41 | kwargs: 42 | position_encode: 512 43 | num_layers: 1 44 | modalities_config: 45 | SemanticSegmenter: 46 | num_classes: 13 47 | label_weight: data/s2d3d_sem/label13_weight.pth 48 | basis: dct 49 | loss: ce 50 | n_components: 64 51 | output_height: 1024 52 | 53 | -------------------------------------------------------------------------------- 
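Note on how the YAML files above are consumed: `lib/config.py` (shown further below) merges a chosen YAML into a default yacs config, and the training/inference scripts then build the network reflectively from `model.file`, `model.modelclass`, and `model.kwargs`. The following is only a minimal sketch of that flow, not part of the repository; the `SimpleNamespace` stand-in for the parsed argparse arguments and the empty `opts` list are illustrative assumptions.

import importlib
from types import SimpleNamespace
from lib.config import config, update_config

# Merge one of the YAML files above into the default yacs config.
# The config path below is the fold-1 semantic-segmentation config shown above;
# the empty `opts` list means no command-line overrides (illustrative assumption).
args = SimpleNamespace(
    cfg='config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101.yaml',
    opts=[])
update_config(config, args)

# Build the model the same way train.py / infer_*.py do:
# `model.file` names the module, `model.modelclass` the class, `model.kwargs` its constructor arguments.
model_file = importlib.import_module(config.model.file)     # lib.model.hohonet
model_class = getattr(model_file, config.model.modelclass)  # HoHoNet
net = model_class(**config.model.kwargs)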
/config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold1_resnet101rgb.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | depth: False 11 | train_kwargs: 12 | fold: 1_train 13 | flip: True 14 | rotate: True 15 | valid_kwargs: 16 | fold: 1_valid 17 | 18 | training: 19 | epoch: 60 20 | batch_size: 4 21 | save_every: 60 22 | optim_lr: 0.0001 23 | optim_poly_gamma: 0.9 24 | optim_betas: (0.9, 0.999) 25 | 26 | model: 27 | file: lib.model.hohonet 28 | modelclass: HoHoNet 29 | kwargs: 30 | emb_dim: 256 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | backbone: resnet101 35 | input_height: 1024 36 | decode_config: 37 | module: EfficientHeightReduction 38 | refine_config: 39 | module: TransEn 40 | kwargs: 41 | position_encode: 512 42 | num_layers: 1 43 | modalities_config: 44 | SemanticSegmenter: 45 | num_classes: 13 46 | label_weight: data/s2d3d_sem/label13_weight.pth 47 | basis: dct 48 | loss: ce 49 | n_components: 64 50 | output_height: 1024 51 | 52 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | train_kwargs: 11 | fold: 2_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 2_valid 16 | 17 | training: 18 | epoch: 60 19 | batch_size: 4 20 | save_every: 60 21 | optim_lr: 0.0001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | input_extra: 1 35 | backbone: resnet101 36 | input_height: 1024 37 | decode_config: 38 | module: EfficientHeightReduction 39 | refine_config: 40 | module: TransEn 41 | kwargs: 42 | position_encode: 512 43 | num_layers: 1 44 | modalities_config: 45 | SemanticSegmenter: 46 | num_classes: 13 47 | label_weight: data/s2d3d_sem/label13_weight.pth 48 | basis: dct 49 | loss: ce 50 | n_components: 64 51 | output_height: 1024 52 | 53 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold2_resnet101rgb.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | depth: False 11 | train_kwargs: 12 | fold: 2_train 13 | flip: True 14 | rotate: True 15 | valid_kwargs: 16 | fold: 2_valid 17 | 18 | training: 19 | epoch: 60 20 | batch_size: 4 21 | save_every: 60 22 | optim_lr: 0.0001 23 | optim_poly_gamma: 0.9 24 | optim_betas: (0.9, 0.999) 25 | 26 | model: 27 | file: lib.model.hohonet 28 | modelclass: HoHoNet 29 | kwargs: 30 | emb_dim: 256 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | backbone: resnet101 35 | input_height: 1024 36 | decode_config: 37 | module: EfficientHeightReduction 38 | refine_config: 39 | module: TransEn 40 | kwargs: 41 | position_encode: 512 42 | num_layers: 1 43 | modalities_config: 44 | 
SemanticSegmenter: 45 | num_classes: 13 46 | label_weight: data/s2d3d_sem/label13_weight.pth 47 | basis: dct 48 | loss: ce 49 | n_components: 64 50 | output_height: 1024 51 | 52 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | train_kwargs: 11 | fold: 3_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 3_valid 16 | 17 | training: 18 | epoch: 60 19 | batch_size: 4 20 | save_every: 60 21 | optim_lr: 0.0001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | input_extra: 1 35 | backbone: resnet101 36 | input_height: 1024 37 | decode_config: 38 | module: EfficientHeightReduction 39 | refine_config: 40 | module: TransEn 41 | kwargs: 42 | position_encode: 512 43 | num_layers: 1 44 | modalities_config: 45 | SemanticSegmenter: 46 | num_classes: 13 47 | label_weight: data/s2d3d_sem/label13_weight.pth 48 | basis: dct 49 | loss: ce 50 | n_components: 64 51 | output_height: 1024 52 | 53 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h1024_fold3_resnet101rgb.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (1024, 2048) 10 | depth: False 11 | train_kwargs: 12 | fold: 3_train 13 | flip: True 14 | rotate: True 15 | valid_kwargs: 16 | fold: 3_valid 17 | 18 | training: 19 | epoch: 60 20 | batch_size: 4 21 | save_every: 60 22 | optim_lr: 0.0001 23 | optim_poly_gamma: 0.9 24 | optim_betas: (0.9, 0.999) 25 | 26 | model: 27 | file: lib.model.hohonet 28 | modelclass: HoHoNet 29 | kwargs: 30 | emb_dim: 256 31 | backbone_config: 32 | module: Resnet 33 | kwargs: 34 | backbone: resnet101 35 | input_height: 1024 36 | decode_config: 37 | module: EfficientHeightReduction 38 | refine_config: 39 | module: TransEn 40 | kwargs: 41 | position_encode: 512 42 | num_layers: 1 43 | modalities_config: 44 | SemanticSegmenter: 45 | num_classes: 13 46 | label_weight: data/s2d3d_sem/label13_weight.pth 47 | basis: dct 48 | loss: ce 49 | n_components: 64 50 | output_height: 1024 51 | 52 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold1_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (256, 512) 10 | train_kwargs: 11 | fold: 1_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 1_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | 
module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 256 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 128 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 256 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold2_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (256, 512) 10 | train_kwargs: 11 | fold: 2_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 2_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 256 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 128 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 256 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h256_fold3_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (256, 512) 10 | train_kwargs: 11 | fold: 3_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 3_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 256 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 128 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 256 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold1_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: 
S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (64, 128) 10 | train_kwargs: 11 | fold: 1_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 1_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 64 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 32 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 64 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold2_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (64, 128) 10 | train_kwargs: 11 | fold: 2_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 2_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 64 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 32 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 64 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /config/s2d3d_sem/HOHO_depth_dct_efficienthc_TransEn1_h64_fold3_simple.yaml: -------------------------------------------------------------------------------- 1 | ckpt_root: ckpt 2 | cuda: True 3 | num_workers: 8 4 | 5 | dataset: 6 | name: S2d3dSemDataset 7 | common_kwargs: 8 | root: data/s2d3d_sem/ 9 | hw: (64, 128) 10 | train_kwargs: 11 | fold: 3_train 12 | flip: True 13 | rotate: True 14 | valid_kwargs: 15 | fold: 3_valid 16 | 17 | training: 18 | epoch: 300 19 | batch_size: 16 20 | save_every: 300 21 | optim_lr: 0.001 22 | optim_poly_gamma: 0.9 23 | optim_betas: (0.9, 0.999) 24 | 25 | model: 26 | file: lib.model.hohonet 27 | modelclass: HoHoNet 28 | kwargs: 29 | emb_dim: 256 30 | input_norm: ugscnn 31 | backbone_config: 32 | module: SimpleEncoder 33 | kwargs: 34 | input_extra: 1 35 | input_height: 64 36 | block: conv3x3max 37 | expand: 2 38 | decode_config: 39 | module: EfficientHeightReduction 40 | kwargs: 41 | out_ch: 256 42 | refine_config: 43 | module: TransEn 44 | kwargs: 45 | position_encode: 32 46 | num_layers: 1 47 | modalities_config: 48 | SemanticSegmenter: 49 | num_classes: 
13 50 | label_weight: data/s2d3d_sem/label13_weight.pth 51 | basis: dct 52 | loss: ce 53 | n_components: 64 54 | output_height: 64 55 | dropout: 0.5 56 | 57 | -------------------------------------------------------------------------------- /count_params_flops.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import importlib 4 | from tqdm import tqdm, trange 5 | from collections import Counter 6 | 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | from thop import profile, clever_format 14 | 15 | from lib.config import config, update_config 16 | 17 | 18 | if __name__ == '__main__': 19 | 20 | # Parse args & config 21 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | parser.add_argument('--cfg', required=True) 23 | parser.add_argument('opts', 24 | help='Modify config options using the command-line', 25 | default=None, nargs=argparse.REMAINDER) 26 | args = parser.parse_args() 27 | update_config(config, args) 28 | 29 | # Init global variable 30 | device = 'cuda' if config.cuda else 'cpu' 31 | if config.cuda and config.cuda_benchmark: 32 | torch.backends.cudnn.benchmark = True 33 | 34 | # Init network 35 | model_file = importlib.import_module(config.model.file) 36 | model_class = getattr(model_file, config.model.modelclass) 37 | net = model_class(**config.model.kwargs).to(device) 38 | net.eval() 39 | 40 | # testing 41 | layers = net 42 | inputs = [torch.randn(1, 3, 512, 1024).to(device)] 43 | with torch.no_grad(): 44 | flops, params = profile(layers, inputs) 45 | print(f'input :', [v.shape for v in inputs]) 46 | print(f'flops : {flops/(10**9):.2f} G') 47 | print(f'params: {params/(10**6):.2f} M') 48 | 49 | import time 50 | fps = [] 51 | with torch.no_grad(): 52 | layers(inputs[0]) 53 | for _ in range(50): 54 | eps_time = time.time() 55 | layers(inputs[0]) 56 | torch.cuda.synchronize() 57 | eps_time = time.time() - eps_time 58 | fps.append(eps_time) 59 | print(f'fps : {1 / (sum(fps) / len(fps)):.2f}') 60 | 61 | -------------------------------------------------------------------------------- /eval_layout.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import glob 4 | import argparse 5 | import numpy as np 6 | from tqdm import tqdm 7 | from shapely.geometry import Polygon 8 | 9 | from lib.dataset.dataset_layout import cor_2_1d 10 | from lib.misc import post_proc 11 | 12 | 13 | def prepare_gtdt_pairs(gt_glob, dt_glob): 14 | gt_paths = sorted(glob.glob(gt_glob)) 15 | dt_paths_json = dict([(os.path.split(v)[-1].split('.')[0], v) 16 | for v in glob.glob(dt_glob) if v.endswith('json')]) 17 | dt_paths_txt = dict([(os.path.split(v)[-1].split('.')[0], v) 18 | for v in glob.glob(dt_glob) if v.endswith('txt')]) 19 | 20 | gtdt_pairs = [] 21 | for gt_path in gt_paths: 22 | k = os.path.split(gt_path)[-1].split('.')[0] 23 | if k in dt_paths_json: 24 | gtdt_pairs.append((gt_path, dt_paths_json[k])) 25 | else: 26 | gtdt_pairs.append((gt_path, dt_paths_txt[k])) 27 | return gtdt_pairs 28 | 29 | 30 | def layout_2_depth(cor_id, h, w, return_mask=False): 31 | # Convert corners to per-column boundary first 32 | # Up -pi/2, Down pi/2 33 | vc, vf = cor_2_1d(cor_id, h, w) 34 | vc = vc[None, :] # [1, w] 35 | vf = vf[None, :] # [1, w] 36 | assert (vc > 0).sum() == 0 37 | assert (vf < 0).sum() == 0 38 | 39 | # Per-pixel v coordinate (vertical angle) 40 | vs = 
((np.arange(h) + 0.5) / h - 0.5) * np.pi 41 | vs = np.repeat(vs[:, None], w, axis=1) # [h, w] 42 | 43 | # Floor-plane to depth 44 | floor_h = 1.6 45 | floor_d = np.abs(floor_h / np.sin(vs)) 46 | 47 | # Wall-to-camera distance on the horizontal plane through the camera center 48 | cs = floor_h / np.tan(vf) 49 | 50 | # Ceiling-plane to depth 51 | ceil_h = np.abs(cs * np.tan(vc)) # [1, w] 52 | ceil_d = np.abs(ceil_h / np.sin(vs)) # [h, w] 53 | 54 | # Wall to depth 55 | wall_d = np.abs(cs / np.cos(vs)) # [h, w] 56 | 57 | # Recover layout depth 58 | floor_mask = (vs > vf) 59 | ceil_mask = (vs < vc) 60 | wall_mask = (~floor_mask) & (~ceil_mask) 61 | depth = np.zeros([h, w], np.float32) # [h, w] 62 | depth[floor_mask] = floor_d[floor_mask] 63 | depth[ceil_mask] = ceil_d[ceil_mask] 64 | depth[wall_mask] = wall_d[wall_mask] 65 | 66 | assert (depth == 0).sum() == 0 67 | if return_mask: 68 | return depth, floor_mask, ceil_mask, wall_mask 69 | return depth 70 | 71 | 72 | def test_general(dt_cor_id, gt_cor_id, w, h, losses): 73 | dt_floor_coor = dt_cor_id[1::2] 74 | dt_ceil_coor = dt_cor_id[0::2] 75 | gt_floor_coor = gt_cor_id[1::2] 76 | gt_ceil_coor = gt_cor_id[0::2] 77 | assert (dt_floor_coor[:, 0] != dt_ceil_coor[:, 0]).sum() == 0 78 | assert (gt_floor_coor[:, 0] != gt_ceil_coor[:, 0]).sum() == 0 79 | 80 | # Eval 3D IoU and height error (in meters) 81 | N = len(dt_floor_coor) 82 | ch = -1.6 83 | dt_floor_xy = post_proc.np_coor2xy(dt_floor_coor, ch, 1024, 512, floorW=1, floorH=1) 84 | gt_floor_xy = post_proc.np_coor2xy(gt_floor_coor, ch, 1024, 512, floorW=1, floorH=1) 85 | dt_poly = Polygon(dt_floor_xy) 86 | gt_poly = Polygon(gt_floor_xy) 87 | if not gt_poly.is_valid: 88 | print('Skip invalid ground truth polygon') 89 | return 90 | 91 | # 2D IoU 92 | try: 93 | area_dt = dt_poly.area 94 | area_gt = gt_poly.area 95 | area_inter = dt_poly.intersection(gt_poly).area 96 | iou2d = area_inter / (area_gt + area_dt - area_inter) 97 | except: 98 | iou2d = 0 99 | 100 | # 3D IoU 101 | try: 102 | cch_dt = post_proc.get_z1(dt_floor_coor[:, 1], dt_ceil_coor[:, 1], ch, 512) 103 | cch_gt = post_proc.get_z1(gt_floor_coor[:, 1], gt_ceil_coor[:, 1], ch, 512) 104 | h_dt = abs(cch_dt.mean() - ch) 105 | h_gt = abs(cch_gt.mean() - ch) 106 | area3d_inter = area_inter * min(h_dt, h_gt) 107 | area3d_pred = area_dt * h_dt 108 | area3d_gt = area_gt * h_gt 109 | iou3d = area3d_inter / (area3d_pred + area3d_gt - area3d_inter) 110 | except: 111 | iou3d = 0 112 | 113 | # rmse & delta_1 114 | gt_layout_depth = layout_2_depth(gt_cor_id, h, w) 115 | try: 116 | dt_layout_depth = layout_2_depth(dt_cor_id, h, w) 117 | except: 118 | dt_layout_depth = np.zeros_like(gt_layout_depth) 119 | rmse = ((gt_layout_depth - dt_layout_depth)**2).mean() ** 0.5 120 | thres = np.maximum(gt_layout_depth/dt_layout_depth, dt_layout_depth/gt_layout_depth) 121 | delta_1 = (thres < 1.25).mean() 122 | 123 | # Add a result 124 | n_corners = len(gt_floor_coor) 125 | if n_corners % 2 == 1: 126 | n_corners = 'odd' 127 | elif n_corners < 10: 128 | n_corners = str(n_corners) 129 | else: 130 | n_corners = '10+' 131 | losses[n_corners]['2DIoU'].append(iou2d) 132 | losses[n_corners]['3DIoU'].append(iou3d) 133 | losses[n_corners]['rmse'].append(rmse) 134 | losses[n_corners]['delta_1'].append(delta_1) 135 | losses['overall']['2DIoU'].append(iou2d) 136 | losses['overall']['3DIoU'].append(iou3d) 137 | losses['overall']['rmse'].append(rmse) 138 | losses['overall']['delta_1'].append(delta_1) 139 | 140 | 141 | if __name__ == '__main__': 142 | 143 | parser =
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 144 | parser.add_argument('--dt_glob', 145 | help='NOTE: Remember to quote your glob path. ' 146 | 'Files assumed to be json from inference.py') 147 | parser.add_argument('--gt_glob', 148 | help='NOTE: Remember to quote your glob path. ' 149 | 'Files assumed to be txt') 150 | parser.add_argument('--w', default=1024, type=int, 151 | help='GT images width') 152 | parser.add_argument('--h', default=512, type=int, 153 | help='GT images height') 154 | args = parser.parse_args() 155 | 156 | # Prepare (gt, dt) pairs 157 | gtdt_pairs = prepare_gtdt_pairs(args.gt_glob, args.dt_glob) 158 | 159 | # Testing 160 | losses = dict([ 161 | (n_corner, {'2DIoU': [], '3DIoU': [], 'rmse': [], 'delta_1': []}) 162 | for n_corner in ['4', '6', '8', '10+', 'odd', 'overall'] 163 | ]) 164 | for gt_path, dt_path in tqdm(gtdt_pairs, desc='Testing'): 165 | # Parse ground truth 166 | with open(gt_path) as f: 167 | gt_cor_id = np.array([l.split() for l in f], np.float32) 168 | 169 | # Parse inferred result 170 | if dt_path.endswith('json'): 171 | with open(dt_path) as f: 172 | dt = json.load(f) 173 | dt_cor_id = np.array(dt['uv'], np.float32) 174 | dt_cor_id[:, 0] *= args.w 175 | dt_cor_id[:, 1] *= args.h 176 | else: 177 | dt_cor_id = np.loadtxt(dt_path, np.float32) 178 | 179 | test_general(dt_cor_id, gt_cor_id, args.w, args.h, losses) 180 | 181 | for k, result in losses.items(): 182 | iou2d = np.array(result['2DIoU']) 183 | iou3d = np.array(result['3DIoU']) 184 | rmse = np.array(result['rmse']) 185 | delta_1 = np.array(result['delta_1']) 186 | if len(iou2d) == 0: 187 | continue 188 | print('GT #Corners: %s (%d instances)' % (k, len(iou2d))) 189 | print(' 2DIoU : %.2f' % (iou2d.mean() * 100)) 190 | print(' 3DIoU : %.2f' % (iou3d.mean() * 100)) 191 | print(' RMSE : %.2f' % (rmse.mean())) 192 | print(' delta^1: %.2f' % (delta_1.mean())) 193 | -------------------------------------------------------------------------------- /infer_depth.py: -------------------------------------------------------------------------------- 1 | import os, sys, time, glob 2 | import argparse 3 | import importlib 4 | from tqdm import tqdm 5 | from imageio import imread, imwrite 6 | import torch 7 | import numpy as np 8 | 9 | from lib.config import config, update_config 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | # Parse args & config 15 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--cfg', required=True) 17 | parser.add_argument('--pth', required=True) 18 | parser.add_argument('--out', required=True) 19 | parser.add_argument('--inp', required=True) 20 | parser.add_argument('opts', 21 | help='Modify config options using the command-line', 22 | default=None, nargs=argparse.REMAINDER) 23 | args = parser.parse_args() 24 | update_config(config, args) 25 | device = 'cuda' if config.cuda else 'cpu' 26 | 27 | # Parse input paths 28 | rgb_lst = glob.glob(args.inp) 29 | if len(rgb_lst) == 0: 30 | print('No images found') 31 | import sys; sys.exit() 32 | 33 | # Init model 34 | model_file = importlib.import_module(config.model.file) 35 | model_class = getattr(model_file, config.model.modelclass) 36 | net = model_class(**config.model.kwargs) 37 | net.load_state_dict(torch.load(args.pth, map_location=device)) 38 | net = net.eval().to(device) 39 | 40 | # Run inference 41 | with torch.no_grad(): 42 | for path in tqdm(rgb_lst): 43 | rgb = imread(path) 44 | x = torch.from_numpy(rgb).permute(2,0,1)[None].float()
/ 255. 45 | if x.shape[2:] != config.dataset.common_kwargs.hw: 46 | x = torch.nn.functional.interpolate(x, config.dataset.common_kwargs.hw, mode='area') 47 | x = x.to(device) 48 | pred_depth = net.infer(x) 49 | if not torch.is_tensor(pred_depth): 50 | pred_depth = pred_depth.pop('depth') 51 | 52 | fname = os.path.splitext(os.path.split(path)[1])[0] 53 | imwrite( 54 | os.path.join(args.out, f'{fname}.depth.png'), 55 | pred_depth.mul(1000).squeeze().cpu().numpy().astype(np.uint16) 56 | ) 57 | 58 | -------------------------------------------------------------------------------- /infer_layout.py: -------------------------------------------------------------------------------- 1 | import os, sys, time, glob 2 | import argparse 3 | import importlib 4 | from tqdm import tqdm 5 | from imageio import imread, imwrite 6 | import torch 7 | import numpy as np 8 | 9 | from lib.config import config, update_config 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | # Parse args & config 15 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--cfg', required=True) 17 | parser.add_argument('--pth', required=True) 18 | parser.add_argument('--out', required=True) 19 | parser.add_argument('--inp', required=True) 20 | parser.add_argument('opts', 21 | help='Modify config options using the command-line', 22 | default=None, nargs=argparse.REMAINDER) 23 | args = parser.parse_args() 24 | update_config(config, args) 25 | device = 'cuda' if config.cuda else 'cpu' 26 | 27 | # Parse input paths 28 | rgb_lst = glob.glob(args.inp) 29 | if len(rgb_lst) == 0: 30 | print('No images found') 31 | import sys; sys.exit() 32 | 33 | # Init model 34 | model_file = importlib.import_module(config.model.file) 35 | model_class = getattr(model_file, config.model.modelclass) 36 | net = model_class(**config.model.kwargs) 37 | net.load_state_dict(torch.load(args.pth, map_location=device)) 38 | net = net.eval().to(device) 39 | 40 | # Run inference 41 | with torch.no_grad(): 42 | for path in tqdm(rgb_lst): 43 | rgb = imread(path) 44 | x = torch.from_numpy(rgb).permute(2,0,1)[None].float() / 255. 
45 | x = x.to(device) 46 | cor_id = net.infer(x)['cor_id'] 47 | 48 | fname = os.path.splitext(os.path.split(path)[1])[0] 49 | with open(os.path.join(args.out, f'{fname}.layout.txt'), 'w') as f: 50 | for u, v in cor_id: 51 | f.write(f'{u:.1f} {v:.1f}\n') 52 | 53 | -------------------------------------------------------------------------------- /lib/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from yacs.config import CfgNode as CN 3 | 4 | config = CN() 5 | 6 | config.ckpt_root = 'ckpt' 7 | config.cuda = True 8 | config.cuda_benchmark = True 9 | config.num_workers = 8 10 | 11 | config.dataset = CN() 12 | config.dataset.name = 'PanoCorBonDataset' 13 | config.dataset.common_kwargs = CN(new_allowed=True) 14 | config.dataset.train_kwargs = CN(new_allowed=True) 15 | config.dataset.valid_kwargs = CN(new_allowed=True) 16 | 17 | config.training = CN() 18 | config.training.epoch = 300 19 | config.training.batch_size = 4 20 | config.training.save_every = 100 21 | config.training.optim = 'Adam' 22 | config.training.optim_lr = 0.0001 23 | config.training.optim_betas = (0.9, 0.999) 24 | config.training.weight_decay = 0.0 25 | config.training.wd_group_mode = 'bn and bias' 26 | config.training.optim_milestons = [0.5, 0.9] 27 | config.training.optim_gamma = 0.2 28 | config.training.optim_poly_gamma = -1.0 29 | config.training.fix_encoder_bn = False 30 | 31 | config.model = CN() 32 | config.model.file = 'lib.model.HorizonNet' 33 | config.model.modelclass = 'HorizonNet' 34 | config.model.kwargs = CN(new_allowed=True) 35 | 36 | 37 | def update_config(cfg, args): 38 | cfg.defrost() 39 | 40 | cfg.merge_from_file(args.cfg) 41 | cfg.merge_from_list(args.opts) 42 | 43 | cfg.freeze() 44 | 45 | def infer_exp_id(cfg_path): 46 | cfg_path = cfg_path.split('config/')[-1] 47 | if cfg_path.endswith('.yaml'): 48 | cfg_path = cfg_path[:-len('.yaml')] 49 | return '_'.join(cfg_path.split('/')) 50 | 51 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_layout import PanoCorBonDataset 2 | from .dataset_s2d3d_sem import S2d3dSemDataset 3 | from .dataset_depth import CorruptMP3dDepthDataset, MP3dDepthDataset, S2d3dDepthDataset 4 | 5 | -------------------------------------------------------------------------------- /lib/dataset/dataset_depth.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | from imageio import imread 6 | from scipy.spatial.transform import Rotation 7 | from lib.misc.pano_lsd_align import rotatePanorama 8 | 9 | import torch 10 | import torch.utils.data as data 11 | 12 | 13 | class BaseDataset(data.Dataset): 14 | def __init__(self, dmin=0.01, dmax=10, hw=(512, 1024), 15 | rand_rotate=False, rand_flip=False, rand_gamma=False, 16 | rand_pitch=0, rand_roll=0, 17 | fix_pitch=0, fix_roll=0): 18 | self.fname = [] 19 | self.rgb_paths, self.d_paths = [], [] 20 | self.dmin = dmin 21 | self.dmax = dmax 22 | self.hw = hw 23 | self.rand_rotate = rand_rotate 24 | self.rand_flip = rand_flip 25 | self.rand_gamma = rand_gamma 26 | self.rand_pitch = rand_pitch 27 | self.rand_roll = rand_roll 28 | self.fix_pitch = fix_pitch 29 | self.fix_roll = fix_roll 30 | 31 | def __len__(self): 32 | return len(self.rgb_paths) 33 | 34 | def read_rgb(self, path): 35 | return imread(path) 36 | 37 | def read_depth(self, path): 38 | raise 
NotImplementedError 39 | 40 | def __getitem__(self, idx): 41 | # Read data 42 | fname = self.fname[idx] 43 | color = self.read_rgb(self.rgb_paths[idx]) 44 | depth = self.read_depth(self.d_paths[idx]) 45 | 46 | # To tensor and reshape to [C, H, W] 47 | color = torch.from_numpy(color).permute(2,0,1).float() / 255 48 | depth = torch.from_numpy(depth)[None].float() 49 | depth = torch.clamp(depth, max=self.dmax) 50 | 51 | # Resize 52 | if color.shape[1:] != self.hw: 53 | color = torch.nn.functional.interpolate(color[None], self.hw, mode='area')[0] 54 | if depth.shape[1:] != self.hw: 55 | depth = torch.nn.functional.interpolate(depth[None], self.hw, mode='nearest')[0] 56 | 57 | # Data augmentation 58 | if self.rand_rotate: 59 | shift = np.random.randint(self.hw[1]) 60 | color = torch.roll(color, shift, dims=-1) 61 | depth = torch.roll(depth, shift, dims=-1) 62 | 63 | if self.rand_flip and np.random.randint(2): 64 | color = torch.flip(color, dims=[-1]) 65 | depth = torch.flip(depth, dims=[-1]) 66 | 67 | if self.rand_gamma: 68 | p = np.random.uniform(1, 1.2) 69 | if np.random.randint(2) == 0: 70 | p = 1 / p 71 | color = color ** p 72 | 73 | # Rotation augmentation 74 | if self.rand_pitch > 0 or self.rand_roll > 0 or self.fix_pitch != 0 or self.fix_roll > 0: 75 | color = color.permute(1,2,0).numpy() 76 | depth = depth.permute(1,2,0).numpy() 77 | if self.fix_pitch: 78 | rot = self.fix_pitch 79 | vp = Rotation.from_rotvec([rot * np.pi / 180, 0, 0]).as_matrix() 80 | color = rotatePanorama(color, vp, order=0) 81 | elif self.rand_pitch > 0: 82 | rot = np.random.randint(0, self.rand_pitch) 83 | vp = Rotation.from_rotvec([rot * np.pi / 180, 0, 0]).as_matrix() 84 | color = rotatePanorama(color, vp, order=0) 85 | depth = rotatePanorama(depth, vp, order=0) 86 | if self.fix_roll: 87 | rot = self.fix_roll 88 | vp = Rotation.from_rotvec([0, rot * np.pi / 180, 0]).as_matrix() 89 | color = rotatePanorama(color, vp, order=0) 90 | elif self.rand_roll > 0: 91 | rot = np.random.randint(0, self.rand_roll) 92 | vp = Rotation.from_rotvec([0, rot * np.pi / 180, 0]).as_matrix() 93 | color = rotatePanorama(color, vp, order=0) 94 | depth = rotatePanorama(depth, vp, order=0) 95 | color = torch.from_numpy(color).permute(2,0,1).float() 96 | depth = torch.from_numpy(depth).permute(2,0,1).float() 97 | 98 | return {'x': color, 'depth': depth, 'fname': fname.ljust(200)} 99 | 100 | 101 | class CorruptMP3dDepthDataset(BaseDataset): 102 | def __init__(self, root, scene_txt, **kwargs): 103 | super(CorruptMP3dDepthDataset, self).__init__(**kwargs) 104 | 105 | # List all rgbd paths 106 | with open(scene_txt) as f: 107 | scene_split_ids = set(f.read().split()) 108 | for scene in os.listdir(root): 109 | scene_root = os.path.join(root, scene) 110 | if not os.path.isdir(scene_root) or scene not in scene_split_ids: 111 | continue 112 | for cam in os.listdir(scene_root): 113 | cam_root = os.path.join(scene_root, cam) 114 | if not os.path.isdir(cam_root): 115 | continue 116 | self.rgb_paths.append(os.path.join(cam_root, 'color.jpg')) 117 | self.d_paths.append(os.path.join(cam_root, 'depth.npy')) 118 | assert len(self.rgb_paths) == len(self.d_paths) 119 | for path in self.rgb_paths: 120 | self.fname.append('_'.join(path.split('/'))) 121 | 122 | def read_depth(self, path): 123 | depth = np.load(path) 124 | depth[depth == 0.01] = 0 125 | return depth 126 | 127 | 128 | class MP3dDepthDataset(BaseDataset): 129 | def __init__(self, root, scene_txt, **kwargs): 130 | super(MP3dDepthDataset, self).__init__(**kwargs) 131 | 132 | # List all rgbd paths 
133 | with open(scene_txt) as f: 134 | scene_split_ids = set(f.read().split()) 135 | for scene in os.listdir(root): 136 | scene_root = os.path.join(root, scene) 137 | if not os.path.isdir(scene_root) or scene not in scene_split_ids: 138 | continue 139 | self.rgb_paths.extend(sorted(glob.glob(os.path.join(scene_root, '*rgb.png')))) 140 | self.d_paths.extend(sorted(glob.glob(os.path.join(scene_root, '*depth.exr')))) 141 | assert len(self.rgb_paths) == len(self.d_paths) 142 | for path in self.rgb_paths: 143 | self.fname.append('_'.join(path.split('/'))) 144 | 145 | def read_depth(self, path): 146 | import Imath 147 | import OpenEXR 148 | f = OpenEXR.InputFile(path) 149 | dw = f.header()['dataWindow'] 150 | size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) 151 | depth = np.frombuffer(f.channel('Y', Imath.PixelType(Imath.PixelType.FLOAT)), np.float32) 152 | depth = depth.reshape(size[1], size[0]) 153 | f.close() 154 | return depth.astype(np.float32) 155 | 156 | 157 | class S2d3dDepthDataset(BaseDataset): 158 | def __init__(self, root, scene_txt, **kwargs): 159 | super(S2d3dDepthDataset, self).__init__(**kwargs) 160 | 161 | # List all rgbd paths 162 | with open(scene_txt) as f: 163 | path_pair = [l.strip().split() for l in f] 164 | for rgb_path, dep_path in path_pair: 165 | self.rgb_paths.append(os.path.join(root, rgb_path)) 166 | self.d_paths.append(os.path.join(root, dep_path)) 167 | self.fname.append(os.path.split(rgb_path)[1]) 168 | 169 | def read_depth(self, path): 170 | depth = imread(path) 171 | return np.where(depth==65535, 0, depth/512) 172 | 173 | -------------------------------------------------------------------------------- /lib/dataset/dataset_s2d3d_sem.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | from imageio import imread 5 | from shapely.geometry import LineString 6 | 7 | import torch 8 | import torch.utils.data as data 9 | import torch.nn.functional as F 10 | 11 | from lib.misc import panostretch 12 | 13 | __FOLD__ = { 14 | '1_train': ['area_1', 'area_2', 'area_3', 'area_4', 'area_6'], 15 | '1_valid': ['area_5a', 'area_5b'], 16 | '2_train': ['area_1', 'area_3', 'area_5a', 'area_5b', 'area_6'], 17 | '2_valid': ['area_2', 'area_4'], 18 | '3_train': ['area_2', 'area_4', 'area_5a', 'area_5b'], 19 | '3_valid': ['area_1', 'area_3', 'area_6'], 20 | } 21 | 22 | class S2d3dSemDataset(data.Dataset): 23 | NUM_CLASSES = 13 24 | ID2CLASS = ['beam', 'board', 'bookcase', 'ceiling', 'chair', 'clutter', 'column', 'door', 'floor', 'sofa', 'table', 'wall', 'window'] 25 | def __init__(self, root, fold, depth=True, hw=(512, 1024), mask_black=True, flip=False, rotate=False): 26 | assert fold in __FOLD__, 'Unknown fold' 27 | self.depth = depth 28 | self.hw = hw 29 | self.mask_black = mask_black 30 | self.rgb_paths = [] 31 | self.sem_paths = [] 32 | self.dep_paths = [] 33 | for dname in __FOLD__[fold]: 34 | self.rgb_paths.extend(sorted(glob.glob(os.path.join(root, dname, 'rgb', '*png')))) 35 | self.sem_paths.extend(sorted(glob.glob(os.path.join(root, dname, 'semantic', '*png')))) 36 | self.dep_paths.extend(sorted(glob.glob(os.path.join(root, dname, 'depth', '*png')))) 37 | assert len(self.rgb_paths) 38 | assert len(self.rgb_paths) == len(self.sem_paths) 39 | assert len(self.rgb_paths) == len(self.dep_paths) 40 | self.flip = flip 41 | self.rotate = rotate 42 | 43 | def __len__(self): 44 | return len(self.rgb_paths) 45 | 46 | def __getitem__(self, idx): 47 | rgb = 
torch.FloatTensor(imread(self.rgb_paths[idx]) / 255.).permute(2,0,1) 48 | sem = torch.LongTensor(imread(self.sem_paths[idx])) - 1 49 | if self.depth: 50 | dep = imread(self.dep_paths[idx]) 51 | dep = np.where(dep==65535, 0, dep/512) 52 | dep = np.clip(dep, 0, 4) 53 | dep = torch.FloatTensor(dep[None]) 54 | rgb = torch.cat([rgb, dep], 0) 55 | H, W = rgb.shape[1:] 56 | if (H, W) != self.hw: 57 | rgb = F.interpolate(rgb[None], size=self.hw, mode='bilinear', align_corners=False)[0] 58 | sem = F.interpolate(sem[None,None].float(), size=self.hw, mode='nearest')[0,0].long() 59 | 60 | # Random flip 61 | if self.flip and np.random.randint(2) == 0: 62 | rgb = torch.flip(rgb, (-1,)) 63 | sem = torch.flip(sem, (-1,)) 64 | 65 | # Random horizontal rotate 66 | if self.rotate: 67 | dx = np.random.randint(W) 68 | rgb = torch.roll(rgb, dx, dims=-1) 69 | sem = torch.roll(sem, dx, dims=-1) 70 | 71 | # Mask out top-down black 72 | if self.mask_black: 73 | sem[rgb.sum(0) == 0] = -1 74 | 75 | # Convert all data to tensor 76 | out_dict = { 77 | 'x': rgb, 78 | 'sem': sem, 79 | 'fname': os.path.split(self.rgb_paths[idx])[1].ljust(200), 80 | } 81 | return out_dict 82 | 83 | 84 | if __name__ == '__main__': 85 | 86 | import argparse 87 | from tqdm import tqdm 88 | 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--root_dir', default='data/valid/') 91 | parser.add_argument('--ith', default=0, type=int, 92 | help='Pick a data id to visualize.' 93 | '-1 for visualize all data') 94 | parser.add_argument('--flip', action='store_true', 95 | help='whether to random flip') 96 | parser.add_argument('--rotate', action='store_true', 97 | help='whether to random horizon rotation') 98 | parser.add_argument('--gamma', action='store_true', 99 | help='whether to random luminance change') 100 | parser.add_argument('--stretch', action='store_true', 101 | help='whether to random pano stretch') 102 | parser.add_argument('--dist_clip', default=20) 103 | parser.add_argument('--out_dir', default='data/vis_dataset') 104 | args = parser.parse_args() 105 | 106 | os.makedirs(args.out_dir, exist_ok=True) 107 | 108 | print('args:') 109 | for key, val in vars(args).items(): 110 | print(' {:16} {}'.format(key, val)) 111 | 112 | dataset = PanoCorBonDataset( 113 | root_dir=args.root_dir, 114 | flip=args.flip, rotate=args.rotate, gamma=args.gamma, stretch=args.stretch) 115 | 116 | # Showing some information about dataset 117 | print('len(dataset): {}'.format(len(dataset))) 118 | batch = dataset[args.ith] 119 | for k, v in batch.items(): 120 | if torch.is_tensor(v): 121 | print(k, v.shape) 122 | else: 123 | print(k, v) 124 | print('=' * 20) 125 | 126 | if args.ith >= 0: 127 | to_visualize = [dataset[args.ith]] 128 | else: 129 | to_visualize = dataset 130 | 131 | import matplotlib.pyplot as plt 132 | cmap = plt.get_cmap('bwr') 133 | for batch in tqdm(to_visualize): 134 | fname = os.path.split(batch['img_path'])[-1] 135 | img = batch['x'].permute(1,2,0).numpy() 136 | y_bon = batch['bon'].numpy() 137 | y_bon = ((y_bon / np.pi + 0.5) * img.shape[0]).round().astype(int) 138 | img[y_bon[0], np.arange(len(y_bon[0])), 1] = 1 139 | img[y_bon[1], np.arange(len(y_bon[1])), 1] = 1 140 | img = (img * 255).astype(np.uint8) 141 | img_pad = np.full((3, 1024, 3), 255, np.uint8) 142 | img_vot = batch['vot'].repeat(30, 1).numpy() 143 | img_vot = (img_vot / args.dist_clip + 1) / 2 144 | vot_mask = (img_vot >= 0) & (img_vot <= 1) 145 | img_vot = (cmap(img_vot)[...,:3] * 255).astype(np.uint8) 146 | img_vot[~vot_mask] = 0 147 | out = 
np.concatenate([img_vot, img_pad, img], 0) 148 | Image.fromarray(out).save(os.path.join(args.out_dir, fname)) 149 | 150 | -------------------------------------------------------------------------------- /lib/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/HoHoNet/2bbc0866789cf7ad728064bc52aaf1d11b67c885/lib/misc/__init__.py -------------------------------------------------------------------------------- /lib/misc/gen_txt_structured3d.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Help generate txt for train.py 3 | Please contact https://github.com/bertjiazheng/Structured3D for dataset. 4 | ''' 5 | 6 | import os 7 | import glob 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 11 | parser.add_argument('--root', required=True, 12 | help='path to the dataset directory') 13 | parser.add_argument('--train_txt', required=True, 14 | help='path to save txt for train') 15 | parser.add_argument('--valid_txt', required=True, 16 | help='path to save txt for valid') 17 | parser.add_argument('--test_txt', required=True, 18 | help='path to save txt for test') 19 | args = parser.parse_args() 20 | 21 | train_scene = ['scene_%05d' % i for i in range(0, 3000)] 22 | valid_scene = ['scene_%05d' % i for i in range(3000, 3250)] 23 | test_scene = ['scene_%05d' % i for i in range(3250, 3500)] 24 | 25 | # Simple check: all directories exist 26 | for path in train_scene + valid_scene + test_scene: 27 | assert os.path.isdir(os.path.join(args.root, path)), '%s not found' % path 28 | 29 | def gen_pairs(scene_id_lst): 30 | pairs = [] 31 | for scene_id in scene_id_lst: 32 | for fname in os.listdir(os.path.join(args.root, scene_id, 'rgb')): 33 | room_id = os.path.split(fname)[1].split('_')[0] 34 | 35 | img_k = os.path.join(os.path.join(scene_id, 'rgb', fname)) 36 | layout_k = os.path.join(os.path.join(scene_id, 'layout', room_id + '_layout.txt')) 37 | assert os.path.isfile(os.path.join(args.root, img_k)) 38 | assert os.path.isfile(os.path.join(args.root, layout_k)) 39 | pairs.append((img_k, layout_k)) 40 | return pairs 41 | 42 | with open(args.train_txt, 'w') as f: 43 | pairs = gen_pairs(train_scene) 44 | f.write('\n'.join([' '.join(p) for p in pairs])) 45 | 46 | with open(args.valid_txt, 'w') as f: 47 | pairs = gen_pairs(valid_scene) 48 | f.write('\n'.join([' '.join(p) for p in pairs])) 49 | 50 | with open(args.test_txt, 'w') as f: 51 | pairs = gen_pairs(test_scene) 52 | f.write('\n'.join([' '.join(p) for p in pairs])) 53 | -------------------------------------------------------------------------------- /lib/misc/panostretch.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import numpy as np 3 | from scipy.ndimage import map_coordinates 4 | 5 | 6 | def uv_meshgrid(w, h): 7 | uv = np.stack(np.meshgrid(range(w), range(h)), axis=-1) 8 | uv = uv.astype(np.float64) 9 | uv[..., 0] = ((uv[..., 0] + 0.5) / w - 0.5) * 2 * np.pi 10 | uv[..., 1] = ((uv[..., 1] + 0.5) / h - 0.5) * np.pi 11 | return uv 12 | 13 | 14 | @functools.lru_cache() 15 | def _uv_tri(w, h): 16 | uv = uv_meshgrid(w, h) 17 | sin_u = np.sin(uv[..., 0]) 18 | cos_u = np.cos(uv[..., 0]) 19 | tan_v = np.tan(uv[..., 1]) 20 | return sin_u, cos_u, tan_v 21 | 22 | 23 | def uv_tri(w, h): 24 | sin_u, cos_u, tan_v = _uv_tri(w, h) 25 | return sin_u.copy(), cos_u.copy(), tan_v.copy() 26 | 27 | 28 | def 
coorx2u(x, w=1024): 29 | return ((x + 0.5) / w - 0.5) * 2 * np.pi 30 | 31 | 32 | def coory2v(y, h=512): 33 | return ((y + 0.5) / h - 0.5) * np.pi 34 | 35 | 36 | def u2coorx(u, w=1024): 37 | return (u / (2 * np.pi) + 0.5) * w - 0.5 38 | 39 | 40 | def v2coory(v, h=512): 41 | return (v / np.pi + 0.5) * h - 0.5 42 | 43 | 44 | def uv2xy(u, v, z=-50): 45 | c = z / np.tan(v) 46 | x = c * np.cos(u) 47 | y = c * np.sin(u) 48 | return x, y 49 | 50 | 51 | def pano_connect_points(p1, p2, z=-50, w=1024, h=512): 52 | if p1[0] == p2[0]: 53 | return np.array([p1, p2], np.float32) 54 | 55 | u1 = coorx2u(p1[0], w) 56 | v1 = coory2v(p1[1], h) 57 | u2 = coorx2u(p2[0], w) 58 | v2 = coory2v(p2[1], h) 59 | 60 | x1, y1 = uv2xy(u1, v1, z) 61 | x2, y2 = uv2xy(u2, v2, z) 62 | 63 | if abs(p1[0] - p2[0]) < w / 2: 64 | pstart = np.ceil(min(p1[0], p2[0])) 65 | pend = np.floor(max(p1[0], p2[0])) 66 | else: 67 | pstart = np.ceil(max(p1[0], p2[0])) 68 | pend = np.floor(min(p1[0], p2[0]) + w) 69 | coorxs = (np.arange(pstart, pend + 1) % w).astype(np.float64) 70 | vx = x2 - x1 71 | vy = y2 - y1 72 | us = coorx2u(coorxs, w) 73 | ps = (np.tan(us) * x1 - y1) / (vy - np.tan(us) * vx) 74 | cs = np.sqrt((x1 + ps * vx) ** 2 + (y1 + ps * vy) ** 2) 75 | vs = np.arctan2(z, cs) 76 | coorys = v2coory(vs, h) 77 | 78 | return np.stack([coorxs, coorys], axis=-1) 79 | 80 | 81 | def pano_stretch(img, corners, kx, ky, order=1): 82 | ''' 83 | img: [H, W, C] 84 | corners: [N, 2] in image coordinate (x, y) format 85 | kx: Stretching along front-back direction 86 | ky: Stretching along left-right direction 87 | order: Interpolation order. 0 for nearest-neighbor. 1 for bilinear. 88 | ''' 89 | 90 | # Process image 91 | sin_u, cos_u, tan_v = uv_tri(img.shape[1], img.shape[0]) 92 | u0 = np.arctan2(sin_u * kx / ky, cos_u) 93 | v0 = np.arctan(tan_v * np.sin(u0) / sin_u * ky) 94 | 95 | refx = (u0 / (2 * np.pi) + 0.5) * img.shape[1] - 0.5 96 | refy = (v0 / np.pi + 0.5) * img.shape[0] - 0.5 97 | 98 | # [TODO]: using opencv remap could probably speedup the process a little 99 | stretched_img = np.stack([ 100 | map_coordinates(img[..., i], [refy, refx], order=order, mode='wrap') 101 | for i in range(img.shape[-1]) 102 | ], axis=-1) 103 | 104 | # Process corners 105 | corners_u0 = coorx2u(corners[:, 0], img.shape[1]) 106 | corners_v0 = coory2v(corners[:, 1], img.shape[0]) 107 | corners_u = np.arctan2(np.sin(corners_u0) * ky / kx, np.cos(corners_u0)) 108 | corners_v = np.arctan(np.tan(corners_v0) * np.sin(corners_u) / np.sin(corners_u0) / ky) 109 | cornersX = u2coorx(corners_u, img.shape[1]) 110 | cornersY = v2coory(corners_v, img.shape[0]) 111 | stretched_corners = np.stack([cornersX, cornersY], axis=-1) 112 | 113 | return stretched_img, stretched_corners 114 | 115 | 116 | def visualize_pano_stretch(stretched_img, stretched_cor, title): 117 | ''' 118 | Helper function for visualizing the effect of pano_stretch 119 | ''' 120 | thikness = 2 121 | color = (0, 255, 0) 122 | for i in range(4): 123 | xys = pano_connect_points(stretched_cor[i*2], stretched_cor[(i*2+2) % 8], z=-50) 124 | xys = xys.astype(int) 125 | blue_split = np.where((xys[1:, 0] - xys[:-1, 0]) < 0)[0] 126 | if len(blue_split) == 0: 127 | cv2.polylines(stretched_img, [xys], False, color, 2) 128 | else: 129 | t = blue_split[0] + 1 130 | cv2.polylines(stretched_img, [xys[:t]], False, color, thikness) 131 | cv2.polylines(stretched_img, [xys[t:]], False, color, thikness) 132 | 133 | for i in range(4): 134 | xys = pano_connect_points(stretched_cor[i*2+1], stretched_cor[(i*2+3) % 8], z=50) 135 | xys = 
xys.astype(int) 136 | blue_split = np.where((xys[1:, 0] - xys[:-1, 0]) < 0)[0] 137 | if len(blue_split) == 0: 138 | cv2.polylines(stretched_img, [xys], False, color, 2) 139 | else: 140 | t = blue_split[0] + 1 141 | cv2.polylines(stretched_img, [xys[:t]], False, color, thikness) 142 | cv2.polylines(stretched_img, [xys[t:]], False, color, thikness) 143 | 144 | cv2.putText(stretched_img, title, (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, 145 | (0, 0, 0), 2, cv2.LINE_AA) 146 | 147 | return stretched_img.astype(np.uint8) 148 | 149 | 150 | if __name__ == '__main__': 151 | 152 | import argparse 153 | import time 154 | from PIL import Image 155 | import cv2 156 | 157 | parser = argparse.ArgumentParser() 158 | parser.add_argument('--i', default='data/valid/img/pano_abpohapclcyuuz.png') 159 | parser.add_argument('--i_gt', default='data/valid/label_cor/pano_abpohapclcyuuz.txt') 160 | parser.add_argument('--o', default='sample_stretched_pano.png') 161 | parser.add_argument('--kx', default=2, type=float, 162 | help='Stretching along front-back direction') 163 | parser.add_argument('--ky', default=1, type=float, 164 | help='Stretching along left-right direction') 165 | args = parser.parse_args() 166 | 167 | img = np.array(Image.open(args.i), np.float64) 168 | with open(args.i_gt) as f: 169 | cor = np.array([line.strip().split() for line in f], np.int32) 170 | stretched_img, stretched_cor = pano_stretch(img, cor, args.kx, args.ky) 171 | 172 | title = 'kx=%3.2f, ky=%3.2f' % (args.kx, args.ky) 173 | visual_stretched_img = visualize_pano_stretch(stretched_img, stretched_cor, title) 174 | Image.fromarray(visual_stretched_img).save(args.o) 175 | -------------------------------------------------------------------------------- /lib/misc/structured3d_extract_zip.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from zipfile import ZipFile 4 | from tqdm import tqdm 5 | import imageio 6 | 7 | ''' 8 | Zipfile format assumption: 9 | Structured3D 10 | -- [scene_xxxxx] 11 | -- other something 12 | -- 2D_rendering 13 | -- [image_id] 14 | -- panorama 15 | -- camera_xyz.txt 16 | -- layout.txt 17 | -- [empty|simple|full] 18 | -- depth.png 19 | -- rgb_rawlight.png 20 | -- rgb_coldlight.png 21 | -- rgb_warmlight.png 22 | -- other something 23 | 24 | Output format 25 | outdir 26 | -- [scene_xxxxx] 27 | -- img 28 | -- layout 29 | ''' 30 | 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument('--zippath', required=True) 33 | parser.add_argument('--style', default='full') 34 | parser.add_argument('--outdir', default='structured3d') 35 | args = parser.parse_args() 36 | 37 | path_format = 'Structured3D/%s/2D_rendering/%s/panorama/%s' 38 | 39 | with ZipFile(args.zippath) as zipf: 40 | id_set = set() 41 | for path in zipf.namelist(): 42 | assert path.startswith('Structured3D') 43 | if path.endswith('camera_xyz.txt'): 44 | path_lst = path.split('/') 45 | scene_id = path_lst[1] 46 | image_id = path_lst[3] 47 | id_set.add((scene_id, image_id)) 48 | 49 | for scene_id, image_id in tqdm(id_set): 50 | path_img = path_format % (scene_id, image_id, '%s/rgb_rawlight.png' % args.style) 51 | path_layout = path_format % (scene_id, image_id, 'layout.txt') 52 | 53 | os.makedirs(os.path.join(args.outdir, scene_id, 'rgb'), exist_ok=True) 54 | os.makedirs(os.path.join(args.outdir, scene_id, 'layout'), exist_ok=True) 55 | 56 | with zipf.open(path_img) as f: 57 | rgb = imageio.imread(f)[..., :3] 58 | imageio.imwrite(os.path.join(args.outdir, scene_id, 'rgb', image_id + 
'_rgb_rawlight.png'), rgb) 59 | with zipf.open(path_layout) as f: 60 | with open(os.path.join(args.outdir, scene_id, 'layout', image_id + '_layout.txt'), 'w') as fo: 61 | fo.write(f.read().decode()) 62 | -------------------------------------------------------------------------------- /lib/misc/structured3d_prepare_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from zipfile import ZipFile 4 | from tqdm import tqdm 5 | import imageio 6 | 7 | ''' 8 | Assume datas is extracted by `misc/structured3d_extract_zip.py`. 9 | That is to said, assuming following structure: 10 | - {in_root}/scene_xxxxx 11 | - rgb/ 12 | - *png 13 | - layout/ 14 | - *txt 15 | 16 | The reorganized structure as follow: 17 | - {out_train_root} 18 | - img/ 19 | - scene_xxxxx_*png (softlink) 20 | - label_cor/ 21 | - scene_xxxxx_*txt (softlink) 22 | - {out_valid_root} ... 23 | - {out_test_root} ... 24 | ''' 25 | TRAIN_SCENE = ['scene_%05d' % i for i in range(0, 3000)] 26 | VALID_SCENE = ['scene_%05d' % i for i in range(3000, 3250)] 27 | TEST_SCENE = ['scene_%05d' % i for i in range(3250, 3500)] 28 | 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--in_root', required=True) 31 | parser.add_argument('--out_train_root', default='data/st3d_train_full_raw_light') 32 | parser.add_argument('--out_valid_root', default='data/st3d_valid_full_raw_light') 33 | parser.add_argument('--out_test_root', default='data/st3d_test_full_raw_light') 34 | args = parser.parse_args() 35 | 36 | def prepare_dataset(scene_ids, out_dir): 37 | root_img = os.path.join(out_dir, 'img') 38 | root_cor = os.path.join(out_dir, 'label_cor') 39 | os.makedirs(root_img, exist_ok=True) 40 | os.makedirs(root_cor, exist_ok=True) 41 | for scene_id in tqdm(scene_ids): 42 | source_img_root = os.path.join(args.in_root, scene_id, 'rgb') 43 | source_cor_root = os.path.join(args.in_root, scene_id, 'layout') 44 | for fname in os.listdir(source_cor_root): 45 | room_id = fname.split('_')[0] 46 | source_img_path = os.path.join(args.in_root, scene_id, 'rgb', room_id + '_rgb_rawlight.png') 47 | source_cor_path = os.path.join(args.in_root, scene_id, 'layout', room_id + '_layout.txt') 48 | target_img_path = os.path.join(root_img, '%s_%s.png' % (scene_id, room_id)) 49 | target_cor_path = os.path.join(root_cor, '%s_%s.txt' % (scene_id, room_id)) 50 | assert os.path.isfile(source_img_path) 51 | assert os.path.isfile(source_cor_path) 52 | os.symlink(source_img_path, target_img_path) 53 | os.symlink(source_cor_path, target_cor_path) 54 | 55 | prepare_dataset(TRAIN_SCENE, args.out_train_root) 56 | prepare_dataset(VALID_SCENE, args.out_valid_root) 57 | prepare_dataset(TEST_SCENE, args.out_test_root) 58 | -------------------------------------------------------------------------------- /lib/misc/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | 5 | 6 | def group_weight(module): 7 | # Group module parameters into two group 8 | # One need weight_decay and the other doesn't 9 | group_decay = [] 10 | group_no_decay = [] 11 | for m in module.modules(): 12 | if isinstance(m, nn.Linear): 13 | group_decay.append(m.weight) 14 | if m.bias is not None: 15 | group_no_decay.append(m.bias) 16 | elif isinstance(m, nn.modules.conv._ConvNd): 17 | group_decay.append(m.weight) 18 | if m.bias is not None: 19 | group_no_decay.append(m.bias) 20 | elif isinstance(m, 
nn.modules.batchnorm._BatchNorm): 21 | if m.weight is not None: 22 | group_no_decay.append(m.weight) 23 | if m.bias is not None: 24 | group_no_decay.append(m.bias) 25 | elif isinstance(m, nn.GroupNorm): 26 | if m.weight is not None: 27 | group_no_decay.append(m.weight) 28 | if m.bias is not None: 29 | group_no_decay.append(m.bias) 30 | 31 | assert len(list(module.parameters())) == len(group_decay) + len(group_no_decay) 32 | return [dict(params=group_decay), dict(params=group_no_decay, weight_decay=.0)] 33 | 34 | 35 | def adjust_learning_rate(optimizer, args): 36 | if args.cur_iter < args.warmup_iters: 37 | frac = args.cur_iter / args.warmup_iters 38 | step = args.lr - args.warmup_lr 39 | args.running_lr = args.warmup_lr + step * frac 40 | else: 41 | frac = (float(args.cur_iter) - args.warmup_iters) / (args.max_iters - args.warmup_iters) 42 | scale_running_lr = max((1. - frac), 0.) ** args.lr_pow 43 | args.running_lr = args.lr * scale_running_lr 44 | 45 | for param_group in optimizer.param_groups: 46 | param_group['lr'] = args.running_lr 47 | 48 | 49 | def save_model(net, path, args): 50 | state_dict = OrderedDict({ 51 | 'args': args.__dict__, 52 | 'kwargs': { 53 | 'backbone': net.backbone, 54 | 'use_rnn': net.use_rnn, 55 | }, 56 | 'state_dict': net.state_dict(), 57 | }) 58 | torch.save(state_dict, path) 59 | 60 | 61 | def load_trained_model(Net, path): 62 | state_dict = torch.load(path, map_location='cpu') 63 | net = Net(**state_dict['kwargs']) 64 | net.load_state_dict(state_dict['state_dict']) 65 | return net 66 | -------------------------------------------------------------------------------- /lib/model/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import Resnet 2 | from .simple import SimpleEncoder 3 | from .hardnet import HarDNet 4 | -------------------------------------------------------------------------------- /lib/model/backbone/hardnet.py: -------------------------------------------------------------------------------- 1 | ''' Copy-paste from 2 | https://github.com/PingoLH/Pytorch-HarDNet 3 | ''' 4 | import os 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | class Flatten(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | def forward(self, x): 13 | return x.view(x.data.size(0),-1) 14 | 15 | 16 | 17 | class CombConvLayer(nn.Sequential): 18 | def __init__(self, in_channels, out_channels, kernel=1, stride=1, dropout=0.1, bias=False): 19 | super().__init__() 20 | self.add_module('layer1',ConvLayer(in_channels, out_channels, kernel)) 21 | self.add_module('layer2',DWConvLayer(out_channels, out_channels, stride=stride)) 22 | 23 | def forward(self, x): 24 | return super().forward(x) 25 | 26 | class DWConvLayer(nn.Sequential): 27 | def __init__(self, in_channels, out_channels, stride=1, bias=False): 28 | super().__init__() 29 | out_ch = out_channels 30 | 31 | groups = in_channels 32 | kernel = 3 33 | #print(kernel, 'x', kernel, 'x', out_channels, 'x', out_channels, 'DepthWise') 34 | 35 | self.add_module('dwconv', nn.Conv2d(groups, groups, kernel_size=3, 36 | stride=stride, padding=1, groups=groups, bias=bias)) 37 | self.add_module('norm', nn.BatchNorm2d(groups)) 38 | def forward(self, x): 39 | return super().forward(x) 40 | 41 | class ConvLayer(nn.Sequential): 42 | def __init__(self, in_channels, out_channels, kernel=3, stride=1, dropout=0.1, bias=False): 43 | super().__init__() 44 | out_ch = out_channels 45 | groups = 1 46 | #print(kernel, 'x', kernel, 'x', 
in_channels, 'x', out_channels) 47 | self.add_module('conv', nn.Conv2d(in_channels, out_ch, kernel_size=kernel, 48 | stride=stride, padding=kernel//2, groups=groups, bias=bias)) 49 | self.add_module('norm', nn.BatchNorm2d(out_ch)) 50 | self.add_module('relu', nn.ReLU6(True)) 51 | def forward(self, x): 52 | return super().forward(x) 53 | 54 | 55 | class HarDBlock(nn.Module): 56 | def get_link(self, layer, base_ch, growth_rate, grmul): 57 | if layer == 0: 58 | return base_ch, 0, [] 59 | out_channels = growth_rate 60 | link = [] 61 | for i in range(10): 62 | dv = 2 ** i 63 | if layer % dv == 0: 64 | k = layer - dv 65 | link.append(k) 66 | if i > 0: 67 | out_channels *= grmul 68 | out_channels = int(int(out_channels + 1) / 2) * 2 69 | in_channels = 0 70 | for i in link: 71 | ch,_,_ = self.get_link(i, base_ch, growth_rate, grmul) 72 | in_channels += ch 73 | return out_channels, in_channels, link 74 | 75 | def get_out_ch(self): 76 | return self.out_channels 77 | 78 | def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False, dwconv=False): 79 | super().__init__() 80 | self.keepBase = keepBase 81 | self.links = [] 82 | layers_ = [] 83 | self.out_channels = 0 # if upsample else in_channels 84 | for i in range(n_layers): 85 | outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul) 86 | self.links.append(link) 87 | use_relu = residual_out 88 | if dwconv: 89 | layers_.append(CombConvLayer(inch, outch)) 90 | else: 91 | layers_.append(ConvLayer(inch, outch)) 92 | 93 | if (i % 2 == 0) or (i == n_layers - 1): 94 | self.out_channels += outch 95 | #print("Blk out =",self.out_channels) 96 | self.layers = nn.ModuleList(layers_) 97 | 98 | def forward(self, x): 99 | layers_ = [x] 100 | 101 | for layer in range(len(self.layers)): 102 | link = self.links[layer] 103 | tin = [] 104 | for i in link: 105 | tin.append(layers_[i]) 106 | if len(tin) > 1: 107 | x = torch.cat(tin, 1) 108 | else: 109 | x = tin[0] 110 | out = self.layers[layer](x) 111 | layers_.append(out) 112 | 113 | t = len(layers_) 114 | out_ = [] 115 | for i in range(t): 116 | if (i == 0 and self.keepBase) or \ 117 | (i == t-1) or (i%2 == 1): 118 | out_.append(layers_[i]) 119 | out = torch.cat(out_, 1) 120 | return out 121 | 122 | 123 | 124 | 125 | class HarDNet(nn.Module): 126 | def __init__(self, depth_wise=False, arch=68, pretrained=True, weight_path='', input_height=512): 127 | super().__init__() 128 | first_ch = [32, 64] 129 | second_kernel = 3 130 | max_pool = True 131 | grmul = 1.7 132 | drop_rate = 0.1 133 | 134 | #HarDNet68 135 | ch_list = [ 128, 256, 320, 640, 1024] 136 | gr = [ 14, 16, 20, 40,160] 137 | n_layers = [ 8, 16, 16, 16, 4] 138 | downSamp = [ 1, 0, 1, 1, 0] 139 | 140 | if arch==85: 141 | #HarDNet85 142 | first_ch = [48, 96] 143 | ch_list = [ 192, 256, 320, 480, 720, 1280] 144 | gr = [ 24, 24, 28, 36, 48, 256] 145 | n_layers = [ 8, 16, 16, 16, 16, 4] 146 | downSamp = [ 1, 0, 1, 0, 1, 0] 147 | drop_rate = 0.2 148 | elif arch==39: 149 | #HarDNet39 150 | first_ch = [24, 48] 151 | ch_list = [ 96, 320, 640, 1024] 152 | grmul = 1.6 153 | gr = [ 16, 20, 64, 160] 154 | n_layers = [ 4, 16, 8, 4] 155 | downSamp = [ 1, 1, 1, 0] 156 | 157 | if depth_wise: 158 | second_kernel = 1 159 | max_pool = False 160 | drop_rate = 0.05 161 | 162 | blks = len(n_layers) 163 | self.base = nn.ModuleList([]) 164 | 165 | # First Layer: Standard Conv3x3, Stride=2 166 | self.base.append ( 167 | ConvLayer(in_channels=3, out_channels=first_ch[0], kernel=3, 168 | stride=2, bias=False) ) 169 | 170 | # Second Layer 
171 | self.base.append ( ConvLayer(first_ch[0], first_ch[1], kernel=second_kernel) ) 172 | 173 | # Maxpooling or DWConv3x3 downsampling 174 | if max_pool: 175 | self.base.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 176 | else: 177 | self.base.append ( DWConvLayer(first_ch[1], first_ch[1], stride=2) ) 178 | 179 | # Build all HarDNet blocks 180 | ch = first_ch[1] 181 | for i in range(blks): 182 | blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise) 183 | ch = blk.get_out_ch() 184 | self.base.append ( blk ) 185 | 186 | if i == blks-1 and arch == 85: 187 | self.base.append ( nn.Dropout(0.1)) 188 | 189 | self.base.append ( ConvLayer(ch, ch_list[i], kernel=1) ) 190 | ch = ch_list[i] 191 | if downSamp[i] == 1: 192 | if max_pool: 193 | self.base.append(nn.MaxPool2d(kernel_size=2, stride=2)) 194 | else: 195 | self.base.append ( DWConvLayer(ch, ch, stride=2) ) 196 | 197 | ch = ch_list[blks-1] 198 | self.base.append ( 199 | nn.Sequential( 200 | nn.AdaptiveAvgPool2d((1,1)), 201 | Flatten(), 202 | nn.Dropout(drop_rate), 203 | nn.Linear(ch, 1000) )) 204 | 205 | if pretrained: 206 | if hasattr(torch, 'hub'): 207 | 208 | if arch == 68 and not depth_wise: 209 | checkpoint = 'https://ping-chao.com/hardnet/hardnet68-5d684880.pth' 210 | elif arch == 85 and not depth_wise: 211 | checkpoint = 'https://ping-chao.com/hardnet/hardnet85-a28faa00.pth' 212 | elif arch == 68 and depth_wise: 213 | checkpoint = 'https://ping-chao.com/hardnet/hardnet68ds-632474d2.pth' 214 | else: 215 | checkpoint = 'https://ping-chao.com/hardnet/hardnet39ds-0e6c6fa9.pth' 216 | 217 | self.load_state_dict(torch.hub.load_state_dict_from_url(checkpoint, progress=False)) 218 | else: 219 | postfix = 'ds' if depth_wise else '' 220 | weight_file = '%shardnet%d%s.pth'%(weight_path, arch, postfix) 221 | if not os.path.isfile(weight_file): 222 | print(weight_file,'is not found') 223 | exit(0) 224 | weights = torch.load(weight_file) 225 | self.load_state_dict(weights) 226 | 227 | postfix = 'DS' if depth_wise else '' 228 | print('ImageNet pretrained weights for HarDNet%d%s is loaded'%(arch, postfix)) 229 | 230 | # Patch for HoHoNet 231 | self.base = self.base[:-1] 232 | if arch == 68: 233 | self.out_channels = [128, 320, 640, 1024] 234 | self.checkpoint = [4, 9, 12, 15] 235 | elif arch == 85: 236 | self.out_channels = [192, 320, 720, 1280] 237 | self.checkpoint = [4, 9, 14, 18] 238 | else: 239 | raise NotImplementedError 240 | self.feat_heights = [input_height//4//(2**i) for i in range(4)] 241 | 242 | def forward(self, x): 243 | x_lst = [] 244 | for i, layer in enumerate(self.base): 245 | x = layer(x) 246 | if i in self.checkpoint: 247 | x_lst.append(x) 248 | return x_lst 249 | 250 | -------------------------------------------------------------------------------- /lib/model/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | class Resnet(nn.Module): 7 | def __init__(self, backbone='resnet50', coco='', input_extra=0, input_height=512): 8 | super(Resnet, self).__init__() 9 | self.encoder = getattr(models, backbone)(pretrained=True) 10 | del self.encoder.fc, self.encoder.avgpool 11 | if coco: 12 | coco_pretrain = getattr(models.segmentation, coco)(pretrained=True).backbone 13 | self.encoder.load_state_dict(coco_pretrain.state_dict()) 14 | self.out_channels = [256, 512, 1024, 2048] 15 | self.feat_heights = [input_height//4//(2**i) for i in range(4)] 16 | 
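# Note: resnet18/34 are built from BasicBlock, whose four stages output
# 64/128/256/512 channels (a quarter of the Bottleneck widths listed above),
# which is why out_channels is divided by 4 when the backbone name suffix is < 50.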
if int(backbone[6:]) < 50: 17 | self.out_channels = [_//4 for _ in self.out_channels] 18 | 19 | # Patch for extra input channel 20 | if input_extra > 0: 21 | ori_conv1 = self.encoder.conv1 22 | new_conv1 = nn.Conv2d( 23 | 3+input_extra, ori_conv1.out_channels, 24 | kernel_size=ori_conv1.kernel_size, 25 | stride=ori_conv1.stride, 26 | padding=ori_conv1.padding, 27 | bias=ori_conv1.bias) 28 | with torch.no_grad(): 29 | for i in range(0, 3+input_extra, 3): 30 | n = new_conv1.weight[:, i:i+3].shape[1] 31 | new_conv1.weight[:, i:i+n] = ori_conv1.weight[:, :n] 32 | self.encoder.conv1 = new_conv1 33 | 34 | # Prepare for pre/pose down height filtering 35 | self.pre_down = None 36 | self.post_down = None 37 | 38 | def forward(self, x): 39 | features = [] 40 | x = self.encoder.conv1(x) 41 | x = self.encoder.bn1(x) 42 | x = self.encoder.relu(x) 43 | x = self.encoder.maxpool(x) 44 | 45 | if self.pre_down is not None: 46 | x = self.pre_down(x) 47 | x = self.encoder.layer1(x); 48 | if self.post_down is not None: 49 | x = self.post_down(x) 50 | features.append(x) # 1/4 51 | x = self.encoder.layer2(x); features.append(x) # 1/8 52 | x = self.encoder.layer3(x); features.append(x) # 1/16 53 | x = self.encoder.layer4(x); features.append(x) # 1/32 54 | return features 55 | -------------------------------------------------------------------------------- /lib/model/backbone/simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | class SimpleResBlock(nn.Module): 7 | def __init__(self, a, b, c, s): 8 | super(SimpleResBlock, self).__init__() 9 | self.layer = nn.Sequential( 10 | nn.Conv2d(a, b, 1, bias=False), 11 | nn.BatchNorm2d(b), 12 | nn.ReLU(inplace=True), 13 | nn.Conv2d(b, b, 3, padding=1, stride=s, bias=False), 14 | nn.BatchNorm2d(b), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(b, c, 1, bias=False), 17 | nn.BatchNorm2d(c), 18 | ) 19 | self.skip = nn.Sequential( 20 | nn.Conv2d(a, c, 1, stride=s, bias=False), 21 | nn.BatchNorm2d(c), 22 | ) 23 | self.relu = nn.ReLU(inplace=True) 24 | nn.init.constant_(self.layer[-1].weight, 0) 25 | nn.init.constant_(self.layer[-1].bias, 0) 26 | 27 | def forward(self, x): 28 | return self.relu(self.layer(x) + self.skip(x)) 29 | 30 | class SimpleConv3x3Block(nn.Module): 31 | def __init__(self, a, b, c, s): 32 | super(SimpleConv3x3Block, self).__init__() 33 | self.layer = nn.Sequential( 34 | nn.Conv2d(a, c, 3, padding=1, stride=s, bias=False), 35 | nn.BatchNorm2d(c), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(c, c, 3, padding=1, bias=False), 38 | nn.BatchNorm2d(c), 39 | nn.ReLU(inplace=True), 40 | ) 41 | 42 | def forward(self, x): 43 | return self.layer(x) 44 | 45 | def SimpleConv3x3MaxBlock(a, b, c, s): 46 | return nn.Sequential( 47 | nn.Conv2d(a, c, 3, padding=1, bias=False), 48 | nn.BatchNorm2d(c), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(c, c, 3, padding=1, bias=False), 51 | nn.BatchNorm2d(c), 52 | nn.ReLU(inplace=True), 53 | nn.MaxPool2d(s, stride=s), 54 | ) 55 | 56 | def SimpleConv3x3lBlock(a, b, c, s): 57 | return nn.Sequential( 58 | nn.Conv2d(a, c, 3, padding=1, bias=False), 59 | nn.BatchNorm2d(c), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(c, c, 3, padding=1, stride=s, bias=False), 62 | nn.BatchNorm2d(c), 63 | nn.ReLU(inplace=True), 64 | ) 65 | 66 | 67 | class SimpleEncoder(nn.Module): 68 | def __init__(self, input_extra=0, input_height=512, block='res', expand=1): 69 | super(SimpleEncoder, self).__init__() 70 | 
self.conv_pre = nn.Sequential( 71 | nn.Conv2d(3+input_extra, 16*expand, kernel_size=3, padding=1, bias=False), 72 | nn.BatchNorm2d(16*expand), 73 | nn.ReLU(inplace=True), 74 | ) 75 | 76 | if block == 'res': 77 | Block = SimpleResBlock 78 | elif block == 'conv3x3': 79 | Block = SimpleConv3x3Block 80 | elif block == 'conv3x3l': 81 | Block = SimpleConv3x3lBlock 82 | elif block == 'conv3x3max': 83 | Block = SimpleConv3x3MaxBlock 84 | else: 85 | raise NotImplementedError 86 | self.block0 = Block(16*expand, 16*expand, 32*expand, 2) 87 | self.block1 = Block(32*expand, 32*expand, 64*expand, 2) 88 | self.block2 = Block(64*expand, 64*expand, 128*expand, 2) 89 | self.block3 = Block(128*expand, 128*expand, 256*expand, 2) 90 | self.block4 = Block(256*expand, 256*expand, 256*expand, 2) 91 | 92 | self.out_channels = [64*expand, 128*expand, 256*expand, 256*expand] 93 | self.feat_heights = [input_height//4//(2**i) for i in range(4)] 94 | 95 | def forward(self, x): 96 | features = [] 97 | x = self.conv_pre(x) 98 | x = self.block0(x) 99 | x = self.block1(x); features.append(x) # 1/4 100 | x = self.block2(x); features.append(x) # 1/8 101 | x = self.block3(x); features.append(x) # 1/16 102 | x = self.block4(x); features.append(x) # 1/32 103 | return features 104 | -------------------------------------------------------------------------------- /lib/model/hohonet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from . import backbone 8 | from . import horizon_compression 9 | from . import horizon_refinement 10 | from . import horizon_upsample 11 | from . import modality 12 | from .utils import wrap_lr_pad 13 | 14 | 15 | ''' 16 | HoHoNet 17 | ''' 18 | class HoHoNet(nn.Module): 19 | def __init__(self, emb_dim=256, input_hw=None, input_norm='imagenet', pretrain='', 20 | backbone_config={'module': 'Resnet'}, 21 | decode_config={'module': 'EfficientHeightReduction'}, 22 | refine_config={'module': 'TransEn'}, 23 | upsample_config={'module': 'Upsample1D'}, 24 | modalities_config={}): 25 | super(HoHoNet, self).__init__() 26 | self.input_hw = input_hw 27 | if input_norm == 'imagenet': 28 | self.register_buffer('x_mean', torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None])) 29 | self.register_buffer('x_std', torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None])) 30 | elif input_norm == 'ugscnn': 31 | self.register_buffer('x_mean', torch.FloatTensor(np.array([0.4974898, 0.47918808, 0.42809588, 1.0961773])[None, :, None, None])) 32 | self.register_buffer('x_std', torch.FloatTensor(np.array([0.23762763, 0.23354423, 0.23272438, 0.75536704])[None, :, None, None])) 33 | else: 34 | raise NotImplementedError 35 | 36 | # Encoder 37 | Encoder = getattr(backbone, backbone_config['module']) 38 | Encoder_kwargs = backbone_config.get('kwargs', {}) 39 | self.encoder = Encoder(**Encoder_kwargs) 40 | 41 | # Horizon compression convert backbone features to horizontal feature 42 | # I name the variable as decoder during development and forgot to fix :P 43 | Decoder = getattr(horizon_compression, decode_config['module']) 44 | Decoder_kwargs = decode_config.get('kwargs', {}) 45 | self.decoder = Decoder(self.encoder.out_channels, self.encoder.feat_heights, **Decoder_kwargs) 46 | 47 | # Horizontal feature refinement module 48 | Refinement = getattr(horizon_refinement, refine_config['module']) 49 | Refinement_kwargs = refine_config.get('kwargs', {}) 50 | 
self.horizon_refine = Refinement(self.decoder.out_channels, **Refinement_kwargs) 51 | 52 | # Channel reduction to the shared latent 53 | Upsampler = getattr(horizon_upsample, upsample_config['module']) 54 | Upsampler_kwargs = upsample_config.get('kwargs', {}) 55 | self.emb_shared_latent = Upsampler(self.horizon_refine.out_channels, emb_dim) 56 | 57 | # Instantiate desired modalities 58 | self.modalities = nn.ModuleList([ 59 | getattr(modality, key)(emb_dim, **config) 60 | for key, config in modalities_config.items() 61 | ]) 62 | 63 | # Patch for all conv1d/2d layer's left-right padding 64 | wrap_lr_pad(self) 65 | 66 | # Load pretrained 67 | if pretrain: 68 | print(f'Load pretrained {pretrain}') 69 | st = torch.load(pretrain) 70 | missing_key = self.state_dict().keys() - st.keys() 71 | unknown_key = st.keys() - self.state_dict().keys() 72 | print('Missing key:', missing_key) 73 | print('Unknown key:', unknown_key) 74 | self.load_state_dict(st, strict=False) 75 | 76 | def extract_feat(self, x): 77 | ''' Map the input RGB to the shared latent (by all modalities) ''' 78 | 79 | if self.input_hw: 80 | x = F.interpolate(x, size=self.input_hw, mode='bilinear', align_corners=False) 81 | x = (x - self.x_mean) / self.x_std 82 | # encoder 83 | conv_list = self.encoder(x) 84 | # decoder to get horizontal feature 85 | feat = self.decoder(conv_list) 86 | # refine feat 87 | feat = self.horizon_refine(feat) 88 | # embed the shared latent 89 | feat = self.emb_shared_latent(feat) 90 | return feat 91 | 92 | def call_modality(self, method, *feed_args, **feed_kwargs): 93 | ''' Calling the method implemented in each modality and merge the results ''' 94 | output_dict = {} 95 | for m in self.modalities: 96 | curr_dict = getattr(m, method)(*feed_args, **feed_kwargs) 97 | assert len(output_dict.keys() & curr_dict.keys()) == 0, 'Key collision for different modalities' 98 | output_dict.update(curr_dict) 99 | return output_dict 100 | 101 | def forward(self, x): 102 | feat = self.extract_feat(x) 103 | return self.call_modality('forward', feat) 104 | 105 | def infer(self, x): 106 | feat = self.extract_feat(x) 107 | return self.call_modality('infer', feat) 108 | 109 | def compute_losses(self, batch): 110 | feat = self.extract_feat(batch['x']) 111 | losses = self.call_modality('compute_losses', feat, batch=batch) 112 | losses['total'] = sum(v for k, v in losses.items() if k.startswith('total')) 113 | return losses 114 | 115 | -------------------------------------------------------------------------------- /lib/model/horizon_compression/__init__.py: -------------------------------------------------------------------------------- 1 | from .hc import GlobalHeightStage 2 | from .ehc import EfficientHeightReduction 3 | from .simple import SimpleReduction 4 | -------------------------------------------------------------------------------- /lib/model/horizon_compression/ehc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import pano_upsample_w, PanoUpsampleW 6 | 7 | 8 | ''' 9 | EHC 10 | ''' 11 | class EfficientHeightReduction(nn.Module): 12 | def __init__(self, cs, heights, out_ch=1024, fuse_ks=1): 13 | ''' Process 4 blocks from encoder to single multiscale features ''' 14 | super(EfficientHeightReduction, self).__init__() 15 | c1, c2, c3, c4 = cs 16 | h1, h2, h3, h4 = heights 17 | 18 | def EfficientConvCompressH(in_c, out_c, scale, down_h): 19 | return nn.Sequential( 20 | nn.Conv2d(in_c, 
out_c, 3, padding=1, bias=False), 21 | nn.BatchNorm2d(out_c), 22 | nn.ReLU(inplace=True), 23 | PanoUpsampleW(scale), 24 | nn.Conv2d(out_c, out_c, 3, padding=1, bias=False), 25 | nn.BatchNorm2d(out_c), 26 | nn.ReLU(inplace=True), 27 | nn.Conv2d(out_c, out_c, (down_h, 1), groups=out_c, bias=False), 28 | ) 29 | 30 | self.ghc_lst = nn.ModuleList([ 31 | EfficientConvCompressH(c1, c1//4, scale=1, down_h=h1), 32 | EfficientConvCompressH(c2, c2//4, scale=2, down_h=h2), 33 | EfficientConvCompressH(c3, c3//4, scale=4, down_h=h3), 34 | EfficientConvCompressH(c4, c4//4, scale=8, down_h=h4), 35 | ]) 36 | self.fuse = nn.Sequential( 37 | nn.Conv2d((c1+c2+c3+c4)//4, out_ch, fuse_ks, padding=fuse_ks//2, bias=False), 38 | nn.BatchNorm2d(out_ch), 39 | nn.ReLU(inplace=True), 40 | ) 41 | self.out_channels = out_ch 42 | 43 | def forward(self, conv_list): 44 | assert len(conv_list) == 4 45 | feature = torch.cat([ 46 | f(x) for f, x in zip(self.ghc_lst, conv_list) 47 | ], dim=1) 48 | feature = self.fuse(feature).squeeze(2) 49 | return {'1D': feature, 'conv_list': conv_list} 50 | -------------------------------------------------------------------------------- /lib/model/horizon_compression/hc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import pano_upsample_w, PanoUpsampleW 6 | 7 | 8 | ''' 9 | Original HC 10 | ''' 11 | class GlobalHeightConv(nn.Module): 12 | def __init__(self, in_c, out_c): 13 | super(GlobalHeightConv, self).__init__() 14 | 15 | def ConvCompressH(in_c, out_c, ks=3): 16 | return nn.Sequential( 17 | nn.Conv2d(in_c, out_c, kernel_size=ks, stride=(2, 1), padding=ks//2), 18 | nn.BatchNorm2d(out_c), 19 | nn.ReLU(inplace=True), 20 | ) 21 | 22 | self.layer = nn.Sequential( 23 | ConvCompressH(in_c, in_c//2), 24 | ConvCompressH(in_c//2, in_c//2), 25 | ConvCompressH(in_c//2, in_c//4), 26 | ConvCompressH(in_c//4, out_c), 27 | ) 28 | 29 | def forward(self, x, out_w): 30 | x = self.layer(x) 31 | assert out_w % x.shape[3] == 0 32 | return pano_upsample_w(x, out_w//x.shape[-1]) 33 | 34 | 35 | class GlobalHeightStage(nn.Module): 36 | def __init__(self, cs, heights, down_h=8): 37 | ''' Process 4 blocks from encoder to single multiscale features ''' 38 | super(GlobalHeightStage, self).__init__() 39 | c1, c2, c3, c4 = cs 40 | h1, h2, h3, h4 = heights 41 | self.ghc_lst = nn.ModuleList([ 42 | GlobalHeightConv(c1, c1//down_h), 43 | GlobalHeightConv(c2, c2//down_h), 44 | GlobalHeightConv(c3, c3//down_h), 45 | GlobalHeightConv(c4, c4//down_h), 46 | ]) 47 | self.out_channels = (c1*h1 + c2*h2 + c3*h3 + c4*h4) // 16 // down_h 48 | 49 | def forward(self, conv_list): 50 | assert len(conv_list) == 4 51 | bs, _, _, out_w = conv_list[0].shape 52 | feature = torch.cat([ 53 | f(x, out_w).reshape(bs, -1, out_w) 54 | for f, x in zip(self.ghc_lst, conv_list) 55 | ], dim=1) 56 | return {'1D': feature} 57 | -------------------------------------------------------------------------------- /lib/model/horizon_compression/simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import pano_upsample_w, PanoUpsampleW 6 | 7 | 8 | ''' 9 | Simple decoder (for s2d3d sem small input size) 10 | ''' 11 | class SimpleReduction(nn.Module): 12 | def __init__(self, cs, heights, out_ch=64): 13 | ''' Process 4 blocks from encoder to single multiscale features ''' 14 | 
super(SimpleReduction, self).__init__() 15 | c1, c2, c3, c4 = cs 16 | h1, h2, h3, h4 = heights 17 | 18 | def EfficientConvCompressH(in_c, out_c, scale, down_h): 19 | return nn.Sequential( 20 | PanoUpsampleW(scale), 21 | nn.Conv2d(in_c, out_c, (down_h, 1), bias=False), 22 | nn.BatchNorm2d(out_c), 23 | nn.ReLU(inplace=True), 24 | ) 25 | 26 | self.ghc_lst = nn.ModuleList([ 27 | EfficientConvCompressH(c1, c1//4, scale=1, down_h=h1), 28 | EfficientConvCompressH(c2, c2//4, scale=2, down_h=h2), 29 | EfficientConvCompressH(c3, c3//4, scale=4, down_h=h3), 30 | EfficientConvCompressH(c4, c4//4, scale=8, down_h=h4), 31 | ]) 32 | self.fuse = nn.Sequential( 33 | nn.Conv2d((c1+c2+c3+c4)//4, out_ch, (1, 9), padding=(0, 4), bias=False), 34 | nn.BatchNorm2d(out_ch), 35 | nn.ReLU(inplace=True), 36 | ) 37 | self.out_channels = out_ch 38 | 39 | def forward(self, conv_list): 40 | assert len(conv_list) == 4 41 | feature = torch.cat([ 42 | f(x) for f, x in zip(self.ghc_lst, conv_list) 43 | ], dim=1) 44 | feature = self.fuse(feature).squeeze(2) 45 | return {'1D': feature} 46 | -------------------------------------------------------------------------------- /lib/model/horizon_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | from .identity import Identity 2 | from .linear import Linear 3 | from .rnn import LSTM, GRU 4 | from .attention import TransEn 5 | -------------------------------------------------------------------------------- /lib/model/horizon_refinement/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import copy 6 | 7 | 8 | ''' Transformer encoder ''' 9 | class TransformerEncoder(nn.Module): 10 | ''' Adapt from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/transformer.py ''' 11 | def __init__(self, encoder_layer, num_layers): 12 | super(TransformerEncoder, self).__init__() 13 | self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for i in range(num_layers)]) 14 | self.num_layers = num_layers 15 | 16 | def forward(self, x): 17 | for mod in self.layers: 18 | x = mod(x) 19 | return x 20 | 21 | class TransformerEncoderLayer(nn.Module): 22 | ''' Adapt from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/transformer.py ''' 23 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, mode='pre'): 24 | super(TransformerEncoderLayer, self).__init__() 25 | self.mode = mode 26 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 27 | # Implementation of Feedforward model 28 | self.linear1 = nn.Linear(d_model, dim_feedforward) 29 | self.dropout = nn.Dropout(dropout) 30 | self.linear2 = nn.Linear(dim_feedforward, d_model) 31 | 32 | self.norm1 = nn.LayerNorm(d_model) 33 | self.norm2 = nn.LayerNorm(d_model) 34 | self.dropout1 = nn.Dropout(dropout) 35 | self.dropout2 = nn.Dropout(dropout) 36 | 37 | self.activation = nn.ReLU(inplace=True) 38 | 39 | def forward(self, x): 40 | if self.mode == 'post': 41 | x2 = self.self_attn(x, x, x)[0] 42 | x = x + self.dropout1(x2) 43 | x = self.norm1(x) 44 | x2 = self.linear2(self.dropout(self.activation(self.linear1(x)))) 45 | x = x + self.dropout2(x2) 46 | x = self.norm2(x) 47 | return x 48 | elif self.mode == 'pre': 49 | x2 = self.norm1(x) 50 | x2 = self.self_attn(x2, x2, x2)[0] 51 | x = x + self.dropout1(x2) 52 | x2 = self.norm2(x) 53 | x2 = self.linear2(self.dropout(self.activation(self.linear1(x2)))) 54 | x = x + 
self.dropout2(x2) 55 | return x 56 | raise NotImplementedError 57 | 58 | class TransEn(nn.Module): 59 | def __init__(self, c_mid, position_encode, nhead=8, num_layers=2, dim_feedforward=2048, mode='pre'): 60 | super(TransEn, self).__init__() 61 | if isinstance(c_mid, (tuple, list)): 62 | c_mid = c_mid[0] 63 | encoder_layer = TransformerEncoderLayer(c_mid, nhead, dim_feedforward, mode=mode) 64 | self.transen = TransformerEncoder(encoder_layer, num_layers) 65 | 66 | import math 67 | max_len, d_model = position_encode, c_mid 68 | pe = torch.zeros(max_len, d_model) 69 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 70 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 71 | pe[:, 0::2] = torch.sin(position * div_term) 72 | pe[:, 1::2] = torch.cos(position * div_term) 73 | self.register_buffer('pos', pe.T[None].contiguous()) 74 | 75 | self.out_channels = c_mid 76 | 77 | def forward(self, feat): 78 | feat1d = feat['1D'] 79 | feat1d = (feat1d + self.pos).permute(2,0,1) 80 | feat1d = self.transen(feat1d).permute(1,2,0) 81 | feat['1D'] = feat1d 82 | return feat 83 | -------------------------------------------------------------------------------- /lib/model/horizon_refinement/identity.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Identity(nn.Module): 7 | def __init__(self, c_mid, *args, **kwargs): 8 | super(Identity, self).__init__() 9 | self.out_channels = c_mid 10 | 11 | def forward(self, x): 12 | return x 13 | -------------------------------------------------------------------------------- /lib/model/horizon_refinement/linear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv1dbnrelu(in_channels, out_channels, **kwargs): 7 | return nn.Sequential( 8 | nn.Conv1d(in_channels, out_channels, **kwargs), 9 | nn.BatchNorm1d(out_channels), 10 | nn.ReLU(inplace=True), 11 | ) 12 | 13 | class Linear(nn.Module): 14 | def __init__(self, c_mid, base_ch=256): 15 | super(Linear, self).__init__() 16 | self.conv_1x1 = conv1dbnrelu(c_mid, base_ch*4, kernel_size=1, bias=False) 17 | self.out_channels = base_ch*4 18 | 19 | def forward(self, feat): 20 | feat = feat['1D'] 21 | feat = self.conv_1x1(feat) 22 | return {'1D': feat} 23 | -------------------------------------------------------------------------------- /lib/model/horizon_refinement/rnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | ''' RNN ''' 7 | class LSTM(nn.Module): 8 | def __init__(self, c_mid, base_ch=256, num_layers=2, bidirectional=True): 9 | super(LSTM, self).__init__() 10 | self.rnn = nn.LSTM( 11 | c_mid, hidden_size=base_ch, 12 | num_layers=num_layers, bidirectional=bidirectional) 13 | self.out_channels = base_ch * (1+int(bidirectional)) 14 | 15 | def forward(self, feat): 16 | feat = self.rnn(feat.permute(2,0,1))[0].permute(1,2,0).contiguous() 17 | return {'1D': feat} 18 | 19 | class GRU(nn.Module): 20 | def __init__(self, c_mid, base_ch=256, num_layers=2, bidirectional=True): 21 | super(GRU, self).__init__() 22 | self.rnn = nn.GRU( 23 | c_mid, hidden_size=base_ch, 24 | num_layers=num_layers, bidirectional=bidirectional) 25 | self.out_channels = base_ch * (1+int(bidirectional)) 26 | 27 | def 
forward(self, feat): 28 | feat = feat['1D'] 29 | feat = self.rnn(feat.permute(2,0,1))[0].permute(1,2,0).contiguous() 30 | return {'1D': feat} 31 | -------------------------------------------------------------------------------- /lib/model/horizon_upsample/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Identity 2 | from .upsample1d import Upsample1D 3 | -------------------------------------------------------------------------------- /lib/model/horizon_upsample/upsample1d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..utils import PanoUpsampleW 6 | 7 | 8 | class Upsample1D(nn.Sequential): 9 | def __init__(self, ic, oc): 10 | super(Upsample1D, self).__init__( 11 | PanoUpsampleW(4), 12 | nn.Conv1d(ic, oc, 3, padding=1, bias=False), 13 | nn.BatchNorm1d(oc), 14 | nn.ReLU(inplace=True), 15 | ) 16 | 17 | def forward(self, feat): 18 | feat1d = feat['1D'] 19 | for module in self: 20 | feat1d = module(feat1d) 21 | feat['1D'] = feat1d 22 | return feat 23 | -------------------------------------------------------------------------------- /lib/model/modality/__init__.py: -------------------------------------------------------------------------------- 1 | from .depth import DepthEstimator 2 | from .semantic import SemanticSegmenter 3 | from .layout import LayoutEstimator 4 | -------------------------------------------------------------------------------- /lib/model/modality/bases.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def dct(n_components, output_height): 6 | basis = (torch.arange(output_height)[None].float() + 0.5) / output_height * np.pi 7 | basis = torch.arange(0, n_components)[:,None].float() * basis 8 | basis = torch.cos(basis) 9 | return basis 10 | 11 | 12 | def linear(*args, **kwargs): 13 | return None 14 | -------------------------------------------------------------------------------- /lib/model/modality/depth.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from . 
import bases 8 | from ..utils import PanoUpsampleW 9 | 10 | 11 | ''' Dense (per-pixel) depth estimation ''' 12 | class DepthBase(nn.Module): 13 | def __init__(self): 14 | super(DepthBase, self).__init__() 15 | 16 | def infer(self, x_emb): 17 | depth = self(x_emb)['depth'] 18 | return {'depth': depth} 19 | 20 | def compute_losses(self, x_emb, batch): 21 | gt = batch['depth'] 22 | mask = (gt > 0) 23 | 24 | # Forward 25 | pred_dict = self(x_emb) 26 | pred = pred_dict['depth'] 27 | 28 | # Compute losses 29 | losses = {} 30 | l1 = (pred[mask] - gt[mask]).abs() 31 | l2 = (pred[mask] - gt[mask]).pow(2) 32 | losses['mae'] = l1.mean() 33 | losses['rmse'] = l2.mean().sqrt() 34 | losses['delta1'] = (torch.max(pred[mask]/gt[mask], gt[mask]/pred[mask]) < 1.25).float().mean() 35 | 36 | losses['total.depth'] = loss_for_backward(pred_dict['depth1d'], gt, mask, self.loss) 37 | if 'residual' in pred_dict: 38 | with torch.no_grad(): 39 | gt_residual = gt - pred_dict['depth1d'].detach() 40 | losses['total.residual'] = loss_for_backward(pred_dict['residual'], gt_residual, mask, 'l1') 41 | return losses 42 | 43 | 44 | def loss_for_backward(pred, gt, mask, loss): 45 | if loss == 'l1': 46 | return F.l1_loss(pred[mask], gt[mask]) 47 | elif loss == 'l2': 48 | return F.mse_loss(pred[mask], gt[mask]) 49 | elif loss == 'huber': 50 | return F.smooth_l1_loss(pred[mask], gt[mask]) 51 | elif loss == 'berhu': 52 | l1 = (pred[mask] - gt[mask]).abs().mean() 53 | l2 = (pred[mask] - gt[mask]).pow(2).mean() 54 | with torch.no_grad(): 55 | c = max(l1.detach().max() * 0.2, 0.01) 56 | l2c = (l2 + c**2) / (2 * c) 57 | return torch.where(l1<=c, l1, l2c).mean() 58 | else: 59 | raise NotImplementedError 60 | 61 | 62 | class DepthEstimator(DepthBase): 63 | def __init__(self, emb_dim, basis='dct', loss='l1', n_components=64, 64 | init_weight=0.1, init_bias=2.5, output_height=512, 65 | resisual=False, basis_tuning=False): 66 | super(DepthEstimator, self).__init__() 67 | self.loss = loss 68 | 69 | self.output_height = output_height 70 | basis = getattr(bases, basis)(n_components, output_height) 71 | if basis_tuning: 72 | self.basis = nn.Parameter(basis) 73 | else: 74 | self.register_buffer('basis', basis) 75 | 76 | self.estimator = nn.Sequential( 77 | nn.Conv1d(emb_dim, emb_dim, 1), 78 | nn.BatchNorm1d(emb_dim), 79 | nn.ReLU(inplace=True), 80 | nn.Conv1d(emb_dim, n_components, 1, bias=False), 81 | ) 82 | self.bias = nn.Parameter(torch.full([1], init_bias)) 83 | nn.init.normal_(self.estimator[-1].weight, std=init_weight/np.sqrt(emb_dim/2)) 84 | 85 | self.residual = None 86 | if resisual: 87 | self.residual = nn.Sequential( 88 | nn.Conv2d(256, 64, 3, padding=1, bias=False), 89 | nn.BatchNorm2d(64), 90 | nn.ReLU(inplace=True), 91 | nn.Conv2d(64, 1, 1, bias=False), 92 | PanoUpsampleW(4), 93 | nn.UpsamplingBilinear2d(scale_factor=(4,1)), 94 | ) 95 | 96 | def forward(self, x_emb): 97 | ws = self.estimator(x_emb['1D']) 98 | if self.basis is None: 99 | h, w = self.output_height, ws.shape[-1] 100 | depth = self.bias + F.interpolate(ws.unsqueeze(1), size=(h,w), mode='bilinear', align_corners=False) 101 | else: 102 | depth = self.bias + torch.einsum('bkw,kh->bhw', ws, self.basis).unsqueeze(1) 103 | ret_dict = {'depth': depth, 'depth1d': depth} 104 | if self.residual is not None: 105 | residual = 0.1 * self.residual(x_emb['conv_list'][0].detach()) 106 | ret_dict['residual'] = residual 107 | ret_dict['depth'] = depth + residual 108 | return ret_dict 109 | -------------------------------------------------------------------------------- 
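# Minimal sketch (not part of the repository) of the compressed-basis idea used by
# DepthEstimator above: a dense HxW depth map is reconstructed from only n_components
# DCT coefficients per image column. The random `ws` merely stands in for the output
# of the 1D estimator; the shapes and the 2.5 bias follow the defaults above.
import torch
from lib.model.modality import bases  # assumes the repo root is on PYTHONPATH and its deps are installed

n_components, H, W = 64, 512, 1024
basis = bases.dct(n_components, H)                   # [64, 512] cosine basis over image height
ws = 0.01 * torch.randn(1, n_components, W)          # [1, 64, 1024] per-column coefficients
depth = 2.5 + torch.einsum('bkw,kh->bhw', ws, basis).unsqueeze(1)  # [1, 1, 512, 1024]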
/lib/model/modality/layout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from . import bases 8 | 9 | from lib.misc import panostretch, post_proc 10 | from ..utils import peaks_finding 11 | from scipy.ndimage.filters import maximum_filter 12 | from shapely.geometry import Polygon 13 | 14 | 15 | ''' Layout (per-column) estimation ''' 16 | class LayoutEstimator(nn.Module): 17 | def __init__(self, emb_dim, bon_weight=1., cor_weight=1., bon_loss='l1', cor_loss='bce', bon_scale=1., 18 | init_weight=0.1, dropout=0., oneconv=True, last_ks=1, last_bias=True, 19 | H=512, W=1024, post_force_cuboid=False): 20 | super(LayoutEstimator, self).__init__() 21 | self.bon_loss = bon_loss 22 | self.cor_loss = cor_loss 23 | self.bon_scale = bon_scale 24 | self.bon_weight = bon_weight 25 | self.cor_weight = cor_weight 26 | self.H = H 27 | self.W = W 28 | self.post_force_cuboid = post_force_cuboid 29 | 30 | if oneconv: 31 | self.pred_bon = nn.Conv1d(emb_dim, 2, last_ks, padding=last_ks//2, bias=last_bias) 32 | self.pred_cor = nn.Conv1d(emb_dim, 1, last_ks, padding=last_ks//2, bias=last_bias) 33 | if last_bias: 34 | nn.init.constant_(self.pred_bon.bias[0], -0.478) 35 | nn.init.constant_(self.pred_bon.bias[1], 0.425) 36 | nn.init.constant_(self.pred_cor.bias, -1.) 37 | else: 38 | self.pred_bon = nn.Sequential( 39 | nn.Conv1d(emb_dim, emb_dim, 3, padding=1, bias=False), 40 | nn.BatchNorm1d(emb_dim), 41 | nn.ReLU(inplace=True), 42 | nn.Conv1d(emb_dim, 2, 1), 43 | ) 44 | self.pred_cor = nn.Sequential( 45 | nn.Conv1d(emb_dim, emb_dim, 3, padding=1, bias=False), 46 | nn.BatchNorm1d(emb_dim), 47 | nn.ReLU(inplace=True), 48 | nn.Conv1d(emb_dim, 1, 1), 49 | ) 50 | nn.init.constant_(self.pred_bon[-1].bias[0], -0.478) 51 | nn.init.constant_(self.pred_bon[-1].bias[1], 0.425) 52 | nn.init.constant_(self.pred_cor[-1].bias, -1.) 
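# Note: pred_bon regresses, for every image column, the ceiling and floor boundary
# angles (2 channels, in radians multiplied by bon_scale), while pred_cor outputs a
# per-column corner logit; the constant bias initialization gives both heads a sensible
# starting prediction. infer() below decodes these signals into corner coordinates
# via lib.misc.post_proc.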
53 | self.dropout = None 54 | if dropout > 0: 55 | self.dropout = nn.Dropout(dropout) 56 | 57 | def forward(self, x_emb): 58 | x_emb = x_emb['1D'] 59 | if self.dropout is not None: 60 | x_emb = self.dropout(x_emb) 61 | pred_bon = self.pred_bon(x_emb) 62 | pred_cor = self.pred_cor(x_emb) 63 | return {'bon': pred_bon, 'cor': pred_cor} 64 | 65 | def infer(self, x_emb): 66 | pred = self(x_emb) 67 | pred_bon = pred['bon'] / self.bon_scale 68 | pred_cor = pred['cor'] 69 | H, W = self.H, self.W 70 | 71 | y_bon_ = (pred_bon[0].cpu().numpy() / np.pi + 0.5) * H - 0.5 72 | y_cor_ = pred_cor[0,0].sigmoid().cpu().numpy() 73 | # Init floor/ceil plane 74 | z0 = 50 75 | _, z1 = post_proc.np_refine_by_fix_z(*y_bon_, z0) 76 | 77 | # Detech wall-wall peaks 78 | def find_N_peaks(signal, r, min_v, N): 79 | max_v = maximum_filter(signal, size=r, mode='wrap') 80 | pk_loc = np.where(max_v == signal)[0] 81 | pk_loc = pk_loc[signal[pk_loc] > min_v] 82 | if N is not None: 83 | order = np.argsort(-signal[pk_loc]) 84 | pk_loc = pk_loc[order[:N]] 85 | pk_loc = pk_loc[np.argsort(pk_loc)] 86 | return pk_loc, signal[pk_loc] 87 | min_v = 0 if self.post_force_cuboid else 0.05 88 | r = int(round(W * 0.05 / 2)) 89 | N = 4 if self.post_force_cuboid else None 90 | xs_ = find_N_peaks(y_cor_, r=r, min_v=min_v, N=N)[0] 91 | 92 | # Generate wall-walls 93 | cor, xy_cor = post_proc.gen_ww(xs_, y_bon_[0], z0, tol=abs(0.16 * z1 / 1.6), force_cuboid=self.post_force_cuboid) 94 | if not self.post_force_cuboid: 95 | # Check valid (for fear self-intersection) 96 | xy2d = np.zeros((len(xy_cor), 2), np.float32) 97 | for i in range(len(xy_cor)): 98 | xy2d[i, xy_cor[i]['type']] = xy_cor[i]['val'] 99 | xy2d[i, xy_cor[i-1]['type']] = xy_cor[i-1]['val'] 100 | if not Polygon(xy2d).is_valid: 101 | import sys 102 | print( 103 | 'Fail to generate valid general layout!! 
' 104 | 'Generate cuboid as fallback.', 105 | file=sys.stderr) 106 | xs_ = find_N_peaks(y_cor_, r=r, min_v=0, N=4)[0] 107 | cor, xy_cor = post_proc.gen_ww(xs_, y_bon_[0], z0, tol=abs(0.16 * z1 / 1.6), force_cuboid=True) 108 | 109 | # Expand with btn coory 110 | cor = np.hstack([cor, post_proc.infer_coory(cor[:, 1], z1 - z0, z0)[:, None]]) 111 | # Collect corner position in equirectangular 112 | cor_id = np.zeros((len(cor)*2, 2), np.float32) 113 | for j in range(len(cor)): 114 | cor_id[j*2] = cor[j, 0], cor[j, 1] 115 | cor_id[j*2 + 1] = cor[j, 0], cor[j, 2] 116 | return {'cor_id': cor_id, 'y_bon_': y_bon_, 'y_cor_': y_cor_} 117 | 118 | def compute_losses(self, x_emb, batch): 119 | gt_bon = batch['bon'] * self.bon_scale 120 | gt_vot = batch['vot'] 121 | gt_cor = 0.96 ** gt_vot.abs() 122 | 123 | # Forward 124 | pred = self(x_emb) 125 | 126 | # Compute losses 127 | losses = {} 128 | if self.bon_loss == 'l1': 129 | losses['bon'] = F.l1_loss(pred['bon'], gt_bon) 130 | elif self.bon_loss == 'l2': 131 | losses['bon'] = F.mse_loss(pred['bon'], gt_bon) 132 | else: 133 | raise NotImplementedError 134 | 135 | if self.cor_loss == 'bce': 136 | losses['cor'] = F.binary_cross_entropy_with_logits(pred['cor'], gt_cor) 137 | elif self.cor_loss == 'prfocal': 138 | g, p = gt_cor, pred['cor'] 139 | pos_mask = (g >= 1-1e-6) 140 | B, alpha, beta = len(g), 2, 4 141 | L_pos = -F.logsigmoid(p) * F.sigmoid(-p).pow(alpha) 142 | L_neg = -F.logsigmoid(-p) * F.sigmoid(p).pow(alpha) * (1-g).pow(beta) 143 | L = torch.where(pos_mask, L_pos, L_neg).view(B,-1).sum(-1) / pos_mask.float().view(B,-1).sum(-1) 144 | losses['cor'] = L.mean() 145 | else: 146 | raise NotImplementedError 147 | 148 | losses['total.layout'] = self.bon_weight * losses['bon'] + self.cor_weight * losses['cor'] 149 | with torch.no_grad(): 150 | losses['bon.mae'] = F.l1_loss(pred['bon'], gt_bon) / self.bon_scale 151 | losses['cor.mae'] = F.l1_loss(pred['cor'].sigmoid(), gt_cor) 152 | return losses 153 | -------------------------------------------------------------------------------- /lib/model/modality/semantic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from . 
import bases 8 | 9 | 10 | ''' Dense (per-pixel) semantic segmentation ''' 11 | class SemanticSegmenter(nn.Module): 12 | def __init__(self, emb_dim, num_classes, basis='dct', loss='bce', label_weight='', invalid_ids=[], n_components=64, 13 | last_ks=1, dropout=0, init_weight=0.1, init_bias=None, output_height=512, pre1d=False): 14 | super(SemanticSegmenter, self).__init__() 15 | self.num_classes = num_classes 16 | self.loss = loss 17 | self.n_components = n_components 18 | self.invalid_ids = invalid_ids 19 | if init_bias is None: 20 | if self.loss == 'bce': 21 | init_bias = -np.log(num_classes-1) 22 | else: 23 | init_bias = 0.0 24 | 25 | self.output_height = output_height 26 | self.register_buffer('basis', getattr(bases, basis)(n_components, output_height)) 27 | 28 | self.estimator = nn.Sequential( 29 | nn.Conv1d(emb_dim, emb_dim, last_ks, padding=last_ks//2), 30 | nn.BatchNorm1d(emb_dim), 31 | nn.ReLU(inplace=True), 32 | nn.Conv1d(emb_dim, n_components * num_classes, 1, bias=False), 33 | ) 34 | if dropout > 0: 35 | self.estimator = nn.Sequential(*self.estimator[:-1], nn.Dropout(dropout), self.estimator[-1]) 36 | self.bias = nn.Parameter(torch.full([1, num_classes, 1, 1], init_bias)) 37 | nn.init.normal_(self.estimator[-1].weight, std=init_weight/np.sqrt(emb_dim/2)) 38 | 39 | self.estimator1d = None 40 | if pre1d: 41 | self.estimator1d = nn.Sequential( 42 | nn.Conv1d(emb_dim, emb_dim, last_ks, padding=last_ks//2), 43 | nn.BatchNorm1d(emb_dim), 44 | nn.ReLU(inplace=True), 45 | nn.Conv1d(emb_dim, num_classes, 1), 46 | ) 47 | nn.init.constant_(self.estimator1d[-1].bias, -np.log(10-1)) 48 | 49 | if label_weight: 50 | self.register_buffer('label_weight', torch.load(label_weight).float()) 51 | else: 52 | self.register_buffer('label_weight', torch.ones(num_classes)) 53 | self.label_weight[self.invalid_ids] = 0 54 | self.label_weight *= (num_classes - len(self.invalid_ids)) / self.label_weight.sum() 55 | 56 | def forward(self, x_emb): 57 | x_emb = x_emb['1D'] 58 | B, _, W = x_emb.shape 59 | ws = self.estimator(x_emb).view(B, self.num_classes, self.n_components, W) 60 | if self.basis is None: 61 | h, w = self.output_height, ws.shape[-1] 62 | sem = self.bias + F.interpolate(ws, size=(h,w), mode='bilinear', align_corners=False) 63 | else: 64 | sem = self.bias + torch.einsum('bckw,kh->bchw', ws, self.basis) 65 | sem[:, self.invalid_ids] = -100 66 | 67 | if self.estimator1d is not None: 68 | sem1d = self.estimator1d(x_emb).view(B, self.num_classes, 1, W) 69 | sem1d[:, self.invalid_ids] = -100 70 | sem.permute(0,1,3,2)[sem1d.sigmoid().squeeze(2) < 0.1] = float("-Inf") 71 | return {'sem': sem, 'sem1d': sem1d} 72 | else: 73 | return {'sem': sem} 74 | 75 | def infer(self, x_emb): 76 | return self(x_emb) 77 | 78 | def compute_losses(self, x_emb, batch): 79 | gt = batch['sem'] 80 | mask = (gt >= 0) 81 | B, H, W = gt.shape 82 | if mask.sum() == 0: 83 | return {} 84 | 85 | # Forward 86 | pred = self(x_emb) 87 | pred_sem = pred['sem'] 88 | 89 | # Compute losses 90 | losses = {} 91 | 92 | if 'sem1d' in pred: 93 | pred_sem1d = pred['sem1d'] 94 | gt1d = torch.zeros_like(pred_sem1d) 95 | brcid = torch.stack(torch.meshgrid(torch.arange(gt.shape[0]), torch.arange(gt.shape[1]), torch.arange(gt.shape[2])), -1) 96 | bid, rid, cid = brcid[mask].T 97 | gt1d[bid, gt[mask], 0, cid] = 1 98 | losses['acc.sem1d.fn'] = ((pred_sem1d.sigmoid() < 0.1) & (gt1d == 1)).float().mean() 99 | losses['acc.sem1d.tn'] = ((pred_sem1d.sigmoid() < 0.1) & (gt1d == 0)).float().mean() 100 | losses['total.sem1d'] = 
F.binary_cross_entropy_with_logits(pred_sem1d, gt1d) 101 | 102 | pred_sem = pred_sem.permute(0,2,3,1)[mask] 103 | gt = gt[mask] 104 | if 'sem1d' in pred: 105 | activate = (pred_sem1d.detach().sigmoid() >= 0.1).float().repeat(1,1,H,1) 106 | activate = activate.permute(0,2,3,1)[mask] 107 | else: 108 | activate = torch.ones_like(pred_sem) 109 | losses['acc'] = (pred_sem.argmax(1) == gt).float().mean() 110 | if self.loss == 'bce': 111 | gt_onehot = torch.zeros_like(pred_sem).scatter_(dim=1, index=gt[:,None], src=torch.ones_like(pred_sem)) 112 | bce = F.binary_cross_entropy_with_logits(pred_sem, gt_onehot, reduction='none') 113 | bce = (bce * self.label_weight)[activate.bool()] 114 | losses['total.sem'] = bce.mean() 115 | elif self.loss == 'ce': 116 | ce = F.cross_entropy(pred_sem, gt, weight=self.label_weight, reduction='none') 117 | ce = ce[~torch.isinf(ce) & ~torch.isnan(ce)] 118 | losses['total.sem'] = ce.mean() 119 | elif self.loss.startswith('mse'): 120 | R = float(self.loss[3:]) 121 | gt_R = torch.full_like(pred_sem, -R).scatter_(dim=1, index=gt[:,None], src=torch.full_like(pred_sem, R)) 122 | mse = (pred_sem - gt_R).pow(2) 123 | losses['total.sem'] = (mse * self.label_weight).mean() 124 | else: 125 | raise NotImplementedError 126 | return losses 127 | -------------------------------------------------------------------------------- /lib/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import functools 5 | 6 | import scipy 7 | import numpy as np 8 | from scipy.ndimage.filters import maximum_filter 9 | from sklearn.linear_model import HuberRegressor 10 | 11 | 12 | ''' Panorama patch for layers ''' 13 | def lr_pad(x, padding=1): 14 | ''' Pad left/right-most to each other instead of zero padding ''' 15 | return torch.cat([x[..., -padding:], x, x[..., :padding]], dim=-1) 16 | 17 | class LR_PAD(nn.Module): 18 | ''' Pad left/right-most to each other instead of zero padding ''' 19 | def __init__(self, padding=1): 20 | super(LR_PAD, self).__init__() 21 | self.padding = padding 22 | 23 | def forward(self, x): 24 | return lr_pad(x, self.padding) 25 | 26 | def wrap_lr_pad(net): 27 | for name, m in net.named_modules(): 28 | names = name.split('.') 29 | root = functools.reduce(lambda o, i: getattr(o, i), [net] + names[:-1]) 30 | if isinstance(m, nn.Conv2d): 31 | if m.padding[1] == 0: 32 | continue 33 | w_pad = int(m.padding[1]) 34 | m.padding = (m.padding[0], 0) 35 | setattr( 36 | root, names[-1], 37 | nn.Sequential(LR_PAD(w_pad), m) 38 | ) 39 | elif isinstance(m, nn.Conv1d): 40 | if m.padding == (0, ): 41 | continue 42 | w_pad = int(m.padding[0]) 43 | m.padding = (0,) 44 | setattr( 45 | root, names[-1], 46 | nn.Sequential(LR_PAD(w_pad), m) 47 | ) 48 | 49 | def pano_upsample_w(x, s): 50 | if len(x.shape) == 3: 51 | mode = 'linear' 52 | scale_factor = s 53 | elif len(x.shape) == 4: 54 | mode = 'bilinear' 55 | scale_factor = (1, s) 56 | else: 57 | raise NotImplementedError 58 | x = torch.cat([x[...,-1:], x, x[...,:1]], dim=-1) 59 | x = F.interpolate(x, scale_factor=scale_factor, mode=mode, align_corners=False) 60 | x = x[...,s:-s] 61 | return x 62 | 63 | class PanoUpsampleW(nn.Module): 64 | def __init__(self, s): 65 | super(PanoUpsampleW, self).__init__() 66 | self.s = s 67 | 68 | def forward(self, x): 69 | return pano_upsample_w(x, self.s) 70 | 71 | 72 | ''' Testing augmentation helper ''' 73 | def augment(x, flip, rotate, rotate_flip): 74 | aug_type = [''] 75 | x_augmented 
= [x] 76 | if flip: 77 | aug_type.append('flip') 78 | x_augmented.append(x.flip(dims=(-1,))) 79 | for shift in rotate: 80 | aug_type.append('rotate %d' % shift) 81 | x_augmented.append(x.roll(shifts=shift, dims=-1)) 82 | if rotate_flip: 83 | aug_type.append('rotate_flip %d' % shift) 84 | x_augmented.append(x_augmented[-1].flip(dims=(-1,))) 85 | return torch.cat(x_augmented, 0), aug_type 86 | 87 | def augment_undo(pred_augmented, aug_type): 88 | pred_augmented = pred_augmented.cpu().numpy() 89 | assert len(pred_augmented) == len(aug_type), 'Unable to recover testing aug' 90 | pred_final = 0 91 | for pred, aug in zip(pred_augmented, aug_type): 92 | if aug == 'flip': 93 | pred_final += np.flip(pred, axis=-1) 94 | elif aug.startswith('rotate'): 95 | if 'flip' in aug: 96 | pred = np.flip(pred, axis=-1) 97 | shift = int(aug.split()[-1]) 98 | pred_final += np.roll(pred, -shift, axis=-1) 99 | elif aug == '': 100 | pred_final += pred 101 | else: 102 | raise NotImplementedError 103 | 104 | return pred_final / len(aug_type) 105 | 106 | 107 | ''' Post-processing ''' 108 | def peaks_mask_torch(x1d, winsz=7, min_v=0.5): 109 | pad = winsz // 2 110 | x1d_max = F.max_pool1d(torch.cat([x1d[...,-pad:], x1d, x1d[...,:pad]], -1), winsz, stride=1) 111 | return (x1d == x1d_max) & (x1d >= min_v) 112 | 113 | def peaks_finding_torch(x1d, winsz=7, min_v=0.5): 114 | ''' x1d: [B, 1, W] ''' 115 | bid, _, cid = torch.where(peaks_mask_torch(x1d, winsz, min_v)) 116 | return bid, cid 117 | 118 | def peaks_finding(signal, winsz=7, min_v=0.5): 119 | max_v = maximum_filter(signal, size=winsz, mode='wrap') 120 | pk_loc = np.where(max_v == signal)[0] 121 | pk_loc = pk_loc[signal[pk_loc] > min_v] 122 | return pk_loc 123 | -------------------------------------------------------------------------------- /test_depth.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import importlib 4 | from natsort import natsorted 5 | from tqdm import tqdm, trange 6 | from collections import Counter 7 | 8 | import numpy as np 9 | from imageio import imwrite 10 | from scipy.spatial.transform import Rotation 11 | from lib.misc.pano_lsd_align import rotatePanorama, panoEdgeDetection 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.utils.data import DataLoader 17 | 18 | from lib.config import config, update_config, infer_exp_id 19 | from lib import dataset 20 | 21 | 22 | def eval_metric(pred, gt, dmax): 23 | gt = gt.clamp(0.01, dmax) 24 | pred = pred.clamp(0.01, dmax) 25 | mre = ((gt - pred).abs() / gt).mean().item() 26 | mae = (gt - pred).abs().mean().item() 27 | rmse = ((gt - pred)**2).mean().sqrt().item() 28 | rmse_log = ((gt.log10() - pred.log10())**2).mean().sqrt().item() 29 | log10 = (gt.log10() - pred.log10()).abs().mean().item() 30 | 31 | delta = torch.max(pred/gt, gt/pred) 32 | delta_1 = (delta < 1.25).float().mean().item() 33 | delta_2 = (delta < 1.25**2).float().mean().item() 34 | delta_3 = (delta < 1.25**3).float().mean().item() 35 | return { 36 | 'mre': mre, 'mae': mae, 'rmse': rmse, 'rmse_log': rmse_log, 'log10': log10, 37 | 'delta_1': delta_1, 'delta_2': delta_2, 'delta_3': delta_3, 38 | } 39 | 40 | 41 | if __name__ == '__main__': 42 | 43 | # Parse args & config 44 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 45 | parser.add_argument('--cfg', required=True) 46 | parser.add_argument('--pth') 47 | parser.add_argument('--out') 48 | parser.add_argument('--vis_dir') 49 | 
parser.add_argument('--clip', default=10, type=float) 50 | parser.add_argument('--y', action='store_true') 51 | parser.add_argument('--pitch', default=0, type=float) 52 | parser.add_argument('--roll', default=0, type=float) 53 | parser.add_argument('opts', 54 | help='Modify config options using the command-line', 55 | default=None, nargs=argparse.REMAINDER) 56 | args = parser.parse_args() 57 | update_config(config, args) 58 | device = 'cuda' if config.cuda else 'cpu' 59 | 60 | if not args.pth: 61 | from glob import glob 62 | exp_id = infer_exp_id(args.cfg) 63 | exp_ckpt_root = os.path.join(config.ckpt_root, exp_id) 64 | args.pth = natsorted(glob(os.path.join(exp_ckpt_root, 'ep*pth')))[-1] 65 | print(f'No pth given, inferring the trained pth: {args.pth}') 66 | 67 | if not args.out: 68 | out = [os.path.splitext(args.pth)[0]] 69 | if args.pitch > 0: 70 | out.append(f'.pitch{args.pitch:.0f}') 71 | if args.roll > 0: 72 | out.append(f'.roll{args.roll:.0f}') 73 | args.out = ''.join(out + ['.npz']) 74 | print(f'No out given, inferring the output path: {args.out}') 75 | if os.path.isfile(args.out) and not args.y: 76 | print(f'{args.out} already exists:') 77 | print(dict(np.load(args.out))) 78 | print('Overwrite these results?', end=' ') 79 | input() 80 | 81 | # Init dataset 82 | DatasetClass = getattr(dataset, config.dataset.name) 83 | config.dataset.valid_kwargs.update(config.dataset.common_kwargs) 84 | config.dataset.valid_kwargs['fix_pitch'] = args.pitch 85 | config.dataset.valid_kwargs['fix_roll'] = args.roll 86 | valid_dataset = DatasetClass(**config.dataset.valid_kwargs) 87 | 88 | # Init network 89 | model_file = importlib.import_module(config.model.file) 90 | model_class = getattr(model_file, config.model.modelclass) 91 | net = model_class(**config.model.kwargs).to(device) 92 | net.load_state_dict(torch.load(args.pth)) 93 | net.eval() 94 | 95 | # Run evaluation 96 | evaluation_metric = Counter() 97 | for batch in tqdm(valid_dataset): 98 | # Add batch dim and move to gpu 99 | color = batch['x'][None].to(device) 100 | depth = batch['depth'][None].to(device) 101 | mask = (depth > 0) 102 | 103 | # feed forward 104 | with torch.no_grad(): 105 | pred_depth = net.infer(color) 106 | if not torch.is_tensor(pred_depth): 107 | viz_dict = pred_depth 108 | pred_depth = viz_dict.pop('depth') 109 | pred_depth = pred_depth.clamp(0.01) 110 | 111 | if args.pitch: 112 | vp = Rotation.from_rotvec([-args.pitch * np.pi / 180, 0, 0]).as_matrix() 113 | pred_depth = pred_depth.squeeze()[...,None].cpu().numpy() 114 | pred_depth = rotatePanorama(pred_depth, vp, order=0)[...,0] 115 | pred_depth = torch.from_numpy(pred_depth[None,None]).to(depth.device) 116 | if args.roll: 117 | vp = Rotation.from_rotvec([0, -args.roll * np.pi / 180, 0]).as_matrix() 118 | pred_depth = pred_depth.squeeze()[...,None].cpu().numpy() 119 | pred_depth = rotatePanorama(pred_depth, vp, order=0)[...,0] 120 | pred_depth = torch.from_numpy(pred_depth[None,None]).to(depth.device) 121 | 122 | if args.vis_dir: 123 | fname = batch['fname'].strip() 124 | os.makedirs(args.vis_dir, exist_ok=True) 125 | rgb = (batch['x'].permute(1,2,0) * 255).cpu().numpy().astype(np.uint8) 126 | dep = pred_depth.squeeze().mul(512).cpu().numpy().astype(np.uint16) 127 | dep[~mask.squeeze().cpu().numpy()] = 0 128 | gtdep = depth.squeeze().mul(512).cpu().numpy().astype(np.uint16) 129 | imwrite(os.path.join(args.vis_dir, fname + '.rgb' + '.jpg'), rgb) 130 | imwrite(os.path.join(args.vis_dir, fname + '.rgb' + '.png'), gtdep) 131 | imwrite(os.path.join(args.vis_dir, fname + '.depth'
+ '.png'), dep) 132 | for k, v in viz_dict.items(): 133 | if v.dtype == np.uint8 or v.dtype == np.uint16: 134 | imwrite(os.path.join(args.vis_dir, fname + '.' + k + '.png'), v) 135 | else: 136 | raise NotImplementedError 137 | 138 | evaluation_metric['N'] += 1 139 | for metric, v in eval_metric(pred_depth[mask], depth[mask], args.clip).items(): 140 | evaluation_metric[metric] += v 141 | 142 | N = evaluation_metric.pop('N') 143 | for metric, v in evaluation_metric.items(): 144 | evaluation_metric[metric] = v / N 145 | for metric, v in evaluation_metric.items(): 146 | print(f'{metric:20s} {v:.4f}') 147 | 148 | np.savez(args.out, **evaluation_metric) 149 | 150 | -------------------------------------------------------------------------------- /test_layout.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import json 4 | import argparse 5 | import importlib 6 | import numpy as np 7 | from PIL import Image 8 | from tqdm import tqdm 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from lib.config import config, update_config, infer_exp_id 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 20 | parser.add_argument('--cfg', required=True) 21 | parser.add_argument('--pth', help='path to load saved checkpoint.') 22 | parser.add_argument('--img_glob', required=True) 23 | parser.add_argument('--output_dir', required=True) 24 | # Augmentation related 25 | parser.add_argument('--flip', action='store_true', 26 | help='whether to perform left-right flip. ' 27 | '# of input x2.') 28 | parser.add_argument('--rotate', nargs='*', default=[], type=int, 29 | help='whether to perform horizontal rotation. ' 30 | 'each element indicates a fraction of the image width. ' 31 | '# of input xlen(rotate).') 32 | # Misc arguments 33 | parser.add_argument('--no_cuda', action='store_true', 34 | help='disable cuda') 35 | parser.add_argument('opts', 36 | help='Modify config options using the command-line', 37 | default=None, nargs=argparse.REMAINDER) 38 | args = parser.parse_args() 39 | 40 | # Init setting 41 | update_config(config, args) 42 | if not args.pth: 43 | exp_id = infer_exp_id(args.cfg) 44 | exp_ckpt_root = os.path.join(config.ckpt_root, exp_id) 45 | args.pth = sorted(glob.glob(os.path.join(exp_ckpt_root, '*pth')))[-1] 46 | print(f'--pth is not given. Automatically inferring pth={args.pth}') 47 | device = torch.device('cpu' if args.no_cuda else 'cuda') 48 | 49 | # Prepare images to be processed 50 | paths = sorted(glob.glob(args.img_glob)) 51 | if len(paths) == 0: 52 | print('no images found') 53 | for path in paths: 54 | assert os.path.isfile(path), '%s not found' % path 55 | 56 | # Prepare the trained model 57 | model_file = importlib.import_module(config.model.file) 58 | model_class = getattr(model_file, config.model.modelclass) 59 | net = model_class(**config.model.kwargs) 60 | net.load_state_dict(torch.load(args.pth)) 61 | net = net.to(device).eval() 62 | 63 | # Check target directory 64 | if not os.path.isdir(args.output_dir): 65 | print('Output directory %s does not exist. Creating it.'
% args.output_dir) 66 | os.makedirs(args.output_dir) 67 | 68 | # Inferencing 69 | with torch.no_grad(): 70 | for i_path in tqdm(paths, desc='Inferencing'): 71 | k = os.path.split(i_path)[-1][:-4] 72 | 73 | # Load image 74 | img_pil = Image.open(i_path) 75 | if img_pil.size != (1024, 512): 76 | img_pil = img_pil.resize((1024, 512), Image.BICUBIC) 77 | img_ori = np.array(img_pil)[..., :3].transpose([2, 0, 1]).copy() 78 | x = torch.FloatTensor([img_ori / 255]).to(device) 79 | 80 | # Inferencing corners 81 | net.fname = k 82 | cor_id = net.infer(x)['cor_id'] 83 | 84 | # Output result 85 | with open(os.path.join(args.output_dir, k + '.txt'), 'w') as f: 86 | for x, y in cor_id: 87 | f.write('%d %d\n' % (x, y)) 88 | 89 | -------------------------------------------------------------------------------- /test_sem.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import importlib 4 | from natsort import natsorted 5 | from tqdm import tqdm, trange 6 | from collections import Counter 7 | 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.utils.data import DataLoader 14 | 15 | from lib.config import config, update_config, infer_exp_id 16 | from lib import dataset 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | # Parse args & config 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument('--cfg', required=True) 24 | parser.add_argument('--pth') 25 | parser.add_argument('--out') 26 | parser.add_argument('--vis_dir') 27 | parser.add_argument('--y', action='store_true') 28 | parser.add_argument('--test_hw', type=int, nargs='*') 29 | parser.add_argument('opts', 30 | help='Modify config options using the command-line', 31 | default=None, nargs=argparse.REMAINDER) 32 | args = parser.parse_args() 33 | update_config(config, args) 34 | device = 'cuda' if config.cuda else 'cpu' 35 | 36 | if config.cuda and config.cuda_benchmark: 37 | torch.backends.cudnn.benchmark = False 38 | 39 | # Init global variable 40 | if not args.pth: 41 | from glob import glob 42 | exp_id = infer_exp_id(args.cfg) 43 | exp_ckpt_root = os.path.join(config.ckpt_root, exp_id) 44 | args.pth = natsorted(glob(os.path.join(exp_ckpt_root, 'ep*pth')))[-1] 45 | print(f'No pth given, inferring the trained pth: {args.pth}') 46 | 47 | if not args.out: 48 | args.out = os.path.splitext(args.pth)[0] 49 | print(f'No out given, inferring the output dir: {args.out}') 50 | os.makedirs(args.out, exist_ok=True) 51 | if os.path.isfile(os.path.join(args.out, 'cm.npz')) and not args.y: 52 | print(f'{os.path.join(args.out, "cm.npz")} already exists:') 53 | cm = np.load(os.path.join(args.out, 'cm.npz'))['cm'] 54 | inter = np.diag(cm) 55 | union = cm.sum(0) + cm.sum(1) - inter 56 | ious = inter / union 57 | accs = inter / cm.sum(1) 58 | DatasetClass = getattr(dataset, config.dataset.name) 59 | config.dataset.valid_kwargs.update(config.dataset.common_kwargs) 60 | valid_dataset = DatasetClass(**config.dataset.valid_kwargs) 61 | id2class = np.array(valid_dataset.ID2CLASS) 62 | for name, iou, acc in zip(id2class, ious, accs): 63 | print(f'{name:20s}: iou {iou*100:5.2f} / acc {acc*100:5.2f}') 64 | print(f'{"Overall":20s}: iou {ious.mean()*100:5.2f} / acc {accs.mean()*100:5.2f}') 65 | print('Overwrite these results?', end=' ') 66 | input() 67 | 68 | # Init dataset 69 | DatasetClass = getattr(dataset, config.dataset.name) 70 |
config.dataset.valid_kwargs.update(config.dataset.common_kwargs) 71 | if args.test_hw: 72 | input_hw = config.dataset.common_kwargs['hw'] 73 | config.dataset.valid_kwargs['hw'] = args.test_hw 74 | else: 75 | input_hw = None 76 | valid_dataset = DatasetClass(**config.dataset.valid_kwargs) 77 | valid_loader = DataLoader(valid_dataset, 1, 78 | num_workers=config.num_workers, 79 | pin_memory=config.cuda) 80 | 81 | # Init network 82 | model_file = importlib.import_module(config.model.file) 83 | model_class = getattr(model_file, config.model.modelclass) 84 | net = model_class(**config.model.kwargs).to(device) 85 | net.load_state_dict(torch.load(args.pth)) 86 | net = net.to(device).eval() 87 | 88 | # Start eval 89 | cm = 0 90 | num_classes = config.model.kwargs.modalities_config.SemanticSegmenter.num_classes 91 | with torch.no_grad(): 92 | for batch in tqdm(valid_loader, position=1, total=len(valid_loader)): 93 | color = batch['x'].to(device) 94 | sem = batch['sem'].to(device) 95 | mask = (sem >= 0) 96 | if mask.sum() == 0: 97 | continue 98 | 99 | # feed forward & compute losses 100 | if input_hw is not None: 101 | color = F.interpolate(color, size=input_hw, mode='bilinear', align_corners=False) 102 | pred_sem = net.infer(color)['sem'] 103 | if input_hw is not None: 104 | pred_sem = F.interpolate(pred_sem, size=args.test_hw, mode='bilinear', align_corners=False) 105 | 106 | # Visualization 107 | if args.vis_dir: 108 | import matplotlib.pyplot as plt 109 | from imageio import imwrite 110 | cmap = (plt.get_cmap('gist_rainbow')(np.arange(num_classes) / num_classes)[...,:3] * 255).astype(np.uint8) 111 | rgb = (batch['x'][0, :3].permute(1,2,0) * 255).cpu().numpy().astype(np.uint8) 112 | vis_sem = cmap[pred_sem[0].argmax(0).cpu().numpy()] 113 | vis_sem = (rgb * 0.2 + vis_sem * 0.8).astype(np.uint8) 114 | imwrite(os.path.join(args.vis_dir, batch['fname'][0].strip()), vis_sem) 115 | vis_sem = cmap[sem[0].cpu().numpy()] 116 | vis_sem = (rgb * 0.2 + vis_sem * 0.8).astype(np.uint8) 117 | imwrite(os.path.join(args.vis_dir, batch['fname'][0].strip() + '.gt.png'), vis_sem) 118 | 119 | # Log 120 | gt = sem[mask] 121 | pred = pred_sem.argmax(1)[mask] 122 | assert gt.min() >= 0 and gt.max() < num_classes and pred_sem.shape[1] == num_classes 123 | cm += np.bincount((gt * num_classes + pred).cpu().numpy(), minlength=num_classes**2) 124 | 125 | # Summarize 126 | print(' Summarize '.center(50, '=')) 127 | cm = cm.reshape(num_classes, num_classes) 128 | id2class = np.array(valid_dataset.ID2CLASS) 129 | valid_mask = (cm.sum(1) != 0) 130 | cm = cm[valid_mask][:, valid_mask] 131 | id2class = id2class[valid_mask] 132 | inter = np.diag(cm) 133 | union = cm.sum(0) + cm.sum(1) - inter 134 | ious = inter / union 135 | accs = inter / cm.sum(1) 136 | for name, iou, acc in zip(id2class, ious, accs): 137 | print(f'{name:20s}: iou {iou*100:5.2f} / acc {acc*100:5.2f}') 138 | print(f'{"Overall":20s}: iou {ious.mean()*100:5.2f} / acc {accs.mean()*100:5.2f}') 139 | np.savez(os.path.join(args.out, 'cm.npz'), cm=cm) 140 | 141 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import importlib 4 | from tqdm import tqdm, trange 5 | from collections import Counter 6 | 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from torch.utils.data import DataLoader 13 | 14 | from lib.config import config, update_config, 
infer_exp_id 15 | from lib import dataset 16 | 17 | 18 | def train_loop(net, loader, optimizer): 19 | net.train() 20 | if config.training.fix_encoder_bn: 21 | apply_fn_based_on_key(net.encoder, ['bn'], lambda m: m.eval()) 22 | epoch_losses = Counter() 23 | for iit, batch in tqdm(enumerate(loader, 1), position=1, total=len(loader)): 24 | # Move data to the given computation device 25 | for k, v in batch.items(): 26 | if torch.is_tensor(v): 27 | batch[k] = v.to(device) 28 | 29 | # feed forward & compute losses 30 | losses = net.compute_losses(batch) 31 | if len(losses) == 0: 32 | continue 33 | 34 | # backprop 35 | optimizer.zero_grad() 36 | losses['total'].backward() 37 | optimizer.step() 38 | 39 | # Log 40 | BS = len(batch['x']) 41 | epoch_losses['N'] += BS 42 | for k, v in losses.items(): 43 | if torch.is_tensor(v): 44 | epoch_losses[k] += BS * v.item() 45 | else: 46 | epoch_losses[k] += BS * v 47 | 48 | # Statistic over the epoch 49 | N = epoch_losses.pop('N') 50 | for k, v in epoch_losses.items(): 51 | epoch_losses[k] = v / N 52 | 53 | return epoch_losses 54 | 55 | 56 | def valid_loop(net, loader): 57 | net.eval() 58 | epoch_losses = Counter() 59 | with torch.no_grad(): 60 | for iit, batch in tqdm(enumerate(loader, 1), position=1, total=len(loader)): 61 | for k, v in batch.items(): 62 | if torch.is_tensor(v): 63 | batch[k] = v.to(device) 64 | 65 | # feed forward & compute losses 66 | losses = net.compute_losses(batch) 67 | 68 | # Log 69 | for k, v in losses.items(): 70 | if torch.is_tensor(v): 71 | epoch_losses[k] += float(v.item()) / len(loader) 72 | else: 73 | epoch_losses[k] += v / len(loader) 74 | 75 | return epoch_losses 76 | 77 | 78 | def apply_fn_based_on_key(net, key_lst, fn): 79 | for name, m in net.named_modules(): 80 | if any(k in name for k in key_lst): 81 | fn(m) 82 | 83 | 84 | def group_parameters(net, wd_group_mode): 85 | wd = [] 86 | nowd = [] 87 | for name, p in net.named_parameters(): 88 | if not p.requires_grad: 89 | continue 90 | if wd_group_mode == 'bn and bias': 91 | if 'bn' in name or 'bias' in name: 92 | nowd.append(p) 93 | else: 94 | wd.append(p) 95 | elif wd_group_mode == 'encoder decoder': 96 | if 'feature_extractor' in name: 97 | nowd.append(p) 98 | else: 99 | wd.append(p) 100 | return [{'params': wd}, {'params': nowd, 'weight_decay': 0}] 101 | 102 | 103 | if __name__ == '__main__': 104 | 105 | # Parse args & config 106 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 107 | parser.add_argument('--cfg', required=True) 108 | parser.add_argument('opts', 109 | help='Modify config options using the command-line', 110 | default=None, nargs=argparse.REMAINDER) 111 | args = parser.parse_args() 112 | update_config(config, args) 113 | 114 | # Init global variable 115 | exp_id = infer_exp_id(args.cfg) 116 | exp_ckpt_root = os.path.join(config.ckpt_root, exp_id) 117 | os.makedirs(exp_ckpt_root, exist_ok=True) 118 | device = 'cuda' if config.cuda else 'cpu' 119 | if config.cuda and config.cuda_benchmark: 120 | torch.backends.cudnn.benchmark = True 121 | 122 | # Init dataset 123 | DatasetClass = getattr(dataset, config.dataset.name) 124 | config.dataset.train_kwargs.update(config.dataset.common_kwargs) 125 | config.dataset.valid_kwargs.update(config.dataset.common_kwargs) 126 | train_dataset = DatasetClass(**config.dataset.train_kwargs) 127 | valid_dataset = DatasetClass(**config.dataset.valid_kwargs) 128 | train_loader = DataLoader(train_dataset, config.training.batch_size, 129 | shuffle=True, drop_last=True, 130 | 
num_workers=config.num_workers, 131 | pin_memory=config.cuda, 132 | worker_init_fn=lambda x: np.random.seed()) 133 | valid_loader = DataLoader(valid_dataset, 1, 134 | num_workers=config.num_workers, 135 | pin_memory=config.cuda) 136 | 137 | # Init network 138 | model_file = importlib.import_module(config.model.file) 139 | model_class = getattr(model_file, config.model.modelclass) 140 | net = model_class(**config.model.kwargs).to(device) 141 | if config.training.fix_encoder_bn: 142 | apply_fn_based_on_key(net.encoder, ['bn'], lambda m: m.requires_grad_(False)) 143 | 144 | # Init optimizer 145 | if config.training.optim == 'Adam': 146 | optimizer = torch.optim.Adam( 147 | group_parameters(net, config.training.wd_group_mode), 148 | lr=config.training.optim_lr, weight_decay=config.training.weight_decay) 149 | elif config.training.optim == 'AdamW': 150 | optimizer = torch.optim.AdamW( 151 | group_parameters(net, config.training.wd_group_mode), 152 | lr=config.training.optim_lr, weight_decay=config.training.weight_decay) 153 | elif config.training.optim == 'SGD': 154 | optimizer = torch.optim.SGD( 155 | group_parameters(net, config.training.wd_group_mode), momentum=0.9, 156 | lr=config.training.optim_lr, weight_decay=config.training.weight_decay) 157 | 158 | if config.training.optim_poly_gamma > 0: 159 | def lr_poly_rate(epoch): 160 | return (1 - epoch / config.training.epoch) ** config.training.optim_poly_gamma 161 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_poly_rate) 162 | else: 163 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 164 | optimizer, milestones=[int(p * config.training.epoch) for p in config.training.optim_milestons], 165 | gamma=config.training.optim_gamma) 166 | 167 | # Start training 168 | for iep in trange(1, config.training.epoch + 1, position=0): 169 | 170 | # Train phase 171 | epoch_losses = train_loop(net, train_loader, optimizer) 172 | scheduler.step() 173 | print(f'EP[{iep}/{config.training.epoch}] train: ' + 174 | ' \ '.join([f'{k} {v:.3f}' for k, v in epoch_losses.items()])) 175 | 176 | # Periodically save model 177 | if iep % config.training.save_every == 0: 178 | torch.save(net.state_dict(), os.path.join(exp_ckpt_root, f'ep{iep}.pth')) 179 | print('Model saved') 180 | 181 | # Valid phase 182 | epoch_losses = valid_loop(net, valid_loader) 183 | print(f'EP[{iep}/{config.training.epoch}] valid: ' + 184 | ' \ '.join([f'{k} {v:.3f}' for k, v in epoch_losses.items()])) 185 | 186 | -------------------------------------------------------------------------------- /vis_depth.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import open3d as o3d 4 | from imageio import imread 5 | 6 | 7 | def get_uni_sphere_xyz(H, W): 8 | j, i = np.meshgrid(np.arange(H), np.arange(W), indexing='ij') 9 | u = (i+0.5) / W * 2 * np.pi 10 | v = ((j+0.5) / H - 0.5) * np.pi 11 | z = -np.sin(v) 12 | c = np.cos(v) 13 | y = c * np.sin(u) 14 | x = c * np.cos(u) 15 | sphere_xyz = np.stack([x, y, z], -1) 16 | return sphere_xyz 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | import argparse 22 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 23 | parser.add_argument('--img', required=True, 24 | help='Image texture in equirectangular format') 25 | parser.add_argument('--depth', required=True, 26 | help='Depth map') 27 | parser.add_argument('--scale', default=0.001, type=float, 28 | help='Rescale the depth map') 29 | parser.add_argument('--crop_ratio', 
default=80/512, type=float, 30 | help='Crop ratio for upper and lower part of the image') 31 | parser.add_argument('--crop_z_above', default=1.2, type=float, 32 | help='Filter out 3D points with z coordinate above this value') 33 | args = parser.parse_args() 34 | 35 | # Reading rgb-d 36 | rgb = imread(args.img) 37 | depth = imread(args.depth)[...,None].astype(np.float32) * args.scale 38 | 39 | # Project to 3d 40 | H, W = rgb.shape[:2] 41 | xyz = depth * get_uni_sphere_xyz(H, W) 42 | xyzrgb = np.concatenate([xyz, rgb/255.], 2) 43 | 44 | # Crop the image and flatten 45 | if args.crop_ratio > 0: 46 | assert args.crop_ratio < 1 47 | crop = int(H * args.crop_ratio) 48 | xyzrgb = xyzrgb[crop:-crop] 49 | xyzrgb = xyzrgb.reshape(-1, 6) 50 | 51 | # Crop in 3d 52 | xyzrgb = xyzrgb[xyzrgb[:,2] <= args.crop_z_above] 53 | 54 | # Visualize 55 | pcd = o3d.geometry.PointCloud() 56 | pcd.points = o3d.utility.Vector3dVector(xyzrgb[:, :3]) 57 | pcd.colors = o3d.utility.Vector3dVector(xyzrgb[:, 3:]) 58 | 59 | o3d.visualization.draw_geometries([ 60 | pcd, 61 | o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) 62 | ]) 63 | -------------------------------------------------------------------------------- /vis_layout.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import open3d as o3d 4 | from PIL import Image 5 | from scipy.signal import correlate2d 6 | from scipy.ndimage import shift 7 | 8 | from lib.misc.post_proc import np_coor2xy, np_coorx2u, np_coory2v 9 | from eval_layout import layout_2_depth 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | import argparse 15 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--img', required=True, 17 | help='Image texture in equirectangular format') 18 | parser.add_argument('--layout', required=True, 19 | help='Txt or json file containing layout corners (cor_id)') 20 | parser.add_argument('--out') 21 | parser.add_argument('--no_vis', action='store_true') 22 | parser.add_argument('--show_ceiling', action='store_true', 23 | help='Render the ceiling (skipped by default)') 24 | parser.add_argument('--ignore_floor', action='store_true', 25 | help='Skip rendering floor') 26 | parser.add_argument('--ignore_wall', action='store_true', 27 | help='Skip rendering wall') 28 | parser.add_argument('--ignore_wireframe', action='store_true', 29 | help='Skip rendering wireframe') 30 | args = parser.parse_args() 31 | 32 | if not args.out and args.no_vis: 33 | print('You may want to export (via --out) or visualize (drop --no_vis)') 34 | import sys; sys.exit() 35 | 36 | # Reading source (texture img, cor_id txt) 37 | equirect_texture = np.array(Image.open(args.img)) 38 | H, W = equirect_texture.shape[:2] 39 | if args.layout.endswith('json'): 40 | with open(args.layout) as f: 41 | inferenced_result = json.load(f) 42 | cor_id = np.array(inferenced_result['uv'], np.float32) 43 | cor_id[:, 0] *= W 44 | cor_id[:, 1] *= H 45 | else: 46 | cor_id = np.loadtxt(args.layout).astype(np.float32) 47 | 48 | # Convert corners to layout 49 | depth, floor_mask, ceil_mask, wall_mask = layout_2_depth(cor_id, H, W, return_mask=True) 50 | coorx, coory = np.meshgrid(np.arange(W), np.arange(H)) 51 | us = np_coorx2u(coorx, W) 52 | vs = np_coory2v(coory, H) 53 | zs = depth * np.sin(vs) 54 | cs = depth * np.cos(vs) 55 | xs = cs * np.sin(us) 56 | ys = -cs * np.cos(us) 57 | 58 | # Aggregate mask 59 | mask = np.ones_like(floor_mask) 60 | if args.ignore_floor: 61 |
mask &= ~floor_mask 62 | if not args.show_ceiling: 63 | mask &= ~ceil_mask 64 | if args.ignore_wall: 65 | mask &= ~wall_mask 66 | 67 | # Prepare ply's points and faces 68 | xyzrgb = np.concatenate([ 69 | xs[...,None], ys[...,None], zs[...,None], 70 | equirect_texture], -1) 71 | xyzrgb = np.concatenate([xyzrgb, xyzrgb[:,[0]]], 1) 72 | mask = np.concatenate([mask, mask[:,[0]]], 1) 73 | lo_tri_template = np.array([ 74 | [0, 0, 0], 75 | [0, 1, 0], 76 | [0, 1, 1]]) 77 | up_tri_template = np.array([ 78 | [0, 0, 0], 79 | [0, 1, 1], 80 | [0, 0, 1]]) 81 | ma_tri_template = np.array([ 82 | [0, 0, 0], 83 | [0, 1, 1], 84 | [0, 1, 0]]) 85 | lo_mask = (correlate2d(mask, lo_tri_template, mode='same') == 3) 86 | up_mask = (correlate2d(mask, up_tri_template, mode='same') == 3) 87 | ma_mask = (correlate2d(mask, ma_tri_template, mode='same') == 3) & (~lo_mask) & (~up_mask) 88 | ref_mask = ( 89 | lo_mask | (correlate2d(lo_mask, np.flip(lo_tri_template, (0,1)), mode='same') > 0) |\ 90 | up_mask | (correlate2d(up_mask, np.flip(up_tri_template, (0,1)), mode='same') > 0) |\ 91 | ma_mask | (correlate2d(ma_mask, np.flip(ma_tri_template, (0,1)), mode='same') > 0) 92 | ) 93 | points = xyzrgb[ref_mask] 94 | 95 | ref_id = np.full(ref_mask.shape, -1, np.int32) 96 | ref_id[ref_mask] = np.arange(ref_mask.sum()) 97 | faces_lo_tri = np.stack([ 98 | ref_id[lo_mask], 99 | ref_id[shift(lo_mask, [1, 0], cval=False, order=0)], 100 | ref_id[shift(lo_mask, [1, 1], cval=False, order=0)], 101 | ], 1) 102 | faces_up_tri = np.stack([ 103 | ref_id[up_mask], 104 | ref_id[shift(up_mask, [1, 1], cval=False, order=0)], 105 | ref_id[shift(up_mask, [0, 1], cval=False, order=0)], 106 | ], 1) 107 | faces_ma_tri = np.stack([ 108 | ref_id[ma_mask], 109 | ref_id[shift(ma_mask, [1, 0], cval=False, order=0)], 110 | ref_id[shift(ma_mask, [0, 1], cval=False, order=0)], 111 | ], 1) 112 | faces = np.concatenate([faces_lo_tri, faces_up_tri, faces_ma_tri]) 113 | 114 | # Dump results ply 115 | if args.out: 116 | ply_header = '\n'.join([ 117 | 'ply', 118 | 'format ascii 1.0', 119 | f'element vertex {len(points):d}', 120 | 'property float x', 121 | 'property float y', 122 | 'property float z', 123 | 'property uchar red', 124 | 'property uchar green', 125 | 'property uchar blue', 126 | f'element face {len(faces):d}', 127 | 'property list uchar int vertex_indices', 128 | 'end_header', 129 | ]) 130 | with open(args.out, 'w') as f: 131 | f.write(ply_header) 132 | f.write('\n') 133 | for x, y, z, r, g, b in points: 134 | f.write(f'{x:.2f} {y:.2f} {z:.2f} {r:.0f} {g:.0f} {b:.0f}\n') 135 | for i, j, k in faces: 136 | f.write(f'3 {i:d} {j:d} {k:d}\n') 137 | 138 | if not args.no_vis: 139 | mesh = o3d.geometry.TriangleMesh() 140 | mesh.vertices = o3d.utility.Vector3dVector(points[:, :3]) 141 | mesh.vertex_colors = o3d.utility.Vector3dVector(points[:, 3:] / 255.) 
142 | mesh.triangles = o3d.utility.Vector3iVector(faces) 143 | draw_geometries = [mesh] 144 | 145 | # Show wireframe 146 | if not args.ignore_wireframe: 147 | # Convert cor_id to 3d xyz 148 | N = len(cor_id) // 2 149 | floor_z = -1.6 150 | floor_xy = np_coor2xy(cor_id[1::2], floor_z, W, H, floorW=1, floorH=1) 151 | c = np.sqrt((floor_xy**2).sum(1)) 152 | v = np_coory2v(cor_id[0::2, 1], H) 153 | ceil_z = (c * np.tan(v)).mean() 154 | 155 | # Prepare wireframe in open3d 156 | assert N == len(floor_xy) 157 | wf_points = [[x, y, floor_z] for x, y in floor_xy] +\ 158 | [[x, y, ceil_z] for x, y in floor_xy] 159 | wf_lines = [[i, (i+1)%N] for i in range(N)] +\ 160 | [[i+N, (i+1)%N+N] for i in range(N)] +\ 161 | [[i, i+N] for i in range(N)] 162 | wf_colors = [[1, 0, 0] for i in range(len(wf_lines))] 163 | wf_line_set = o3d.geometry.LineSet() 164 | wf_line_set.points = o3d.utility.Vector3dVector(wf_points) 165 | wf_line_set.lines = o3d.utility.Vector2iVector(wf_lines) 166 | wf_line_set.colors = o3d.utility.Vector3dVector(wf_colors) 167 | draw_geometries.append(wf_line_set) 168 | 169 | o3d.visualization.draw_geometries(draw_geometries, mesh_show_back_face=True) 170 | --------------------------------------------------------------------------------
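
A minimal sketch of how the panorama helpers defined in lib/model/utils.py above (wrap_lr_pad for horizontal wrap-around padding, augment / augment_undo for horizontal test-time augmentation) could be combined at inference time. The ToyNet model, its infer method, and the chosen rotation shifts below are illustrative assumptions for this sketch, not repository code:

import torch
import torch.nn as nn
from lib.model.utils import wrap_lr_pad, augment, augment_undo

class ToyNet(nn.Module):
    # Hypothetical stand-in for a depth network: a single conv layer whose
    # output has shape [B, 1, H, W]; only the padding behaviour matters here.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 1, kernel_size=3, padding=1)

    def infer(self, x):
        return self.conv(x)  # [B, 1, H, W] pseudo depth

net = ToyNet().eval()
wrap_lr_pad(net)  # replace zero padding along the width axis with circular left/right padding

x = torch.rand(1, 3, 512, 1024)  # one 1024x512 equirectangular panorama (batch size 1)
x_aug, aug_type = augment(x, flip=True, rotate=[256, 512, 768], rotate_flip=False)
with torch.no_grad():
    pred_aug = net.infer(x_aug)  # [n_aug, 1, H, W]: one prediction per augmented copy
pred = augment_undo(pred_aug, aug_type)  # numpy array, predictions mapped back and averaged

The rotate values are horizontal pixel shifts (x.roll along the width axis), so 256/512/768 correspond to quarter turns of a 1024-wide panorama; augment_undo divides by len(aug_type), so the un-augmented batch size should be 1 for each row of pred_aug to line up with one entry of aug_type.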