├── dataset
│ ├── __init__.py
│ ├── communal
│ │ ├── __init__.py
│ │ ├── base_dataset.py
│ │ └── read.py
│ ├── mp3d_dataset.py
│ ├── build.py
│ ├── pano_s2d3d_dataset.py
│ ├── pano_s2d3d_mix_dataset.py
│ └── zind_dataset.py
├── visualization
│ ├── visualizer
│ │ ├── Viewer
│ │ │ ├── __init__.py
│ │ │ └── Utils.py
│ │ ├── earcut
│ │ │ └── __init__.py
│ │ ├── __init__.py
│ │ ├── .gitignore
│ │ ├── src
│ │ │ ├── demo.png
│ │ │ ├── 3Dlayout.png
│ │ │ └── example.jpg
│ │ ├── requirements.txt
│ │ ├── shader
│ │ │ ├── __init__.py
│ │ │ ├── vertex_line.py
│ │ │ ├── vertex_pano.py
│ │ │ ├── geometry_line.py
│ │ │ ├── fragment_line.py
│ │ │ ├── fragment_pano.py
│ │ │ ├── vertex_line.glsl
│ │ │ ├── vertex_pano.glsl
│ │ │ ├── fragment_line.glsl
│ │ │ ├── geometry_line.glsl
│ │ │ └── fragment_pano.glsl
│ │ ├── README.md
│ │ ├── LICENSE
│ │ └── visualizer.py
│ ├── __init__.py
│ ├── obj3d.py
│ ├── grad.py
│ ├── floorplan.py
│ └── boundary.py
├── .gitignore
├── models
│ ├── __init__.py
│ ├── other
│ │ ├── __init__.py
│ │ ├── optimizer.py
│ │ ├── init_env.py
│ │ ├── scheduler.py
│ │ └── criterion.py
│ ├── modules
│ │ ├── __init__.py
│ │ ├── swin_transformer.py
│ │ ├── transformer.py
│ │ ├── patch_feature_extractor.py
│ │ ├── swg_transformer.py
│ │ └── conv_transformer.py
│ ├── build.py
│ ├── base_model.py
│ └── lgt_net.py
├── utils
│ ├── __init__.py
│ ├── misc.py
│ ├── time_watch.py
│ ├── writer.py
│ ├── logger.py
│ └── height.py
├── config
│ └── __init__.py
├── evaluation
│ ├── __init__.py
│ ├── eval_visible_iou.py
│ ├── f1_score.py
│ ├── analyse_layout_type.py
│ └── iou.py
├── postprocessing
│ ├── __init__.py
│ ├── dula
│ │ ├── __init__.py
│ │ ├── layout_old.py
│ │ └── layout.py
│ └── post_process.py
├── preprocessing
│ ├── __init__.py
│ └── filter.py
├── src
│ ├── demo
│ │ ├── demo.png
│ │ ├── demo1.png
│ │ └── demo.json
│ ├── fig
│ │ ├── network.png
│ │ ├── demo1_pred.png
│ │ └── post_processing
│ │   ├── img_0.png
│ │   ├── img_1.png
│ │   ├── img_2.png
│ │   ├── img_3.png
│ │   ├── original.png
│ │   └── optimized.png
│ └── config
│   ├── other
│   │ ├── led_net_zind.yaml
│   │ ├── horizon_net_zind.yaml
│   │ ├── led_net_mp3d.yaml
│   │ └── horizon_net_mp3d.yaml
│   ├── ablation_study
│   │ ├── w_lstm.yaml
│   │ ├── wo_height.yaml
│   │ ├── wo_nomal_gradient.yaml
│   │ ├── wo_pe.yaml
│   │ ├── full.yaml
│   │ ├── w_g_rpe1.yaml
│   │ ├── w_g_rpe2.yaml
│   │ ├── w_ape.yaml
│   │ ├── wo_global.yaml
│   │ ├── wo_window.yaml
│   │ ├── wo_gradient.yaml
│   │ ├── w_vit.yaml
│   │ └── w_vit_zind.yaml
│   ├── zind.yaml
│   ├── mp3d.yaml
│   ├── pano.yaml
│   ├── s2d3d.yaml
│   └── mp3d_scheduler.yaml
├── loss
│ ├── __init__.py
│ ├── led_loss.py
│ ├── object_loss.py
│ ├── boundary_loss.py
│ └── grad_loss.py
├── requirements.txt
├── LICENSE
├── Post-Porcessing.md
├── convert_ckpt.py
└── app.py
/dataset/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/visualization/visualizer/Viewer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | src/output
2 | checkpoints
3 | flagged
4 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from models.lgt_net import LGT_Net
2 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/06/19
3 | @description:
4 | """
--------------------------------------------------------------------------------
/visualization/visualizer/earcut/__init__.py:
--------------------------------------------------------------------------------
1 | from .earcut import *
--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/17
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/29
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/06/19
3 | @description:
4 | """
--------------------------------------------------------------------------------
/models/other/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/18
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/postprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/10/06
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/7/5
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/dataset/communal/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/09/22
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/postprocessing/dula/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/10/06
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/src/demo/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/demo/demo.png
--------------------------------------------------------------------------------
/src/demo/demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/demo/demo1.png
--------------------------------------------------------------------------------
/visualization/visualizer/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/11/06
3 | @description:
4 | """
5 |
--------------------------------------------------------------------------------
/src/fig/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/network.png
--------------------------------------------------------------------------------
/src/fig/demo1_pred.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/demo1_pred.png
--------------------------------------------------------------------------------
/visualization/visualizer/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | __pycache__
4 | .DS_store
5 | data*/
6 | paper_tools/*/
7 |
--------------------------------------------------------------------------------
/src/fig/post_processing/img_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_0.png
--------------------------------------------------------------------------------
/src/fig/post_processing/img_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_1.png
--------------------------------------------------------------------------------
/src/fig/post_processing/img_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_2.png
--------------------------------------------------------------------------------
/src/fig/post_processing/img_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_3.png
--------------------------------------------------------------------------------
/src/fig/post_processing/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/original.png
--------------------------------------------------------------------------------
/src/fig/post_processing/optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/optimized.png
--------------------------------------------------------------------------------
/visualization/visualizer/src/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/demo.png
--------------------------------------------------------------------------------
/visualization/visualizer/src/3Dlayout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/3Dlayout.png
--------------------------------------------------------------------------------
/visualization/visualizer/src/example.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/example.jpg
--------------------------------------------------------------------------------
/visualization/visualizer/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | PyQt5
3 | PyOpenGL
4 | pyglm==1.99.0
5 | numpy
6 | scipy
7 | matplotlib
8 | imageio
9 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/__init__.py:
--------------------------------------------------------------------------------
1 | from . import vertex_pano, fragment_pano
2 | from . import vertex_line, fragment_line
3 | from . import geometry_line
4 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/vertex_line.py:
--------------------------------------------------------------------------------
1 | import os
2 | dirname = os.path.dirname(os.path.abspath(__file__))
3 | with open('%s/vertex_line.glsl'%(dirname), 'r') as f:
4 | src = f.read()
5 |
6 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/vertex_pano.py:
--------------------------------------------------------------------------------
1 | import os
2 | dirname = os.path.dirname(os.path.abspath(__file__))
3 | with open('%s/vertex_pano.glsl'%(dirname), 'r') as f:
4 | src = f.read()
5 |
6 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/geometry_line.py:
--------------------------------------------------------------------------------
1 | import os
2 | dirname = os.path.dirname(os.path.abspath(__file__))
3 | with open('%s/geometry_line.glsl'%(dirname), 'r') as f:
4 | src = f.read()
5 |
6 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/fragment_line.py:
--------------------------------------------------------------------------------
1 | import os
2 | dirname = os.path.dirname(os.path.abspath(__file__))
3 |
4 | with open('%s/fragment_line.glsl'%(dirname), 'r') as f:
5 | src = f.read()
6 |
7 |
8 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/fragment_pano.py:
--------------------------------------------------------------------------------
1 | import os
2 | dirname = os.path.dirname(os.path.abspath(__file__))
3 |
4 | with open('%s/fragment_pano.glsl'%(dirname), 'r') as f:
5 | src = f.read()
6 |
7 |
8 |
--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/09/01
3 | @description:
4 | """
5 |
6 | from models.modules.swin_transformer import Swin_Transformer
7 | from models.modules.swg_transformer import SWG_Transformer
8 | from models.modules.transformer import Transformer
9 |
--------------------------------------------------------------------------------
/loss/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/7/19
3 | @description:
4 | """
5 |
6 | from torch.nn import L1Loss
7 | from loss.led_loss import LEDLoss
8 | from loss.grad_loss import GradLoss
9 | from loss.boundary_loss import BoundaryLoss
10 | from loss.object_loss import ObjectLoss, HeatmapLoss
11 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.21.0
2 | torch==1.7.1
3 | torchvision==0.8.2
4 | yacs==0.1.8
5 | einops==0.3.0
6 | opencv-python==4.5.3.56
7 | pylsd-nova==1.2.0
8 | tqdm==4.64.0
9 | scipy==1.8.1
10 | termcolor==1.1.0
11 | shapely==1.8.2
12 | imageio==2.19.2
13 | open3d==0.15.2
14 | gdown==4.4.0
15 | gradio==3.0.5
--------------------------------------------------------------------------------
/visualization/visualizer/shader/vertex_line.glsl:
--------------------------------------------------------------------------------
1 | #version 410
2 | layout(location=0) in vec3 iv3vertex;
3 |
4 | uniform mat4 um4p;
5 | uniform mat4 um4v;
6 | uniform mat4 um4m;
7 |
8 | void main(){
9 | //gl_Position = um4p * um4v * um4m * vec4(iv3vertex[0], iv3vertex[1], iv3vertex[2], 1.0);
10 | gl_Position = vec4(iv3vertex*0.999, 1.0);
11 | }
12 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/vertex_pano.glsl:
--------------------------------------------------------------------------------
1 | #version 410
2 | layout(location = 0) in vec3 iv3vertex;
3 |
4 | uniform mat4 um4p;
5 | uniform mat4 um4v;
6 | uniform mat4 um4m;
7 | out vec3 modelPosition;
8 |
9 | void main(){
10 | gl_Position = um4p * um4v * um4m * vec4(iv3vertex, 1.0);
11 | //gl_Position = vec4(iv3vertex, 1.0);
12 | modelPosition = iv3vertex;
13 | }
14 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/fragment_line.glsl:
--------------------------------------------------------------------------------
1 | #version 410
2 | #define pi 3.14159265359
3 | layout(location = 0) out vec4 fragColor;
4 | uniform int um4f;
5 |
6 | void main(){
7 | if (um4f==0)
8 | fragColor = vec4(vec3(255, 250, 84)/255.0, 1.0);
9 | else if(um4f==1)
10 | fragColor = vec4(0, 0, 1, 1.0);
11 | else
12 | fragColor = vec4(vec3(154, 255, 154)/255.0, 1.0);
13 |
14 | fragColor = vec4(0.5, 0.5, 0.5, 1.0);
15 | }
16 |
--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/8/4
3 | @description:
4 | """
5 | import numpy as np
6 | import torch
7 |
8 |
9 | def tensor2np(t: torch.Tensor) -> np.ndarray:
10 |     if isinstance(t, torch.Tensor):
11 |         if t.device.type == 'cpu':
12 |             return t.detach().numpy()
13 | else:
14 | return t.detach().cpu().numpy()
15 | else:
16 | return t
17 |
18 |
19 | def tensor2np_d(d: dict) -> dict:
20 | output = {}
21 | for k in d.keys():
22 | output[k] = tensor2np(d[k])
23 | return output
24 |
--------------------------------------------------------------------------------
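A minimal usage sketch for the helpers above, assuming `utils.misc` is importable from the repository root; it converts a single tensor and a dict of tensors to NumPy:

```python
# Minimal usage sketch for tensor2np / tensor2np_d (assumes utils.misc is on the import path).
import torch
from utils.misc import tensor2np, tensor2np_d

depth = torch.rand(1, 256)                           # e.g. a horizon-depth prediction
print(type(tensor2np(depth)))                        # <class 'numpy.ndarray'>
print(tensor2np_d({'depth': depth})['depth'].shape)  # (1, 256); non-tensor values pass through unchanged
```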
/utils/time_watch.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/18
3 | @description:
4 | """
5 | import time
6 |
7 |
8 | class TimeWatch:
9 | def __init__(self, name="", logger=None):
10 | self.name = name
11 | self.start = time.time()
12 | self.logger = logger
13 |
14 | def __del__(self):
15 | end = time.time()
16 | output = f"{self.name} | time use {(end - self.start):.2f}s."
17 | if self.logger:
18 | self.logger.info(output)
19 | else:
20 | print(output)
21 |
22 |
23 | if __name__ == '__main__':
24 | w = TimeWatch("__main__")
25 | time.sleep(2)
--------------------------------------------------------------------------------
/src/config/other/led_net_zind.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Using our framework to implement LED2-Net, Training on ZInd'
2 | TAG: 'zind'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'LSTM',
8 | 'output_name': 'LED',
9 | 'dropout': 0.5,
10 | } ]
11 | TRAIN:
12 | DEVICE: 'cuda:0'
13 | SCRATCH: False
14 | DETERMINISTIC: True
15 | CRITERION:
16 | DEPTH:
17 | WEIGHT: 1.0
18 | LOSS: 'LEDLoss'
19 | NEED_ALL: True
20 | BASE_LR:
21 | 3e-4
22 | EPOCHS: 200
23 | RESUME_LAST: False
24 | OPTIMIZER:
25 | NAME: 'adam'
26 | DATA:
27 | DATASET: 'zind'
28 | DIR: 'src/dataset/zind'
29 | BATCH_SIZE: 6
30 | NUM_WORKERS: 6
31 | FOR_TEST_INDEX: None
32 | AUG:
33 | FLIP: True
34 | STRETCH: True
35 | ROTATE: True
36 | GAMMA: True
37 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/other/horizon_net_zind.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Using our framework to implement HorizonNet, Training on ZInd'
2 | TAG: 'zind'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'LSTM',
8 | 'output_name': 'Horizon',
9 | 'dropout': 0.5,
10 | } ]
11 | TRAIN:
12 | DEVICE: 'cuda:0'
13 | SCRATCH: False
14 | DETERMINISTIC: True
15 | CRITERION:
16 | DEPTH:
17 | WEIGHT: 1.0
18 | LOSS: 'BoundaryLoss'
19 | NEED_ALL: True
20 | BASE_LR:
21 | 3e-4
22 | EPOCHS: 200
23 | RESUME_LAST: True
24 | OPTIMIZER:
25 | NAME: 'adam'
26 | DATA:
27 | DATASET: 'zind'
28 | DIR: 'src/dataset/zind'
29 | BATCH_SIZE: 6
30 | NUM_WORKERS: 6
31 | FOR_TEST_INDEX: None
32 | AUG:
33 | FLIP: True
34 | STRETCH: True
35 | ROTATE: True
36 | GAMMA: True
37 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/other/led_net_mp3d.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Using our framework to implement LED2-Net, Training on MatterportLayout'
2 | TAG: 'mp3d'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'LSTM',
8 | 'output_name': 'LED',
9 | 'dropout': 0.5,
10 | } ]
11 | TRAIN:
12 | DEVICE: 'cuda:0'
13 | SCRATCH: False
14 | DETERMINISTIC: True
15 | CRITERION:
16 | DEPTH:
17 | WEIGHT: 1.0
18 | LOSS: 'LEDLoss'
19 | NEED_ALL: True
20 | BASE_LR:
21 | 3e-4
22 | EPOCHS: 1000
23 | RESUME_LAST: False
24 | OPTIMIZER:
25 | NAME: 'adam'
26 | DATA:
27 | DATASET: 'mp3d'
28 | DIR: 'src/dataset/mp3d'
29 | BATCH_SIZE: 6
30 | NUM_WORKERS: 6
31 | FOR_TEST_INDEX: None
32 | AUG:
33 | FLIP: True
34 | STRETCH: True
35 | ROTATE: True
36 | GAMMA: True
37 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/other/horizon_net_mp3d.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Using our framework to implement HorizonNet, Training on MatterportLayout'
2 | TAG: 'mp3d'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'LSTM',
8 | 'output_name': 'Horizon',
9 | 'dropout': 0.5,
10 | } ]
11 | TRAIN:
12 | DEVICE: 'cuda:0'
13 | SCRATCH: False
14 | DETERMINISTIC: True
15 | CRITERION:
16 | DEPTH:
17 | WEIGHT: 1.0
18 | LOSS: 'BoundaryLoss'
19 | NEED_ALL: True
20 | BASE_LR:
21 | 3e-4
22 | EPOCHS: 1000
23 | RESUME_LAST: True
24 | OPTIMIZER:
25 | NAME: 'adam'
26 | DATA:
27 | DATASET: 'mp3d'
28 | DIR: 'src/dataset/mp3d'
29 | BATCH_SIZE: 6
30 | NUM_WORKERS: 6
31 | FOR_TEST_INDEX: None
32 | AUG:
33 | FLIP: True
34 | STRETCH: True
35 | ROTATE: True
36 | GAMMA: True
37 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/visualization/visualizer/README.md:
--------------------------------------------------------------------------------
1 | # 360LayoutVisualizer
2 |
3 | This repo is a visualization tool for 360° Manhattan layouts, based on PyQt5 and OpenGL. The layout format follows LayoutMP3D.
4 |
5 |
6 | First, install the corresponding packages with the following command.
7 | ```bash
8 | pip install -r requirements.txt
9 | ```
10 | Then, run the script to visualize the provided example.
11 | ```bash
12 | python visualizer.py --img src/example.jpg --json src/example.json
13 | ```
14 | You can use the mouse and keyboard to control the camera.
15 | ```yaml
16 | w, a, s, d: translate the camera
17 | left-click: rotate the camera
18 | scroll: zoom in/out
19 | ```
20 |
21 |
--------------------------------------------------------------------------------
/src/config/ablation_study/w_lstm.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/ Bi-LSTM on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_lstm'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [{
8 | 'decoder_name': 'LSTM',
9 | 'output_name': 'LGT',
10 | 'dropout': 0.5,
11 | }]
12 | TRAIN:
13 | DEVICE: 'cuda:0'
14 | SCRATCH: False
15 | DETERMINISTIC: True
16 | CRITERION:
17 | DEPTH:
18 | WEIGHT: 0.9
19 | RATIO:
20 | WEIGHT: 0.1
21 | GRAD:
22 | WEIGHT: 0.1
23 | WEIGHTS: [ 1.0, 1.0 ]
24 | BASE_LR:
25 | 1e-04
26 | RESUME_LAST: False
27 | OPTIMIZER:
28 | NAME: 'adam'
29 | EPOCHS: 1000
30 | DATA:
31 | DATASET: 'mp3d'
32 | DIR: 'src/dataset/mp3d'
33 | BATCH_SIZE: 6
34 | NUM_WORKERS: 6
35 | FOR_TEST_INDEX: None
36 | AUG:
37 | FLIP: True
38 | STRETCH: True
39 | ROTATE: True
40 | GAMMA: True
41 | AMP_OPT_LEVEL: 'O0'
42 |
--------------------------------------------------------------------------------
/src/config/zind.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Training on ZInd'
2 | TAG: 'zind'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'SWG_Transformer',
8 | 'win_size': 16,
9 | 'rpe': 'lr_parameter_mirror',
10 | 'dropout': 0.0,
11 | 'depth': 6,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'zind'
34 | DIR: 'src/dataset/zind'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/wo_height.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o Height on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_height'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LED'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 1.0
21 | LOSS: 'LEDLoss'
22 | NEED_ALL: True
23 | BASE_LR:
24 | 1e-4
25 | RESUME_LAST: False
26 | OPTIMIZER:
27 | NAME: 'sgd'
28 | EPOCHS: 1000
29 | DATA:
30 | DATASET: 'mp3d'
31 | DIR: 'src/dataset/mp3d'
32 | BATCH_SIZE: 6
33 | NUM_WORKERS: 6
34 | FOR_TEST_INDEX: None
35 | AUG:
36 | FLIP: True
37 | STRETCH: True
38 | ROTATE: True
39 | GAMMA: True
40 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/mp3d.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Training on MatterportLayout'
2 | TAG: 'mp3d'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'SWG_Transformer',
8 | 'win_size': 16,
9 | 'rpe': 'lr_parameter_mirror',
10 | 'dropout': 0.0,
11 | 'depth': 8,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
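The project's own config loader is not included in this dump (config/__init__.py above is empty), so the following is only an assumed way to read a training YAML such as mp3d.yaml with yacs (pinned in requirements.txt); the path and the printed keys are taken from the file above:

```python
# Hedged sketch: read src/config/mp3d.yaml with yacs. This is NOT the repository's own
# loading code, just an illustration of what the file contains.
from yacs.config import CfgNode as CN

with open('src/config/mp3d.yaml') as f:   # path taken from the directory tree above
    cfg = CN.load_cfg(f)
cfg.freeze()

print(cfg.TAG)                                             # 'mp3d'
print(cfg.MODEL.NAME, cfg.MODEL.ARGS[0]['decoder_name'])   # 'LGT_Net' 'SWG_Transformer'
print(cfg.TRAIN.EPOCHS, cfg.DATA.BATCH_SIZE)               # 1000 6
```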
/src/config/ablation_study/wo_nomal_gradient.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o Normal+Gradient on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_normal_gradient'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | EPOCHS: 1000
24 | BASE_LR:
25 | 1e-4
26 | RESUME_LAST: False
27 | OPTIMIZER:
28 | NAME: 'adam'
29 | DATA:
30 | DATASET: 'mp3d'
31 | DIR: 'src/dataset/mp3d'
32 | BATCH_SIZE: 6
33 | NUM_WORKERS: 6
34 | FOR_TEST_INDEX: None
35 | AUG:
36 | FLIP: True
37 | STRETCH: True
38 | ROTATE: True
39 | GAMMA: True
40 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/ablation_study/wo_pe.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o PE on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_pe'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': None,
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/full.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: Ours (full) on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_full'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/w_g_rpe1.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w G-RPE1 on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_g_rpe1'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/w_g_rpe2.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w G-RPE2 on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_g_rpe2'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_half',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/w_ape.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w APE on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_ape'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'ape': 'lr_parameter',
11 | 'rpe': None,
12 | 'dropout': 0.0,
13 | 'output_name': 'LGT'
14 | } ]
15 | TRAIN:
16 | DEVICE: 'cuda:0'
17 | SCRATCH: False
18 | DETERMINISTIC: True
19 | CRITERION:
20 | DEPTH:
21 | WEIGHT: 0.9
22 | RATIO:
23 | WEIGHT: 0.1
24 | GRAD:
25 | WEIGHT: 0.1
26 | WEIGHTS: [ 1.0, 1.0 ]
27 | BASE_LR:
28 | 1e-4
29 | RESUME_LAST: False
30 | OPTIMIZER:
31 | NAME: 'adam'
32 | EPOCHS: 1000
33 | DATA:
34 | DATASET: 'mp3d'
35 | DIR: 'src/dataset/mp3d'
36 | BATCH_SIZE: 6
37 | NUM_WORKERS: 6
38 | FOR_TEST_INDEX: None
39 | AUG:
40 | FLIP: True
41 | STRETCH: True
42 | ROTATE: True
43 | GAMMA: True
44 | AMP_OPT_LEVEL: 'O0'
45 |
--------------------------------------------------------------------------------
/src/config/pano.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Training on PanoContext(train)+Stanford2D-3D(whole)'
2 | TAG: 'pano'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'SWG_Transformer',
8 | 'win_size': 16,
9 | 'rpe': 'lr_parameter_mirror',
10 | 'dropout': 0.0,
11 | 'depth': 6,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'pano_s2d3d_mix'
34 | DIR: 'src/dataset/pano_s2d3d'
35 | SUBSET: 'pano'
36 | BATCH_SIZE: 6
37 | NUM_WORKERS: 6
38 | FOR_TEST_INDEX: None
39 | AUG:
40 | FLIP: True
41 | STRETCH: True
42 | ROTATE: True
43 | GAMMA: True
44 | AMP_OPT_LEVEL: 'O0'
45 |
--------------------------------------------------------------------------------
/src/config/ablation_study/wo_global.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o Global Block on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_global'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [{
8 | 'decoder_name': 'Swin_Transformer',
9 | 'win_size': 16,
10 | 'rpe':'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | }]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
--------------------------------------------------------------------------------
/src/config/ablation_study/wo_window.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o Window Block on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_window'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [{
8 | 'decoder_name': 'Transformer',
9 | 'win_size': 16,
10 | 'rpe':'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | }]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
44 |
45 |
--------------------------------------------------------------------------------
/src/config/s2d3d.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Training on Stanford2D-3D(train)+PanoContext(whole)'
2 | TAG: 's2d3d'
3 | SEED: 123
4 | MODEL:
5 | NAME: 'LGT_Net'
6 | ARGS: [ {
7 | 'decoder_name': 'SWG_Transformer',
8 | 'win_size': 16,
9 | 'rpe': 'lr_parameter_mirror',
10 | 'dropout': 0.0,
11 | 'depth': 6,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:2'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 1.0 ]
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'pano_s2d3d_mix'
34 | DIR: 'src/dataset/pano_s2d3d'
35 | SUBSET: 's2d3d'
36 | BATCH_SIZE: 6
37 | NUM_WORKERS: 6
38 | FOR_TEST_INDEX: None
39 | AUG:
40 | FLIP: True
41 | STRETCH: True
42 | ROTATE: True
43 | GAMMA: True
44 | AMP_OPT_LEVEL: 'O0'
45 |
--------------------------------------------------------------------------------
/src/config/ablation_study/wo_gradient.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/o Gradient on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_wo_gradient'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'output_name': 'LGT'
13 | } ]
14 | TRAIN:
15 | DEVICE: 'cuda:0'
16 | SCRATCH: False
17 | DETERMINISTIC: True
18 | CRITERION:
19 | DEPTH:
20 | WEIGHT: 0.9
21 | RATIO:
22 | WEIGHT: 0.1
23 | GRAD:
24 | WEIGHT: 0.1
25 | WEIGHTS: [ 1.0, 0.0 ] # only normal loss
26 | BASE_LR:
27 | 1e-4
28 | RESUME_LAST: False
29 | OPTIMIZER:
30 | NAME: 'adam'
31 | EPOCHS: 1000
32 | DATA:
33 | DATASET: 'mp3d'
34 | DIR: 'src/dataset/mp3d'
35 | BATCH_SIZE: 6
36 | NUM_WORKERS: 6
37 | FOR_TEST_INDEX: None
38 | AUG:
39 | FLIP: True
40 | STRETCH: True
41 | ROTATE: True
42 | GAMMA: True
43 | AMP_OPT_LEVEL: 'O0'
--------------------------------------------------------------------------------
/src/config/ablation_study/w_vit.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/ ViT on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_vit'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'backbone': 'patch',
10 | 'win_size': 16,
11 | 'rpe': 'lr_parameter_mirror',
12 | 'dropout': 0.0,
13 | 'depth': 16,
14 | 'output_name': 'LGT'
15 | } ]
16 | TRAIN:
17 | DEVICE: 'cuda:0'
18 | SCRATCH: False
19 | DETERMINISTIC: True
20 | CRITERION:
21 | DEPTH:
22 | WEIGHT: 0.9
23 | RATIO:
24 | WEIGHT: 0.1
25 | GRAD:
26 | WEIGHT: 0.1
27 | WEIGHTS: [ 1.0, 1.0 ]
28 | BASE_LR:
29 | 1e-4
30 | RESUME_LAST: False
31 | OPTIMIZER:
32 | NAME: 'adam'
33 | EPOCHS: 1000
34 | DATA:
35 | DATASET: 'mp3d'
36 | DIR: 'src/dataset/mp3d'
37 | BATCH_SIZE: 6
38 | NUM_WORKERS: 6
39 | FOR_TEST_INDEX: None
40 | AUG:
41 | FLIP: True
42 | STRETCH: True
43 | ROTATE: True
44 | GAMMA: True
45 | AMP_OPT_LEVEL: 'O0'
46 |
47 |
--------------------------------------------------------------------------------
/src/config/ablation_study/w_vit_zind.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Ablation Study: w/ ViT on ZInd'
2 | VAL_NAME: 'test'
3 | TAG: 'ablation_study_w_vit_zind'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'backbone': 'patch',
10 | 'win_size': 16,
11 | 'rpe': 'lr_parameter_mirror',
12 | 'dropout': 0.0,
13 | 'depth': 16,
14 | 'output_name': 'LGT'
15 | } ]
16 | TRAIN:
17 | DEVICE: 'cuda:0'
18 | SCRATCH: False
19 | DETERMINISTIC: True
20 | CRITERION:
21 | DEPTH:
22 | WEIGHT: 0.9
23 | RATIO:
24 | WEIGHT: 0.1
25 | GRAD:
26 | WEIGHT: 0.1
27 | WEIGHTS: [ 1.0, 1.0 ]
28 | BASE_LR:
29 | 1e-4
30 | RESUME_LAST: False
31 | OPTIMIZER:
32 | NAME: 'adam'
33 | EPOCHS: 200
34 | DATA:
35 | DATASET: 'zind'
36 | DIR: 'src/dataset/zind'
37 | BATCH_SIZE: 6
38 | NUM_WORKERS: 6
39 | FOR_TEST_INDEX: None
40 | AUG:
41 | FLIP: True
42 | STRETCH: True
43 | ROTATE: True
44 | GAMMA: True
45 | AMP_OPT_LEVEL: 'O0'
46 |
47 |
--------------------------------------------------------------------------------
/visualization/visualizer/shader/geometry_line.glsl:
--------------------------------------------------------------------------------
1 | #version 410
2 |
3 | layout (lines) in;
4 | layout (triangle_strip, max_vertices = 4) out;
5 |
6 | uniform mat4 um4p;
7 | uniform mat4 um4v;
8 | uniform mat4 um4m;
9 |
10 | void main(){
11 | float thickness = 0.04;
12 | float r = thickness / 2;
13 | mat4 mv = um4v * um4m;
14 | vec4 p1 = mv * gl_in[0].gl_Position;
15 | vec4 p2 = mv * gl_in[1].gl_Position;
16 | vec2 tmp = p2.xy - p1.xy;
17 | vec2 dir = normalize(p2.xy - p1.xy);
18 | if (tmp.x+tmp.y==0)
19 | dir = vec2(0, 1);
20 | vec2 normal = vec2(dir.y, -dir.x);
21 | vec4 offset1, offset2;
22 | offset1 = vec4(normal * r, 0, 0);
23 | offset2 = vec4(normal * r, 0, 0);
24 |
25 | vec4 coords[4];
26 | coords[0] = p1 + offset1;
27 | coords[1] = p1 - offset1;
28 | coords[2] = p2 + offset2;
29 | coords[3] = p2 - offset2;
30 | for (int i = 0; i < 4; ++i) {
31 | coords[i] = um4p * coords[i];
32 | gl_Position = coords[i];
33 | EmitVertex();
34 | }
35 | EndPrimitive();
36 | }
37 |
--------------------------------------------------------------------------------
/src/config/mp3d_scheduler.yaml:
--------------------------------------------------------------------------------
1 | COMMENT: 'Training on MatterportLayout'
2 | VAL_NAME: 'test'
3 | TAG: 'mp3d_scheduler'
4 | SEED: 123
5 | MODEL:
6 | NAME: 'LGT_Net'
7 | ARGS: [ {
8 | 'decoder_name': 'SWG_Transformer',
9 | 'win_size': 16,
10 | 'rpe': 'lr_parameter_mirror',
11 | 'dropout': 0.0,
12 | 'depth': 8,
13 | 'output_name': 'LGT'
14 | } ]
15 | TRAIN:
16 | DEVICE: 'cuda:0'
17 | SCRATCH: False
18 | DETERMINISTIC: True
19 | CRITERION:
20 | DEPTH:
21 | WEIGHT: 0.9
22 | RATIO:
23 | WEIGHT: 0.1
24 | GRAD:
25 | WEIGHT: 0.1
26 | WEIGHTS: [ 1.0, 1.0 ]
27 | BASE_LR:
28 | 3e-4
29 | RESUME_LAST: False
30 | OPTIMIZER:
31 | NAME: 'adam'
32 | EPOCHS: 1000
33 | LR_SCHEDULER:
34 | NAME: 'StepLR'
35 | ARGS: [ { 'step_size': 20, 'gamma': 0.9, 'last_epoch': -1} ]
36 | DATA:
37 | DATASET: 'mp3d'
38 | DIR: 'src/dataset/mp3d'
39 | BATCH_SIZE: 6
40 | NUM_WORKERS: 6
41 | FOR_TEST_INDEX: None
42 | AUG:
43 | FLIP: True
44 | STRETCH: True
45 | ROTATE: True
46 | GAMMA: True
47 | AMP_OPT_LEVEL: 'O0'
48 |
--------------------------------------------------------------------------------
/models/other/optimizer.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/18
3 | @description:
4 | """
5 | from torch import optim as optim
6 |
7 |
8 | def build_optimizer(config, model, logger):
9 | name = config.TRAIN.OPTIMIZER.NAME.lower()
10 |
11 | optimizer = None
12 | if name == 'sgd':
13 | optimizer = optim.SGD(model.parameters(), momentum=config.TRAIN.OPTIMIZER.MOMENTUM, nesterov=True,
14 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
15 | elif name == 'adamw':
16 | optimizer = optim.AdamW(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
17 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
18 | elif name == 'adam':
19 | optimizer = optim.Adam(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
20 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
21 |
22 | logger.info(f"Build optimizer: {name}, lr:{config.TRAIN.BASE_LR}")
23 |
24 | return optimizer
25 |
--------------------------------------------------------------------------------
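A minimal usage sketch for `build_optimizer`, with a hand-built yacs config that supplies only the fields the function reads; the values and the toy model below are placeholders, not the project's defaults:

```python
# Hedged sketch: call build_optimizer with a minimal, made-up config and a toy model.
import logging
import torch.nn as nn
from yacs.config import CfgNode as CN
from models.other.optimizer import build_optimizer

cfg = CN()
cfg.TRAIN = CN()
cfg.TRAIN.BASE_LR = 1e-4                  # placeholder values, not the project defaults
cfg.TRAIN.WEIGHT_DECAY = 0.0
cfg.TRAIN.OPTIMIZER = CN()
cfg.TRAIN.OPTIMIZER.NAME = 'adam'         # 'sgd' would additionally need MOMENTUM
cfg.TRAIN.OPTIMIZER.EPS = 1e-8
cfg.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999)

model = nn.Linear(1024, 256)              # stand-in for LGT_Net
optimizer = build_optimizer(cfg, model, logging.getLogger("LGTNet"))
print(optimizer)                          # Adam with lr=1e-4
```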
/models/other/init_env.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/15
3 | @description:
4 | """
5 | import random
6 | import torch
7 | import torch.backends.cudnn as cudnn
8 | import numpy as np
9 | import os
10 | import cv2
11 |
12 |
13 | def init_env(seed, deterministic=False, loader_work_num=0):
14 | # Fix seed
15 | # Python & NumPy
16 | np.random.seed(seed)
17 | random.seed(seed)
18 | os.environ['PYTHONHASHSEED'] = str(seed)
19 |
20 | # PyTorch
21 |     torch.manual_seed(seed)  # set the random seed for the CPU
22 |     if torch.cuda.is_available():
23 |         torch.cuda.manual_seed(seed)  # set the random seed for the current GPU
24 |         torch.cuda.manual_seed_all(seed)  # set the random seed for all GPUs
25 |
26 | # cuDNN
27 | if deterministic:
28 |         # for reproducibility
29 |         torch.backends.cudnn.benchmark = False
30 |         torch.backends.cudnn.deterministic = True  # with this flag set, cuDNN always returns the same (default) convolution algorithm, so results are deterministic
31 |     else:
32 |         cudnn.benchmark = True  # set to True when input sizes/types change little, letting cuDNN benchmark and pick the fastest algorithms
33 | torch.backends.cudnn.deterministic = False
34 |
35 | # Using multiple threads in Opencv can cause deadlocks
36 | if loader_work_num != 0:
37 | cv2.setNumThreads(0)
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 ZhiGang Jiang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/visualization/visualizer/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Fu-En Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/postprocessing/post_process.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/10/08
3 | @description:
4 | """
5 | import numpy as np
6 | import cv2
7 |
8 | from postprocessing.dula.layout import fit_layout
9 | from postprocessing.dula.layout_old import fit_layout_old
10 | from utils.conversion import depth2xyz, xyz2depth
11 |
12 |
13 | def post_process(b_depth, type_name='manhattan', need_cube=False):
14 | plan_y = 1
15 | b_xyz = depth2xyz(b_depth, plan_y)
16 |
17 | b_processed_xyz = []
18 | for xyz in b_xyz:
19 | if type_name == 'manhattan':
20 | processed_xz = fit_layout(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False)
21 | elif type_name == 'manhattan_old':
22 | processed_xz = fit_layout_old(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False)
23 | elif type_name == 'atalanta':
24 | processed_xz = cv2.approxPolyDP(xyz[..., ::2].astype(np.float32), 0.1, False)[:, 0, :]
25 | else:
26 | raise NotImplementedError("Unknown post-processing type")
27 |
28 | if need_cube:
29 | assert len(processed_xz) == 4
30 |
31 | processed_xyz = np.insert(processed_xz, 1, plan_y, axis=1)
32 | b_processed_xyz.append(processed_xyz)
33 |
34 | return np.array(b_processed_xyz)
--------------------------------------------------------------------------------
/loss/led_loss.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/12
3 | @description:
4 | """
5 | import torch
6 | import torch.nn as nn
7 |
8 |
9 | class LEDLoss(nn.Module):
10 | def __init__(self):
11 | super().__init__()
12 | self.loss = nn.L1Loss()
13 |
14 | def forward(self, gt, dt):
15 | camera_height = 1.6
16 | gt_depth = gt['depth'] * camera_height
17 |
18 | dt_ceil_depth = dt['ceil_depth'] * camera_height * gt['ratio']
19 | dt_floor_depth = dt['depth'] * camera_height
20 |
21 | ceil_loss = self.loss(gt_depth, dt_ceil_depth)
22 | floor_loss = self.loss(gt_depth, dt_floor_depth)
23 |
24 | loss = floor_loss + ceil_loss
25 |
26 | return loss
27 |
28 |
29 | if __name__ == '__main__':
30 | import numpy as np
31 | from dataset.mp3d_dataset import MP3DDataset
32 |
33 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
34 | gt = mp3d_dataset.__getitem__(0)
35 |
36 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1
37 | gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1
38 |
39 | dummy_dt = {
40 | 'depth': gt['depth'].clone(),
41 | 'ceil_depth': gt['depth'] / gt['ratio']
42 | }
43 | # dummy_dt['depth'][..., :20] *= 3 # some different
44 |
45 | led_loss = LEDLoss()
46 | loss = led_loss(gt, dummy_dt)
47 | print(loss)
48 |
--------------------------------------------------------------------------------
/loss/object_loss.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/12
3 | @description:
4 | """
5 | import torch
6 | import torch.nn as nn
7 | from loss.grad_loss import GradLoss
8 |
9 |
10 | class ObjectLoss(nn.Module):
11 | def __init__(self):
12 | super().__init__()
13 | self.heat_map_loss = HeatmapLoss(reduction='mean') # FocalLoss(reduction='mean')
14 | self.l1_loss = nn.SmoothL1Loss()
15 |
16 | def forward(self, gt, dt):
17 | # TODO::
18 | return 0
19 |
20 |
21 | class HeatmapLoss(nn.Module):
22 | def __init__(self, weight=None, alpha=2, beta=4, reduction='mean'):
23 | super(HeatmapLoss, self).__init__()
24 | self.alpha = alpha
25 | self.beta = beta
26 | self.reduction = reduction
27 |
28 | def forward(self, targets, inputs):
29 | center_id = (targets == 1.0).float()
30 | other_id = (targets != 1.0).float()
31 | center_loss = -center_id * (1.0 - inputs) ** self.alpha * torch.log(inputs + 1e-14)
32 | other_loss = -other_id * (1 - targets) ** self.beta * inputs ** self.alpha * torch.log(1.0 - inputs + 1e-14)
33 | loss = center_loss + other_loss
34 |
35 | batch_size = loss.size(0)
36 | if self.reduction == 'mean':
37 | loss = torch.sum(loss) / batch_size
38 |
39 | if self.reduction == 'sum':
40 | loss = torch.sum(loss) / batch_size
41 |
42 | return loss
43 |
--------------------------------------------------------------------------------
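`HeatmapLoss` expects a target heatmap with 1.0 at the center locations and predictions strictly inside (0, 1); a small self-contained check on dummy tensors (not real keypoint data) might look like:

```python
# Hedged sketch: exercise HeatmapLoss on dummy tensors.
import torch
from loss.object_loss import HeatmapLoss

targets = torch.zeros(2, 1, 16, 16)
targets[:, :, 8, 8] = 1.0                         # one "center" pixel per map
inputs = torch.rand(2, 1, 16, 16) * 0.98 + 0.01   # keep predictions strictly inside (0, 1)

heatmap_loss = HeatmapLoss(reduction='mean')
print(heatmap_loss(targets, inputs))              # a positive scalar tensor
```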
/Post-Porcessing.md:
--------------------------------------------------------------------------------
1 | # Post-Processing
2 |
3 | ## Update
4 | - 2023.5.18: We found that using the cross product to detect occlusion in the Manhattan post-processing ([layout.py#L70](https://github.com/zhigangjiang/LGT-Net/blob/b642d6288e3a4bf265e54ab93eed3455e760402b/postprocessing/dula/layout.py#L70)) was unreasonable. We fixed this problem [here](https://github.com/zhigangjiang/LGT-Net/blob/aae66ab9a2f361bc4e16af564f244acff5ec8aee/postprocessing/dula/layout.py#LL80C4-L80C15). In addition, we added optimizations that force the layout coordinates to align so that they satisfy the constraints of the Manhattan assumption.
5 |
6 | ## Step
7 |
8 | 1. Simplify the polygon with the [DP algorithm](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm) (see the code sketch below)
9 |
10 | 
11 |
12 | 2. Detect occlusion; the detected occlusion boxes are filled with 1
13 |
14 | 
15 |
16 | 3. Fill in the reasonable sampling sections
17 |
18 | 
19 |
20 | 4. Output the processed polygon
21 |
22 | 
23 |
24 | ## Performance
25 | It works; a performance comparison on the MatterportLayout dataset:
26 |
27 | | Method | 2D IoU(%) | 3D IoU(%) | RMSE | $\mathbf{\delta_{1}}$ |
28 | |--|--|--|--|--|
29 | | without post-proc | 83.52 | 81.11 | 0.204 | 0.951 |
30 | | original post-proc | 83.12 | 80.71 | 0.230 | 0.936 |
31 | | optimized post-proc | 83.48 | 81.08 | 0.214 | 0.940 |
32 |
33 | original:
34 |
35 | 
36 |
37 | optimized:
38 |
39 | 
40 |
--------------------------------------------------------------------------------
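Step 1 above is the same DP simplification that postprocessing/post_process.py exposes through its 'atalanta' branch via `cv2.approxPolyDP`. A self-contained sketch on a synthetic noisy square (illustrative only; it is not the Manhattan fitting in layout.py):

```python
# Hedged sketch: Ramer-Douglas-Peucker simplification with cv2.approxPolyDP on synthetic data.
import numpy as np
import cv2

# Densely sample the outline of a 2x2 square on the floor plane (x, z) and add small noise.
t = np.linspace(0, 1, 64, endpoint=False)
edges = [np.stack([-1 + 2 * t, np.full_like(t, -1)], axis=1),
         np.stack([np.full_like(t, 1), -1 + 2 * t], axis=1),
         np.stack([1 - 2 * t, np.full_like(t, 1)], axis=1),
         np.stack([np.full_like(t, -1), 1 - 2 * t], axis=1)]
floor_xz = np.concatenate(edges, axis=0).astype(np.float32)
floor_xz += np.random.uniform(-0.01, 0.01, floor_xz.shape).astype(np.float32)

# post_process.py uses epsilon=0.1 with closed=False; here the curve is treated as closed
# so that the simplified result is just the four corners of the square.
corners = cv2.approxPolyDP(floor_xz, 0.1, True)[:, 0, :]
print(corners.shape)   # roughly (4, 2)
```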
/utils/writer.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/11/06
3 | @description:
4 | """
5 | import cv2
6 | import numpy as np
7 |
8 |
9 | def xyz2json(xyz, ratio, camera_height=1.6):
10 | xyz = xyz * camera_height
11 | ceiling_height = camera_height * ratio
12 | layout_height = camera_height + ceiling_height
13 | data = {
14 | 'cameraHeight': camera_height,
15 | 'layoutHeight': layout_height,
16 | 'cameraCeilingHeight': ceiling_height,
17 | 'layoutObj2ds': {
18 | 'num': 0,
19 | 'obj2ds': []
20 | },
21 | 'layoutPoints': {
22 | 'num': xyz.shape[0],
23 | 'points': []
24 | },
25 | 'layoutWalls': {
26 | 'num': xyz.shape[0],
27 | 'walls': []
28 | }
29 | }
30 |
31 | xyz = np.concatenate([xyz, xyz[0:1, :]], axis=0)
32 | R_180 = cv2.Rodrigues(np.array([0, -1 * np.pi, 0], np.float32))[0]
33 | for i in range(xyz.shape[0] - 1):
34 | a = np.dot(R_180, xyz[i, :])
35 | a[0] *= -1
36 | b = np.dot(R_180, xyz[i + 1, :])
37 | b[0] *= -1
38 | c = a.copy()
39 | c[1] = 0
40 | normal = np.cross(a - b, a - c)
41 | normal /= np.linalg.norm(normal)
42 | d = -np.sum(normal * a)
43 | plane = np.asarray([normal[0], normal[1], normal[2], d])
44 |
45 | data['layoutPoints']['points'].append({'xyz': a.tolist(), 'id': i})
46 |
47 | next_i = 0 if i + 1 >= (xyz.shape[0] - 1) else i + 1
48 | tmp = {
49 | 'normal': normal.tolist(),
50 | 'planeEquation': plane.tolist(),
51 | 'pointsIdx': [i, next_i]
52 | }
53 | data['layoutWalls']['walls'].append(tmp)
54 |
55 | return data
56 |
57 |
--------------------------------------------------------------------------------
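A small usage sketch for `xyz2json`: the input is assumed to be the floor-boundary corners on the y = 1 plane (the convention used in postprocessing/post_process.py), and `ratio` is the ceiling-to-camera height ratio. The square room below is made up for illustration:

```python
# Hedged sketch: serialize a made-up 4-corner floor boundary with xyz2json.
import json
import numpy as np
from utils.writer import xyz2json

# Four floor corners of a square room, counter-clockwise, on the y = 1 plane.
xyz = np.array([[ 1.0, 1.0,  1.0],
                [ 1.0, 1.0, -1.0],
                [-1.0, 1.0, -1.0],
                [-1.0, 1.0,  1.0]], dtype=np.float32)

data = xyz2json(xyz, ratio=0.7)          # ratio value is arbitrary here
print(data['layoutHeight'], data['layoutWalls']['num'])
print(json.dumps(data, indent=2)[:200])  # preview of the resulting layout JSON
```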
/utils/logger.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/17
3 | @description:
4 | """
5 | import os
6 | import sys
7 | import logging
8 | import functools
9 | from termcolor import colored
10 |
11 |
12 | def build_logger(config):
13 | output_dir = config.LOGGER.DIR
14 | local_rank = config.LOCAL_RANK
15 | name = config.MODEL.NAME
16 | logger = get_logger(output_dir, local_rank, name)
17 | return logger
18 |
19 |
20 | @functools.lru_cache()
21 | def get_logger(output_dir=None, local_rank=None, name="LGTNet"):
22 | if output_dir and not os.path.exists(output_dir):
23 | os.makedirs(output_dir)
24 |
25 | # create logger
26 | logger = logging.getLogger(name)
27 | logger.setLevel(logging.DEBUG)
28 | logger.propagate = False
29 |
30 | # create formatter
31 | fmt = f'[%(asctime)s %(name)s][%(levelname)1.1s](%(filename)s %(lineno)d): %(message)s'
32 | color_fmt = colored(f'[%(asctime)s %(name)s][%(levelname)1.1s][{local_rank}]', 'green') + colored(
33 | f'(%(filename)s %(lineno)d)',
34 | 'yellow') + ': %(message)s'
35 | if local_rank in [0] or local_rank is None:
36 | console_handler = logging.StreamHandler(sys.stdout)
37 | console_handler.setLevel(logging.DEBUG)
38 | console_handler.setFormatter(
39 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S'))
40 | logger.addHandler(console_handler)
41 |
42 | if output_dir is not None:
43 | # create file handlers
44 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{local_rank}.log'), mode='a')
45 | file_handler.setLevel(logging.DEBUG)
46 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S'))
47 | logger.addHandler(file_handler)
48 |
49 | return logger
50 |
--------------------------------------------------------------------------------
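A quick usage sketch for `get_logger` (console-only, no log directory); with `local_rank=0` the colored console handler is attached:

```python
# Minimal sketch: console-only logger from utils.logger.
from utils.logger import get_logger

logger = get_logger(output_dir=None, local_rank=0, name="LGTNet")
logger.info("logger initialized")     # printed with the colored format defined above
logger.debug("debug level is enabled too")
```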
/loss/boundary_loss.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/12
3 | @description: For HorizonNet, using latitudes to calculate loss.
4 | """
5 | import torch
6 | import torch.nn as nn
7 | from utils.conversion import depth2xyz, xyz2lonlat
8 |
9 |
10 | class BoundaryLoss(nn.Module):
11 | def __init__(self):
12 | super().__init__()
13 | self.loss = nn.L1Loss()
14 |
15 | def forward(self, gt, dt):
16 | gt_floor_xyz = depth2xyz(gt['depth'])
17 | gt_ceil_xyz = gt_floor_xyz.clone()
18 | gt_ceil_xyz[..., 1] = -gt['ratio']
19 |
20 | gt_floor_boundary = xyz2lonlat(gt_floor_xyz)[..., -1:]
21 | gt_ceil_boundary = xyz2lonlat(gt_ceil_xyz)[..., -1:]
22 |
23 | gt_boundary = torch.cat([gt_floor_boundary, gt_ceil_boundary], dim=-1).permute(0, 2, 1)
24 | dt_boundary = dt['boundary']
25 |
26 | loss = self.loss(gt_boundary, dt_boundary)
27 | return loss
28 |
29 |
30 | if __name__ == '__main__':
31 | import numpy as np
32 | from dataset.mp3d_dataset import MP3DDataset
33 |
34 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
35 | gt = mp3d_dataset.__getitem__(0)
36 |
37 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1
38 | gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1
39 |
40 | dummy_dt = {
41 | 'depth': gt['depth'].clone(),
42 | 'boundary': torch.cat([
43 | xyz2lonlat(depth2xyz(gt['depth']))[..., -1:],
44 | xyz2lonlat(depth2xyz(gt['depth'], plan_y=-gt['ratio']))[..., -1:]
45 | ], dim=-1).permute(0, 2, 1)
46 | }
47 | # dummy_dt['boundary'][:, :, :20] /= 1.2  # introduce a small difference from gt
48 |
49 | boundary_loss = BoundaryLoss()
50 | loss = boundary_loss(gt, dummy_dt)
51 | print(loss)
52 |
--------------------------------------------------------------------------------
/models/other/scheduler.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/09/14
3 | @description:
4 | """
5 |
6 |
7 | class WarmupScheduler:
8 | def __init__(self, optimizer, lr_pow, init_lr, warmup_lr, warmup_step, max_step, **kwargs):
9 | self.lr_pow = lr_pow
10 | self.init_lr = init_lr
11 | self.running_lr = init_lr
12 | self.warmup_lr = warmup_lr
13 | self.warmup_step = warmup_step
14 | self.max_step = max_step
15 | self.optimizer = optimizer
16 |
17 | def step_update(self, cur_step):
18 | if cur_step < self.warmup_step:
19 | frac = cur_step / self.warmup_step
20 | step = self.warmup_lr - self.init_lr
21 | self.running_lr = self.init_lr + step * frac
22 | else:
23 | frac = (float(cur_step) - self.warmup_step) / (self.max_step - self.warmup_step)
24 | scale_running_lr = max((1. - frac), 0.) ** self.lr_pow
25 | self.running_lr = self.warmup_lr * scale_running_lr
26 |
27 | if self.optimizer is not None:
28 | for param_group in self.optimizer.param_groups:
29 | param_group['lr'] = self.running_lr
30 |
31 |
32 | if __name__ == '__main__':
33 | import matplotlib.pyplot as plt
34 |
35 | scheduler = WarmupScheduler(optimizer=None,
36 | lr_pow=4,
37 | init_lr=0.0000003,
38 | warmup_lr=0.00003,
39 | warmup_step=10000,
40 | max_step=100000)
41 |
42 | x = []
43 | y = []
44 | for i in range(100000):
45 | if i == 10000-1:
46 | print()
47 | scheduler.step_update(i)
48 | x.append(i)
49 | y.append(scheduler.running_lr)
50 | plt.plot(x, y, linewidth=1)
51 | plt.show()
52 |
--------------------------------------------------------------------------------
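A minimal sketch of driving the scheduler with an actual optimizer; the model and hyper-parameters are placeholders that mirror the values in the __main__ block above:

import torch
from models.other.scheduler import WarmupScheduler

model = torch.nn.Linear(8, 8)                       # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=3e-7)
scheduler = WarmupScheduler(optimizer=optimizer, lr_pow=4, init_lr=3e-7,
                            warmup_lr=3e-5, warmup_step=10000, max_step=100000)

for step in range(100000):
    # ... forward / backward / optimizer.step() would go here ...
    scheduler.step_update(step)                     # linear warm-up, then polynomial decay
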
/models/modules/swin_transformer.py:
--------------------------------------------------------------------------------
1 | from models.modules.transformer_modules import *
2 |
3 |
4 | class Swin_Transformer(nn.Module):
5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
7 | super().__init__()
8 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
9 | patch_num, ape)
10 | self.pos_dropout = nn.Dropout(dropout)
11 | self.layers = nn.ModuleList([])
12 | for i in range(depth):
13 | self.layers.append(nn.ModuleList([
14 | PreNorm(dim, WinAttention(dim, win_size=win_size, shift=0 if (i % 2 == 0) else win_size // 2,
15 | heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)),
16 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
17 | ]))
18 |
19 | def forward(self, x):
20 | if self.absolute_pos_embed is not None:
21 | x = self.absolute_pos_embed(x)
22 | x = self.pos_dropout(x)
23 | for attn, ff in self.layers:
24 | x = attn(x) + x
25 | x = ff(x) + x
26 | return x
27 |
28 |
29 | if __name__ == '__main__':
30 | token_dim = 1024
31 | token_len = 256
32 |
33 | transformer = Swin_Transformer(dim=token_dim,
34 | depth=6,
35 | heads=16,
36 | win_size=8,
37 | dim_head=64,
38 | mlp_dim=2048,
39 | dropout=0.1)
40 |
41 | input = torch.randn(1, token_len, token_dim)
42 | output = transformer(input)
43 | print(output.shape)
44 |
--------------------------------------------------------------------------------
/models/modules/transformer.py:
--------------------------------------------------------------------------------
1 | from models.modules.transformer_modules import *
2 |
3 |
4 | class Transformer(nn.Module):
5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
7 | super().__init__()
8 |
9 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
10 | patch_num, ape)
11 | self.pos_dropout = nn.Dropout(dropout)
12 | self.layers = nn.ModuleList([])
13 | for _ in range(depth):
14 | self.layers.append(nn.ModuleList([
15 | PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, patch_num=patch_num,
16 | rpe=rpe, rpe_pos=rpe_pos)),
17 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout))
18 | ]))
19 |
20 | def forward(self, x):
21 | if self.absolute_pos_embed is not None:
22 | x = self.absolute_pos_embed(x)
23 | x = self.pos_dropout(x)
24 | for attn, ff in self.layers:
25 | x = attn(x) + x
26 | x = ff(x) + x
27 | return x
28 |
29 |
30 | if __name__ == '__main__':
31 | token_dim = 1024
32 | token_len = 256
33 |
34 | transformer = Transformer(dim=token_dim, depth=6, heads=16,
35 | dim_head=64, mlp_dim=2048, dropout=0.1,
36 | patch_num=256, ape='lr_parameter', rpe='lr_parameter_mirror')
37 |
38 | total = sum(p.numel() for p in transformer.parameters())
39 | trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad)
40 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))
41 |
42 | input = torch.randn(1, token_len, token_dim)
43 | output = transformer(input)
44 | print(output.shape)
45 |
--------------------------------------------------------------------------------
/models/modules/patch_feature_extractor.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from einops.layers.torch import Rearrange
5 |
6 |
7 | class PatchFeatureExtractor(nn.Module):
8 | x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None])
9 | x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None])
10 |
11 | def __init__(self, patch_num=256, input_shape=None):
12 | super(PatchFeatureExtractor, self).__init__()
13 |
14 | if input_shape is None:
15 | input_shape = [3, 512, 1024]
16 | self.patch_dim = 1024
17 | self.patch_num = patch_num
18 |
19 | img_channel = input_shape[0]
20 | img_h = input_shape[1]
21 | img_w = input_shape[2]
22 |
23 | p_h, p_w = img_h, img_w // self.patch_num
24 | p_dim = p_h * p_w * img_channel
25 |
26 | self.patch_embedding = nn.Sequential(
27 | Rearrange('b c h (p_n p_w) -> b p_n (h p_w c)', p_w=p_w),
28 | nn.Linear(p_dim, self.patch_dim)
29 | )
30 |
31 | self.x_mean.requires_grad = False
32 | self.x_std.requires_grad = False
33 |
34 | def _prepare_x(self, x):
35 | x = x.clone()
36 | if self.x_mean.device != x.device:
37 | self.x_mean = self.x_mean.to(x.device)
38 | self.x_std = self.x_std.to(x.device)
39 | x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std
40 |
41 | return x
42 |
43 | def forward(self, x):
44 | # x [b 3 512 1024]
45 | x = self._prepare_x(x) # [b 3 512 1024]
46 | x = self.patch_embedding(x) # [b 256(patch_num) 1024(d)]
47 | x = x.permute(0, 2, 1) # [b 1024(d) 256(patch_num)]
48 | return x
49 |
50 |
51 | if __name__ == '__main__':
52 | from PIL import Image
53 | extractor = PatchFeatureExtractor()
54 | img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1))
55 | input = torch.Tensor([img]) # 1 3 512 1024
56 | feature = extractor(input)
57 | print(feature.shape) # 1, 1024, 256
58 |
--------------------------------------------------------------------------------
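The Rearrange pattern above slices the panorama into patch_num vertical strips and flattens each strip before the linear projection. A small sketch of the equivalent plain-tensor reshaping, assuming the default 3x512x1024 input and patch_num=256:

import torch

x = torch.randn(1, 3, 512, 1024)                               # b c h w
p_w = 1024 // 256                                              # each patch is a 4-pixel-wide vertical strip
patches = x.reshape(1, 3, 512, 256, p_w)                       # b c h p_n p_w
patches = patches.permute(0, 3, 2, 4, 1).reshape(1, 256, -1)   # b p_n (h p_w c)
print(patches.shape)                                           # torch.Size([1, 256, 6144]) == p_dim before nn.Linear
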
/convert_ckpt.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/11/22
3 | @description: Convert a training checkpoint into an inference checkpoint
4 | """
5 | import argparse
6 | import os
7 |
8 | import torch
9 |
10 | from config.defaults import merge_from_file
11 |
12 |
13 | def parse_option():
14 | parser = argparse.ArgumentParser(description='Convert a training checkpoint into an inference checkpoint')
15 | parser.add_argument('--cfg',
16 | type=str,
17 | required=True,
18 | metavar='FILE',
19 | help='path of config file')
20 |
21 | parser.add_argument('--output_path',
22 | type=str,
23 | help='path of output ckpt')
24 |
25 | args = parser.parse_args()
26 |
27 | print("arguments:")
28 | for arg in vars(args):
29 | print(arg, ":", getattr(args, arg))
30 | print("-" * 50)
31 | return args
32 |
33 |
34 | def convert_ckpt():
35 | args = parse_option()
36 | config = merge_from_file(args.cfg)
37 | ck_dir = os.path.join("checkpoints", f"{config.MODEL.ARGS[0]['decoder_name']}_{config.MODEL.ARGS[0]['output_name']}_Net",
38 | config.TAG)
39 | print(f"Processing {ck_dir}")
40 | model_paths = [name for name in os.listdir(ck_dir) if '_best_' in name]
41 | if len(model_paths) == 0:
42 | print("Not find best ckpt")
43 | return
44 | model_path = os.path.join(ck_dir, model_paths[0])
45 | print(f"Loading {model_path}")
46 | checkpoint = torch.load(model_path, map_location=torch.device('cuda:0'))
47 | net = checkpoint['net']
48 | output_path = None
49 | if args.output_path is None:
50 | output_path = os.path.join(ck_dir, 'best.pkl')
51 | else:
52 | output_path = args.output_path
53 | if output_path is None:
54 | print("Output path is invalid")
55 | print(f"Save on: {output_path}")
56 | os.makedirs(os.path.dirname(output_path), exist_ok=True)
57 | torch.save(net, output_path)
58 |
59 |
60 | if __name__ == '__main__':
61 | convert_ckpt()
62 |
--------------------------------------------------------------------------------
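A minimal sketch of consuming the converted file. The checkpoint directory name depends on MODEL.ARGS and TAG in the config, so the path below is only an illustration, and it is assumed that checkpoint['net'] holds the model state dict:

import torch

ckpt_path = 'checkpoints/SWG_Transformer_LGT_Net/mp3d/best.pkl'   # assumed path, for illustration only
state = torch.load(ckpt_path, map_location='cpu')                  # the object saved above is checkpoint['net']
# model.load_state_dict(state)  # assuming 'net' stores the model state dict
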
/models/modules/swg_transformer.py:
--------------------------------------------------------------------------------
1 | from models.modules.transformer_modules import *
2 |
3 |
4 | class SWG_Transformer(nn.Module):
5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
7 | super().__init__()
8 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
9 | patch_num, ape)
10 | self.pos_dropout = nn.Dropout(dropout)
11 | self.layers = nn.ModuleList([])
12 | for i in range(depth):
13 | if i % 2 == 0:
14 | attention = WinAttention(dim, win_size=win_size, shift=0 if (i % 3 == 0) else win_size // 2,
15 | heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)
16 | else:
17 | attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout,
18 | patch_num=patch_num, rpe=rpe, rpe_pos=rpe_pos)
19 |
20 | self.layers.append(nn.ModuleList([
21 | PreNorm(dim, attention),
22 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
23 | ]))
24 |
25 | def forward(self, x):
26 | if self.absolute_pos_embed is not None:
27 | x = self.absolute_pos_embed(x)
28 | x = self.pos_dropout(x)
29 | for attn, ff in self.layers:
30 | x = attn(x) + x
31 | x = ff(x) + x
32 | return x
33 |
34 |
35 | if __name__ == '__main__':
36 | token_dim = 1024
37 | token_len = 256
38 |
39 | transformer = SWG_Transformer(dim=token_dim,
40 | depth=6,
41 | heads=16,
42 | win_size=8,
43 | dim_head=64,
44 | mlp_dim=2048,
45 | dropout=0.1)
46 |
47 | input = torch.randn(1, token_len, token_dim)
48 | output = transformer(input)
49 | print(output.shape)
50 |
--------------------------------------------------------------------------------
/loss/grad_loss.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/12
3 | @description:
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | import numpy as np
9 |
10 | from visualization.grad import get_all
11 |
12 |
13 | class GradLoss(nn.Module):
14 | def __init__(self):
15 | super().__init__()
16 | self.loss = nn.L1Loss()
17 | self.cos = nn.CosineSimilarity(dim=-1, eps=0)
18 |
19 | self.grad_conv = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=0, bias=False, padding_mode='circular')
20 | self.grad_conv.weight = nn.Parameter(torch.tensor([[[1, 0, -1]]]).float())
21 | self.grad_conv.weight.requires_grad = False
22 |
23 | def forward(self, gt, dt):
24 | gt_direction, _, gt_angle_grad = get_all(gt['depth'], self.grad_conv)
25 | dt_direction, _, dt_angle_grad = get_all(dt['depth'], self.grad_conv)
26 |
27 | normal_loss = (1 - self.cos(gt_direction, dt_direction)).mean()
28 | grad_loss = self.loss(gt_angle_grad, dt_angle_grad)
29 | return [normal_loss, grad_loss]
30 |
31 |
32 | if __name__ == '__main__':
33 | from dataset.mp3d_dataset import MP3DDataset
34 | from utils.boundary import depth2boundaries
35 | from utils.conversion import uv2xyz
36 | from visualization.boundary import draw_boundaries
37 | from visualization.floorplan import draw_floorplan
38 |
39 | def show_boundary(image, depth, ratio):
40 | boundary_list = depth2boundaries(ratio, depth, step=None)
41 | draw_boundaries(image.transpose(1, 2, 0), boundary_list=boundary_list, show=True)
42 | draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, center_color=0.8)
43 |
44 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', patch_num=256)
45 | gt = mp3d_dataset.__getitem__(1)
46 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1
47 | dummy_dt = {
48 | 'depth': gt['depth'].clone(),
49 | }
50 | # dummy_dt['depth'][..., 20] *= 3  # introduce a small difference from gt
51 |
52 | # show_boundary(gt['image'], gt['depth'][0].numpy(), gt['ratio'])
53 | # show_boundary(gt['image'], dummy_dt['depth'][0].numpy(), gt['ratio'])
54 |
55 | grad_loss = GradLoss()
56 | loss = grad_loss(gt, dummy_dt)
57 | print(loss)
58 |
--------------------------------------------------------------------------------
/evaluation/eval_visible_iou.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/08/02
3 | @description:
4 | Calculate the 2D IoU between the visible boundary and the full boundary. For the MP3D dataset
5 | the results are: {'train': 0.9775843958583535, 'test': 0.9828616219607289, 'val': 0.9883810438132491},
6 | indicating that the best 2D IoU achievable with our approach on the test split is bounded at about 98.29%.
7 | """
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 |
11 | from tqdm import tqdm
12 | from evaluation.iou import calc_IoU_2D
13 | from visualization.floorplan import draw_iou_floorplan
14 | from utils.conversion import depth2xyz, uv2xyz
15 |
16 |
17 | def eval_dataset_visible_IoU(dataset, show=False):
18 | bar = tqdm(dataset, total=len(dataset), ncols=100)
19 | iou2ds = []
20 | for data in bar:
21 | bar.set_description(f"Processing {data['id']}")
22 | corners = data['corners']
23 | corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners
24 | all_xz = uv2xyz(corners)[..., ::2]
25 | visible_xz = depth2xyz(data['depth'])[..., ::2]
26 | iou2d = calc_IoU_2D(all_xz, visible_xz)
27 | iou2ds.append(iou2d)
28 | if show:
29 | layout_floorplan = draw_iou_floorplan(all_xz, visible_xz, iou2d=iou2d)
30 | plt.imshow(layout_floorplan)
31 | plt.show()
32 |
33 | mean_iou2d = np.array(iou2ds).mean()
34 | return mean_iou2d
35 |
36 |
37 | def execute_eval_dataset_visible_IoU(root_dir, dataset, modes=None):
38 | if modes is None:
39 | modes = ["train", "test", "valid"]
40 |
41 | iou2d_d = {}
42 | for mode in modes:
43 | print("mode: {}".format(mode))
44 | iou2d = eval_dataset_visible_IoU(dataset(root_dir, mode, patch_num=1024,
45 | keys=['depth', 'visible_corners', 'corners', 'id']), show=False)
46 | iou2d_d[mode] = iou2d
47 | return iou2d_d
48 |
49 |
50 | if __name__ == '__main__':
51 | from dataset.mp3d_dataset import MP3DDataset
52 |
53 | iou2d_d = execute_eval_dataset_visible_IoU(root_dir='../src/dataset/mp3d',
54 | dataset=MP3DDataset,
55 | modes=['train', 'test', 'val'])
56 | print(iou2d_d)
57 |
--------------------------------------------------------------------------------
/visualization/visualizer/visualizer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import cv2
4 | import numpy as np
5 | from imageio import imread
6 | import json
7 | import argparse
8 | import visualization.visualizer.shader
9 |
10 | from PyQt5 import QtWidgets, QtGui, QtOpenGL
11 | from PyQt5.QtWidgets import *
12 | from PyQt5.QtGui import QIcon
13 | import PyQt5.QtCore as QtCore
14 |
15 | import glm
16 | from OpenGL.GL import *
17 | from OpenGL.GLU import *
18 | from OpenGL.GLUT import *
19 |
20 | from visualization.visualizer.Viewer import Utils
21 | from visualization.visualizer.Viewer import LayoutView
22 |
23 |
24 | class TopWindow(QMainWindow):
25 | def __init__(self, img, layout, floor_reverse=False, parent=None):
26 | super().__init__(parent)
27 | sizeObject = QtWidgets.QDesktopWidget().screenGeometry(-1)
28 | [self.h, self.w] = [sizeObject.height(), sizeObject.width()]
29 | ratio = 0.9
30 | self.h = int(self.h * ratio)
31 | self.w = int(self.w * ratio)
32 | self.setGeometry(20, 60, self.w, self.h)
33 | self.setWindowTitle("Layout Visualizer")
34 | self.centeralWidget = QWidget(self)
35 |
36 | self.layout = layout
37 | self.LayoutViewer = LayoutView.GLWindow(img, main=self, parent=self.centeralWidget)
38 | wallNum, wallPoints, lines, mesh = Utils.Label2Mesh(Utils.OldFormat2Mine(self.layout), floor_reverse)
39 | self.LayoutViewer.updateLayoutMesh(wallNum, wallPoints, lines, mesh)
40 |
41 | layout = QGridLayout()
42 | layout.setRowStretch(0, 1)
43 | layout.setColumnStretch(0, 1)
44 | layout.addWidget(self.LayoutViewer, 0, 0, 1, 1)
45 | self.centeralWidget.setLayout(layout)
46 | self.setCentralWidget(self.centeralWidget)
47 |
48 | def enterEvent(self, event):
49 | self.setFocus(True)
50 |
51 |
52 | def visualize_3d(layout, img):
53 | app = QtWidgets.QApplication(sys.argv)
54 | window = TopWindow(img, layout=layout)
55 | window.show()
56 | # cv2.waitKey()
57 | sys.exit(app.exec_())
58 |
59 |
60 | if __name__ == '__main__':
61 | parser = argparse.ArgumentParser(description='360 Layout Visualizer',
62 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
63 | parser.add_argument('--img', type=str, required=True, help='The panorama path')
64 | parser.add_argument('--json', type=str, required=True, help='The output json path')
65 | args = parser.parse_args()
66 |
67 | img = imread(args.img, pilmode='RGB')
68 | with open(args.json, 'r') as f:
69 | layout = json.load(f)
70 |
71 | visualize_3d(layout, img)
72 |
--------------------------------------------------------------------------------
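Besides the command-line entry point above, the viewer can be driven directly; a minimal sketch (the panorama and layout JSON paths are assumptions for illustration):

import json
from imageio import imread
from visualization.visualizer.visualizer import visualize_3d

img = imread('src/demo/demo.png', pilmode='RGB')      # assumed demo panorama path
with open('src/demo/demo.json', 'r') as f:             # assumed layout annotation path
    layout = json.load(f)
visualize_3d(layout, img)                               # opens the PyQt/OpenGL layout viewer
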
/models/other/criterion.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/7/19
3 | @description:
4 | """
5 | import torch
6 | import loss
7 |
8 | from utils.misc import tensor2np
9 |
10 |
11 | def build_criterion(config, logger):
12 | criterion = {}
13 | device = config.TRAIN.DEVICE
14 |
15 | for k in config.TRAIN.CRITERION.keys():
16 | sc = config.TRAIN.CRITERION[k]
17 | if sc.WEIGHT is None or float(sc.WEIGHT) == 0:
18 | continue
19 | criterion[sc.NAME] = {
20 | 'loss': getattr(loss, sc.LOSS)(),
21 | 'weight': float(sc.WEIGHT),
22 | 'sub_weights': sc.WEIGHTS,
23 | 'need_all': sc.NEED_ALL
24 | }
25 |
26 | criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].to(device)
27 | if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
28 | criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].type(torch.float16)
29 |
30 | # logger.info(f"Build criterion:{sc.WEIGHT}_{sc.NAME}_{sc.LOSS}_{sc.WEIGHTS}")
31 | return criterion
32 |
33 |
34 | def calc_criterion(criterion, gt, dt, epoch_loss_d):
35 | loss = None
36 | postfix_d = {}
37 | for k in criterion.keys():
38 | if criterion[k]['need_all']:
39 | single_loss = criterion[k]['loss'](gt, dt)
40 | ws_loss = None
41 | for i, sub_weight in enumerate(criterion[k]['sub_weights']):
42 | if sub_weight == 0:
43 | continue
44 | if ws_loss is None:
45 | ws_loss = single_loss[i] * sub_weight
46 | else:
47 | ws_loss = ws_loss + single_loss[i] * sub_weight
48 | single_loss = ws_loss if ws_loss is not None else single_loss
49 | else:
50 | assert k in gt.keys(), "ground-truth label is missing: " + k
51 | assert k in dt.keys(), "prediction key is missing: " + k
52 | if k == 'ratio' and gt[k].shape[-1] != dt[k].shape[-1]:
53 | gt[k] = gt[k].repeat(1, dt[k].shape[-1])
54 | single_loss = criterion[k]['loss'](gt[k], dt[k])
55 |
56 | postfix_d[k] = tensor2np(single_loss)
57 | if k not in epoch_loss_d.keys():
58 | epoch_loss_d[k] = []
59 | epoch_loss_d[k].append(postfix_d[k])
60 |
61 | single_loss = single_loss * criterion[k]['weight']
62 | if loss is None:
63 | loss = single_loss
64 | else:
65 | loss = loss + single_loss
66 |
67 | k = 'loss'
68 | postfix_d[k] = tensor2np(loss)
69 | if k not in epoch_loss_d.keys():
70 | epoch_loss_d[k] = []
71 | epoch_loss_d[k].append(postfix_d[k])
72 | return loss, postfix_d, epoch_loss_d
73 |
--------------------------------------------------------------------------------
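build_criterion reads five fields from every entry under config.TRAIN.CRITERION; a sketch of one such entry follows (the values are illustrative, not the repository defaults):

# Illustrative values only, not the repository defaults.
example_entry = {
    'NAME': 'grad',        # key under which the loss is stored in the returned criterion dict
    'LOSS': 'GradLoss',    # class name looked up in the `loss` package via getattr
    'WEIGHT': 1.0,         # overall weight; None or 0 skips the entry entirely
    'WEIGHTS': [1.0, 1.0], # per-term sub-weights, applied when NEED_ALL is True
    'NEED_ALL': True,      # True: loss(gt, dt) returns a list of terms; False: loss(gt[k], dt[k])
}
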
/visualization/visualizer/shader/fragment_pano.glsl:
--------------------------------------------------------------------------------
1 | #version 410
2 | #define pi 3.14159265359
3 | layout(location = 0) out vec4 fragColor;
4 | in vec3 modelPosition;
5 |
6 | uniform sampler2D pano;
7 | uniform float alpha;
8 | uniform int wallNum;
9 | uniform vec2 wallPoints[100];
10 |
11 |
12 | bool intersect1D(float a1, float a2, float b1, float b2)
13 | {
14 | if (a1 > a2)
15 | {
16 | float tmp = a1;
17 | a1 = a2;
18 | a2 = tmp;
19 | }
20 | if (b1 > b2)
21 | {
22 | float tmp = b1;
23 | b1 =b2;
24 | b2 = tmp;
25 | }
26 | return max(a1, b1) <= min(a2, b2);
27 | }
28 | float cross(vec2 o, vec2 a, vec2 b)
29 | {
30 | return (a.x-o.x) * (b.y-o.y) - (a.y-o.y) * (b.x-o.x);
31 | }
32 |
33 | bool intersect(vec2 a1, vec2 a2, vec2 b1, vec2 b2)
34 | {
35 | return intersect1D(a1.x, a2.x, b1.x, b2.x)
36 | && intersect1D(a1.y, a2.y, b1.y, b2.y)
37 | && cross(a1, a2, b1) * cross(a1, a2, b2) <= 0
38 | && cross(b1, b2, a1) * cross(b1, b2, a2) <= 0;
39 | }
40 |
41 | bool checkIntersectWalls(vec2 pts){
42 | vec2 a = pts * 0.99;
43 | vec2 b = vec2(0, 0);
44 | for (int i=0; i<wallNum; i++)
45 | {
46 | vec2 c = wallPoints[i];
47 | vec2 d = wallPoints[int(mod(i+1, wallNum))];
48 | //if (max(a.x, b.x)>=min(c.x, d.x) && min(a.y,b.y)<=max(c.y,d.y) && max(a.y, b.y)>=min(c.y, d.y))
49 | if (intersect(a, b, c, d))
50 | return true;
51 | /*
52 | float u=(c.x-a.x)*(b.y-a.y)-(b.x-a.x)*(c.y-a.y);
53 | float v=(d.x-a.x)*(b.y-a.y)-(b.x-a.x)*(d.y-a.y);
54 | float w=(a.x-c.x)*(d.y-c.y)-(d.x-c.x)*(a.y-c.y);
55 | float z=(b.x-c.x)*(d.y-c.y)-(d.x-c.x)*(b.y-c.y);
56 | return (u*v<=1e-5 && w*z<=1e-5);
57 | */
58 | }
59 |
60 | return false;
61 | }
62 |
63 |
64 | void main(){
65 | float x = modelPosition.x;
66 | float y = modelPosition.y;
67 | float z = modelPosition.z;
68 | float normXYZ = sqrt(pow(x, 2) + pow(y, 2) + pow(z, 2));
69 | float normXZ = sqrt(pow(x, 2) + pow(z, 2));
70 | float lon = (atan(x, z) / pi + 1) * 0.5;
71 | float lat = (asin(y / normXYZ) / (0.5*pi) + 1) * 0.5;
72 | vec2 coord = vec2(lon, lat);
73 | if (!checkIntersectWalls(vec2(x, z)))
74 | //if (true)
75 | fragColor = vec4(texture(pano, coord).xyz, alpha);
76 | else{
77 | if (mod(y * 10, 10) < 5 ^^ mod(x * 10, 10) < 5 ^^ mod(z * 10, 10) < 5)
78 | fragColor = vec4(vec3(1.0, 1.0, 1.0), alpha);
79 | else
80 | fragColor = vec4(vec3(0.5, 0.5, 0.5), alpha);
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
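The shader samples the panorama by converting the fragment's 3D position to equirectangular texture coordinates. A small Python sketch of the same mapping, assumed to mirror the GLSL above, for reference:

import numpy as np

def xyz_to_pano_uv(x, y, z):
    """Equirectangular (lon, lat) texture coordinates in [0, 1], mirroring fragment_pano.glsl."""
    norm_xyz = np.sqrt(x * x + y * y + z * z)
    lon = (np.arctan2(x, z) / np.pi + 1) * 0.5
    lat = (np.arcsin(y / norm_xyz) / (0.5 * np.pi) + 1) * 0.5
    return lon, lat

print(xyz_to_pano_uv(1.0, 0.0, 1.0))   # (0.625, 0.5): a point on the horizon, 45 degrees from +z toward +x
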
/visualization/obj3d.py:
--------------------------------------------------------------------------------
1 | """
2 | @author: Zhigang Jiang
3 | @time: 2022/05/25
4 | @description: reference: https://github.com/sunset1995/PanoPlane360/blob/main/vis_planes.py
5 | """
6 | import open3d
7 | import numpy as np
8 | from utils.conversion import pixel2lonlat
9 |
10 |
11 | def create_3d_obj(img, depth, save_path=None, mesh=True, mesh_show_back_face=False, show=False):
12 | assert img.shape[0] == depth.shape[0], "image and depth must have the same height"
13 | h = img.shape[0]
14 | w = img.shape[1]
15 | # Project to 3d
16 | lon = pixel2lonlat(np.array(range(w)), w=w, axis=0)[None].repeat(h, axis=0)
17 | lat = pixel2lonlat(np.array(range(h)), h=h, axis=1)[..., None].repeat(w, axis=1)
18 |
19 | z = depth * np.sin(lat)
20 | x = depth * np.cos(lat) * np.cos(lon)
21 | y = depth * np.cos(lat) * np.sin(lon)
22 | pts_xyz = np.stack([x, -z, y], -1).reshape(-1, 3)
23 | pts_rgb = img.reshape(-1, 3)
24 |
25 | if mesh:
26 | pid = np.arange(len(pts_xyz)).reshape(h, w)
27 | faces = np.concatenate([
28 | np.stack([
29 | pid[:-1, :-1], pid[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1],
30 | ], -1),
31 | np.stack([
32 | pid[1:, :-1], np.roll(pid, -1, axis=1)[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1],
33 | ], -1)
34 | ]).reshape(-1, 3).tolist()
35 | scene = open3d.geometry.TriangleMesh()
36 | scene.vertices = open3d.utility.Vector3dVector(pts_xyz)
37 | scene.vertex_colors = open3d.utility.Vector3dVector(pts_rgb)
38 | scene.triangles = open3d.utility.Vector3iVector(faces)
39 |
40 | else:
41 | scene = open3d.geometry.PointCloud()
42 | scene.points = open3d.utility.Vector3dVector(pts_xyz)
43 | scene.colors = open3d.utility.Vector3dVector(pts_rgb)
44 | if save_path:
45 | open3d.io.write_triangle_mesh(save_path, scene, write_triangle_uvs=True)
46 | if show:
47 | open3d.visualization.draw_geometries([scene], mesh_show_back_face=mesh_show_back_face)
48 |
49 |
50 | if __name__ == '__main__':
51 | from dataset.mp3d_dataset import MP3DDataset
52 | from utils.boundary import depth2boundaries, layout2depth
53 | from visualization.boundary import draw_boundaries
54 |
55 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', for_test_index=10, patch_num=1024)
56 | gt = mp3d_dataset.__getitem__(3)
57 |
58 | boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None)
59 | pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
60 | layout_depth = layout2depth(boundary_list, show=False)
61 | create_3d_obj(gt['image'].transpose(1, 2, 0), layout_depth, save_path=f"../src/output/{gt['id']}_3d.gltf",
62 | mesh=True)
63 |
--------------------------------------------------------------------------------
/evaluation/f1_score.py:
--------------------------------------------------------------------------------
1 | """
2 | @author: Zhigang Jiang
3 | @time: 2022/01/28
4 | @description:
5 | Holistic 3D Vision Challenge on General Room Layout Estimation Track Evaluation Package
6 | Reference: https://github.com/bertjiazheng/indoor-layout-evaluation
7 | """
8 |
9 | from scipy.optimize import linear_sum_assignment
10 | import numpy as np
11 | import scipy
12 |
13 | HEIGHT, WIDTH = 512, 1024
14 | MAX_DISTANCE = np.sqrt(HEIGHT**2 + WIDTH**2)
15 |
16 |
17 | def f1_score_2d(gt_corners, dt_corners, thresholds):
18 | distances = scipy.spatial.distance.cdist(gt_corners, dt_corners)
19 | return eval_junctions(distances, thresholds=thresholds)
20 |
21 |
22 | def eval_junctions(distances, thresholds=5):
23 | thresholds = thresholds if isinstance(thresholds, tuple) or isinstance(
24 | thresholds, list) else list([thresholds])
25 |
26 | num_gts, num_preds = distances.shape
27 |
28 | # filter the matches between ceiling-wall and floor-wall junctions
29 | mask = np.zeros_like(distances, dtype=bool)  # np.bool was removed in NumPy 1.24; plain bool is equivalent
30 | mask[:num_gts//2, :num_preds//2] = True
31 | mask[num_gts//2:, num_preds//2:] = True
32 | distances[~mask] = np.inf
33 |
34 | # F-measure under different thresholds
35 | Fs = []
36 | Ps = []
37 | Rs = []
38 | for threshold in thresholds:
39 | distances_temp = distances.copy()
40 |
41 | # filter the mis-matched pairs
42 | distances_temp[distances_temp > threshold] = np.inf
43 |
44 | # remain the rows and columns that contain non-inf elements
45 | distances_temp = distances_temp[:, np.any(np.isfinite(distances_temp), axis=0)]
46 |
47 | if np.prod(distances_temp.shape) == 0:
48 | Fs.append(0)
49 | Ps.append(0)
50 | Rs.append(0)
51 | continue
52 |
53 | distances_temp = distances_temp[np.any(np.isfinite(distances_temp), axis=1), :]
54 |
55 | # solve the bipartite graph matching problem
56 | row_ind, col_ind = linear_sum_assignment_with_inf(distances_temp)
57 | true_positive = np.sum(np.isfinite(distances_temp[row_ind, col_ind]))
58 |
59 | # compute precision and recall
60 | precision = true_positive / num_preds
61 | recall = true_positive / num_gts
62 |
63 | # compute F measure
64 | Fs.append(2 * precision * recall / (precision + recall))
65 | Ps.append(precision)
66 | Rs.append(recall)
67 |
68 | return Fs, Ps, Rs
69 |
70 |
71 | def linear_sum_assignment_with_inf(cost_matrix):
72 | """
73 | Deal with linear_sum_assignment with inf according to
74 | https://github.com/scipy/scipy/issues/6900#issuecomment-451735634
75 | """
76 | cost_matrix = np.copy(cost_matrix)
77 | cost_matrix[np.isinf(cost_matrix)] = MAX_DISTANCE
78 | return linear_sum_assignment(cost_matrix)
--------------------------------------------------------------------------------
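A tiny usage sketch of the junction F-score; the corner coordinates are invented, and the first half of each array is treated as ceiling junctions, the second half as floor junctions:

import numpy as np
from evaluation.f1_score import f1_score_2d

gt_corners = np.array([[100, 120], [300, 118], [100, 400], [300, 402]], dtype=float)
dt_corners = np.array([[102, 121], [298, 119], [101, 399], [301, 401]], dtype=float)

Fs, Ps, Rs = f1_score_2d(gt_corners, dt_corners, thresholds=[5, 10])
print(Fs, Ps, Rs)   # F-measure, precision, recall at each pixel threshold
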
/evaluation/analyse_layout_type.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2022/01/31
3 | @description:
4 | ZInd:
5 | {'test': {'mw': 2789, 'aw': 381}, 'train': {'mw': 21228, 'aw': 3654}, 'val': {'mw': 2647, 'aw': 433}}
6 |
7 | """
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 | import json
11 |
12 | from tqdm import tqdm
13 | from evaluation.iou import calc_IoU_2D
14 | from visualization.floorplan import draw_floorplan
15 | from visualization.boundary import draw_boundaries
16 | from utils.conversion import depth2xyz, uv2xyz
17 |
18 |
19 | def analyse_layout_type(dataset, show=False):
20 | bar = tqdm(dataset, total=len(dataset), ncols=100)
21 | manhattan = 0
22 | atlanta = 0
23 | corner_type = {}
24 | for data in bar:
25 | bar.set_description(f"Processing {data['id']}")
26 | corners = data['corners']
27 | corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners
28 | corners_count = str(len(corners)) if len(corners) < 10 else "10"
29 | if corners_count not in corner_type:
30 | corner_type[corners_count] = 0
31 | corner_type[corners_count] += 1
32 |
33 | all_xz = uv2xyz(corners)[..., ::2]
34 |
35 | c = len(all_xz)
36 | flag = False
37 | for i in range(c - 1):
38 | l1 = all_xz[i + 1] - all_xz[i]
39 | l2 = all_xz[(i + 2) % c] - all_xz[i + 1]
40 | a = (np.linalg.norm(l1)*np.linalg.norm(l2))
41 | if a == 0:
42 | continue
43 | dot = np.dot(l1, l2)/a
44 | if 0.9 > abs(dot) > 0.1:
45 | # cos-1(0.1)=84.26 > angle > cos-1(0.9)=25.84 or
46 | # cos-1(-0.9)=154.16 > angle > cos-1(-0.1)=95.74
47 | flag = True
48 | break
49 | if flag:
50 | atlanta += 1
51 | else:
52 | manhattan += 1
53 |
54 | if flag and show:
55 | draw_floorplan(all_xz, show=True)
56 | draw_boundaries(data['image'].transpose(1, 2, 0), [corners], ratio=data['ratio'], show=True)
57 |
58 | corner_type = dict(sorted(corner_type.items(), key=lambda item: int(item[0])))
59 | return {'manhattan': manhattan, "atlanta": atlanta, "corner_type": corner_type}
60 |
61 |
62 | def execute_analyse_layout_type(root_dir, dataset, modes=None):
63 | if modes is None:
64 | modes = ["train", "val", "test"]
65 |
66 | iou2d_d = {}
67 | for mode in modes:
68 | print("mode: {}".format(mode))
69 | types = analyse_layout_type(dataset(root_dir, mode), show=False)
70 | iou2d_d[mode] = types
71 | print(json.dumps(types, indent=4))
72 | return iou2d_d
73 |
74 |
75 | if __name__ == '__main__':
76 | from dataset.zind_dataset import ZindDataset
77 | from dataset.mp3d_dataset import MP3DDataset
78 |
79 | iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/mp3d',
80 | dataset=MP3DDataset)
81 | # iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/zind',
82 | # dataset=ZindDataset)
83 | print(json.dumps(iou2d_d, indent=4))
84 |
--------------------------------------------------------------------------------
/preprocessing/filter.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/7/5
3 | @description:
4 | """
5 | import json
6 | import math
7 | import shutil
8 |
9 | import numpy as np
10 | from utils.boundary import *
11 | import dataset
12 | import os
13 | from tqdm import tqdm
14 | from PIL import Image
15 | from visualization.boundary import *
16 | from visualization.floorplan import *
17 | from shapely.geometry import Polygon, Point
18 |
19 |
20 | def filter_center(ceil_corners):
21 | xyz = uv2xyz(ceil_corners, plan_y=1.6)
22 | xz = xyz[:, ::2]
23 | poly = Polygon(xz).buffer(-0.01)
24 | return poly.contains(Point(0, 0))
25 |
26 |
27 | def filter_boundary(corners):
28 | if is_ceil_boundary(corners):
29 | return True
30 | elif is_floor_boundary(corners):
31 | return True
32 | else:
33 | # Ceiling and floor boundaries intersect each other; treat this as an invalid (exceptional) case
34 | return False
35 |
36 |
37 | def filter_self_intersection(corners):
38 | xz = uv2xyz(corners)[:, ::2]
39 | poly = Polygon(xz)
40 | return poly.is_valid
41 |
42 |
43 | def filter_dataset(dataset, show=False, output_dir=None):
44 | if output_dir is None:
45 | output_dir = os.path.join(dataset.root_dir, dataset.mode)
46 | output_img_dir = os.path.join(output_dir, 'img_align')
47 | output_label_dir = os.path.join(output_dir, 'label_cor_align')
48 | else:
49 | output_dir = os.path.join(output_dir, dataset.mode)
50 | output_img_dir = os.path.join(output_dir, 'img')
51 | output_label_dir = os.path.join(output_dir, 'label_cor')
52 |
53 | if not os.path.exists(output_img_dir):
54 | os.makedirs(output_img_dir)
55 |
56 | if not os.path.exists(output_label_dir):
57 | os.makedirs(output_label_dir)
58 |
59 | bar = tqdm(dataset, total=len(dataset))
60 | for data in bar:
61 | name = data['name']
62 | bar.set_description(f"Processing {name}")
63 | img = data['img']
64 | corners = data['corners']
65 |
66 | if not filter_center(corners[1::2]):
67 | if show:
68 | draw_boundaries(img, corners_list=[corners[0::2], corners[1::2]], show=True)
69 | if not os.path.exists(data['img_path']):
70 | print("already remove")
71 | else:
72 | print(f"move {name}")
73 | shutil.move(data['img_path'], os.path.join(output_img_dir, os.path.basename(data['img_path'])))
74 | shutil.move(data['label_path'], os.path.join(output_label_dir, os.path.basename(data['label_path'])))
75 |
76 |
77 | def execute_filter_dataset(root_dir, dataset_name="PanoS2D3DDataset", modes=None, output_dir=None):
78 | if modes is None:
79 | modes = ["train", "test", "valid"]
80 |
81 | for mode in modes:
82 | print("mode: {}".format(mode))
83 |
84 | filter_dataset(getattr(dataset, dataset_name)(root_dir, mode), show=False, output_dir=output_dir)
85 |
86 |
87 | if __name__ == '__main__':
88 | execute_filter_dataset(root_dir='/root/data/hd/hnet_dataset',
89 | dataset_name="PanoS2D3DDataset", modes=['train', "test", "valid"],
90 | output_dir='/root/data/hd/hnet_dataset_close')
91 |
--------------------------------------------------------------------------------
/models/build.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/18
3 | @description:
4 | """
5 | import os
6 | import models
7 | import torch.distributed as dist
8 | import torch
9 |
10 | from torch.nn import init
11 | from torch.optim import lr_scheduler
12 | from utils.time_watch import TimeWatch
13 | from models.other.optimizer import build_optimizer
14 | from models.other.criterion import build_criterion
15 |
16 |
17 | def build_model(config, logger):
18 | name = config.MODEL.NAME
19 | w = TimeWatch(f"Build model: {name}", logger)
20 |
21 | ddp = config.WORLD_SIZE > 1
22 | if ddp:
23 | logger.info(f"use ddp")
24 | dist.init_process_group("nccl", init_method='tcp://127.0.0.1:23456', rank=config.LOCAL_RANK,
25 | world_size=config.WORLD_SIZE)
26 |
27 | device = config.TRAIN.DEVICE
28 | logger.info(f"Creating model: {name} to device:{device}, args:{config.MODEL.ARGS[0]}")
29 |
30 | net = getattr(models, name)
31 | ckpt_dir = os.path.abspath(os.path.join(config.CKPT.DIR, os.pardir)) if config.DEBUG else config.CKPT.DIR
32 | if len(config.MODEL.ARGS) != 0:
33 | model = net(ckpt_dir=ckpt_dir, **config.MODEL.ARGS[0])
34 | else:
35 | model = net(ckpt_dir=ckpt_dir)
36 | logger.info(f'model dropout: {model.dropout_d}')
37 | model = model.to(device)
38 | optimizer = None
39 | scheduler = None
40 |
41 | if config.MODE == 'train':
42 | optimizer = build_optimizer(config, model, logger)
43 |
44 | config.defrost()
45 | config.TRAIN.START_EPOCH = model.load(device, logger, optimizer, best=config.MODE != 'train' or not config.TRAIN.RESUME_LAST)
46 | config.freeze()
47 |
48 | if config.MODE == 'train' and len(config.MODEL.FINE_TUNE) > 0:
49 | for param in model.parameters():
50 | param.requires_grad = False
51 | for layer in config.MODEL.FINE_TUNE:
52 | logger.info(f'Fine-tune: {layer}')
53 | getattr(model, layer).requires_grad_(requires_grad=True)
54 | getattr(model, layer).reset_parameters()
55 |
56 | model.show_parameter_number(logger)
57 |
58 | if config.MODE == 'train':
59 | if len(config.TRAIN.LR_SCHEDULER.NAME) > 0:
60 | if 'last_epoch' not in config.TRAIN.LR_SCHEDULER.ARGS[0].keys():
61 | config.TRAIN.LR_SCHEDULER.ARGS[0]['last_epoch'] = config.TRAIN.START_EPOCH - 1
62 |
63 | scheduler = getattr(lr_scheduler, config.TRAIN.LR_SCHEDULER.NAME)(optimizer=optimizer,
64 | **config.TRAIN.LR_SCHEDULER.ARGS[0])
65 | logger.info(f"Use scheduler: name:{config.TRAIN.LR_SCHEDULER.NAME} args: {config.TRAIN.LR_SCHEDULER.ARGS[0]}")
66 | logger.info(f"Current scheduler last lr: {scheduler.get_last_lr()}")
67 | else:
68 | scheduler = None
69 |
70 | if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
71 | import apex
72 | logger.info(f"use amp:{config.AMP_OPT_LEVEL}")
73 | model, optimizer = apex.amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL, verbosity=0)
74 | if ddp:
75 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.TRAIN.DEVICE],
76 | broadcast_buffers=True) # use rank:0 bn
77 |
78 | criterion = build_criterion(config, logger)
79 | if optimizer is not None:
80 | logger.info(f"Finally lr: {optimizer.param_groups[0]['lr']}")
81 | return model, optimizer, criterion, scheduler
82 |
--------------------------------------------------------------------------------
/visualization/grad.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/11/06
3 | @description:
4 | """
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import matplotlib.pyplot as plt
9 |
10 | from utils.conversion import depth2xyz
11 |
12 |
13 | def convert_img(value, h, need_nor=True, cmap=None):
14 | value = value.clone().detach().cpu().numpy()[None]
15 | if need_nor:
16 | value -= value.min()
17 | value /= value.max() - value.min()
18 | grad_img = value.repeat(int(h), axis=0)
19 |
20 | if cmap is None:
21 | grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1)
22 | elif cmap == cv2.COLORMAP_PLASMA:
23 | grad_img = cv2.applyColorMap((grad_img * 255).astype(np.uint8), colormap=cmap)
24 | grad_img = grad_img[..., ::-1]
25 | grad_img = grad_img.astype(np.float64) / 255.0  # np.float alias was removed in NumPy 1.24
26 | elif cmap == 'HSV':
27 | grad_img = np.round(grad_img * 1000) / 1000.0
28 | grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1)
29 | grad_img[..., 0] = grad_img[..., 0] * 180
30 | grad_img[..., 1] = 255
31 | grad_img[..., 2] = 255
32 | grad_img = grad_img.astype(np.uint8)
33 | grad_img = cv2.cvtColor(grad_img, cv2.COLOR_HSV2RGB)
34 | grad_img = grad_img.astype(np.float64) / 255.0
35 | return grad_img
36 |
37 |
38 | def show_grad(depth, grad_conv, h=5, show=False):
39 | """
40 | :param h:
41 | :param depth: [patch_num]
42 | :param grad_conv:
43 | :param show:
44 | :return:
45 | """
46 |
47 | direction, angle, grad = get_all(depth[None], grad_conv)
48 |
49 | # depth_img = convert_img(depth, h)
50 | # angle_img = convert_img(angle[0], h)
51 | # grad_img = convert_img(grad[0], depth.shape[-1] // 4 - h * 2)
52 | depth_img = convert_img(depth, h, cmap=cv2.COLORMAP_PLASMA)
53 | angle_img = convert_img(angle[0], h, cmap='HSV')
54 |
55 | # vis_grad = grad[0] / grad[0].max() / 2 + 0.5
56 | grad_img = convert_img(grad[0], h)
57 | img = np.concatenate([depth_img, angle_img, grad_img], axis=0)
58 | if show:
59 | plt.imshow(img)
60 | plt.show()
61 | return img
62 |
63 |
64 | def get_grad(direction):
65 | """
66 | :param direction: [b patch_num]
67 | :return:[b patch_num]
68 | """
69 | a = torch.roll(direction, -1, dims=1) # xz[i+1]
70 | b = torch.roll(direction, 1, dims=1) # xz[i-1]
71 | grad = torch.acos(torch.clip(a[..., 0] * b[..., 0] + a[..., 1] * b[..., 1], -1+1e-6, 1-1e-6))
72 | return grad
73 |
74 |
75 | def get_grad2(angle, grad_conv):
76 | """
77 | :param angle: [b patch_num]
78 | :param grad_conv:
79 | :return:[b patch_num]
80 | """
81 | angle = torch.sin(angle)
82 | angle = angle + 1
83 |
84 | angle = torch.cat([angle[..., -1:], angle, angle[..., :1]], dim=-1)
85 | grad = grad_conv(angle[:, None]) # [b, patch_num] -> [b, 1, patch_num]
86 | # grad = torch.abs(grad)
87 | return grad.reshape(angle.shape[0], -1)
88 |
89 |
90 | def get_edge_angle(direction):
91 | """
92 | :param direction: [b patch_num 2]
93 | :return:
94 | """
95 | angle = torch.atan2(direction[..., 1], direction[..., 0])
96 | return angle
97 |
98 |
99 | def get_edge_direction(depth):
100 | xz = depth2xyz(depth)[..., ::2]
101 | direction = torch.roll(xz, -1, dims=1) - xz # direct[i] = xz[i+1] - xz[i]
102 | direction = direction / direction.norm(p=2, dim=-1)[..., None]
103 | return direction
104 |
105 |
106 | def get_all(depth, grad_conv):
107 | """
108 |
109 | :param grad_conv:
110 | :param depth: [b patch_num]
111 | :return:
112 | """
113 | direction = get_edge_direction(depth)
114 | angle = get_edge_angle(direction)
115 | # angle_grad = get_grad(direction)
116 | angle_grad = get_grad2(angle, grad_conv) # signed gradient
117 | return direction, angle, angle_grad
118 |
--------------------------------------------------------------------------------
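A minimal sketch of calling get_all with the same circular [1, 0, -1] kernel that GradLoss builds; the toy depth values are invented:

import torch
import torch.nn as nn
from visualization.grad import get_all

grad_conv = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=0, bias=False, padding_mode='circular')
grad_conv.weight = nn.Parameter(torch.tensor([[[1, 0, -1]]]).float(), requires_grad=False)

depth = torch.ones(1, 256)                              # [b, patch_num]; toy constant depth
direction, angle, angle_grad = get_all(depth, grad_conv)
print(direction.shape, angle.shape, angle_grad.shape)   # [1, 256, 2], [1, 256], [1, 256]
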
/models/modules/conv_transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | from torch import nn, einsum
5 | from einops import rearrange
6 |
7 |
8 | class PreNorm(nn.Module):
9 | def __init__(self, dim, fn):
10 | super().__init__()
11 | self.norm = nn.LayerNorm(dim)
12 | self.fn = fn
13 |
14 | def forward(self, x, **kwargs):
15 | return self.fn(self.norm(x), **kwargs)
16 |
17 |
18 | class GELU(nn.Module):
19 | def forward(self, input):
20 | return F.gelu(input)
21 |
22 |
23 | class Attend(nn.Module):
24 |
25 | def __init__(self, dim=None):
26 | super().__init__()
27 | self.dim = dim
28 |
29 | def forward(self, input):
30 | return F.softmax(input, dim=self.dim, dtype=input.dtype)
31 |
32 |
33 | class FeedForward(nn.Module):
34 | def __init__(self, dim, hidden_dim, dropout=0.):
35 | super().__init__()
36 | self.net = nn.Sequential(
37 | nn.Linear(dim, hidden_dim),
38 | GELU(),
39 | nn.Dropout(dropout),
40 | nn.Linear(hidden_dim, dim),
41 | nn.Dropout(dropout)
42 | )
43 |
44 | def forward(self, x):
45 | return self.net(x)
46 |
47 |
48 | class Attention(nn.Module):
49 | def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
50 | super().__init__()
51 | inner_dim = dim_head * heads
52 | project_out = not (heads == 1 and dim_head == dim)
53 |
54 | self.heads = heads
55 | self.scale = dim_head ** -0.5
56 |
57 | self.attend = Attend(dim=-1)
58 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
59 |
60 | self.to_out = nn.Sequential(
61 | nn.Linear(inner_dim, dim),
62 | nn.Dropout(dropout)
63 | ) if project_out else nn.Identity()
64 |
65 | def forward(self, x):
66 | b, n, _, h = *x.shape, self.heads
67 | qkv = self.to_qkv(x).chunk(3, dim=-1)
68 | q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv)
69 | dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
70 | attn = self.attend(dots)
71 | out = einsum('b h i j, b h j d -> b h i d', attn, v)
72 | out = rearrange(out, 'b h n d -> b n (h d)')
73 | return self.to_out(out)
74 |
75 |
76 | class Conv(nn.Module):
77 | def __init__(self, dim, dropout=0.):
78 | super().__init__()
79 | self.dim = dim
80 | self.net = nn.Sequential(
81 | nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0),
82 | nn.Dropout(dropout)
83 | )
84 |
85 | def forward(self, x):
86 | x = x.transpose(1, 2)
87 | x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1)
88 | x = self.net(x)
89 | return x.transpose(1, 2)
90 |
91 |
92 | class ConvTransformer(nn.Module):
93 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
94 | super().__init__()
95 | self.layers = nn.ModuleList([])
96 | for _ in range(depth):
97 | self.layers.append(nn.ModuleList([
98 | PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)),
99 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
100 | PreNorm(dim, Conv(dim, dropout=dropout))
101 | ]))
102 |
103 | def forward(self, x):
104 | for attn, ff, cov in self.layers:
105 | x = attn(x) + x
106 | x = ff(x) + x
107 | x = cov(x) + x
108 | return x
109 |
110 |
111 | if __name__ == '__main__':
112 | token_dim = 1024
113 | token_len = 256
114 |
115 | transformer = ConvTransformer(dim=token_dim,
116 | depth=6,
117 | heads=16,
118 | dim_head=64,
119 | mlp_dim=2048,
120 | dropout=0.1)
121 |
122 | total = sum(p.numel() for p in transformer.parameters())
123 | trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad)
124 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))
125 |
126 | input = torch.randn(1, token_len, token_dim)
127 | output = transformer(input)
128 | print(output.shape)
129 |
--------------------------------------------------------------------------------
/dataset/mp3d_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/25
3 | @description:
4 | """
5 | import os
6 | import json
7 |
8 | from dataset.communal.read import read_image, read_label
9 | from dataset.communal.base_dataset import BaseDataset
10 | from utils.logger import get_logger
11 |
12 |
13 | class MP3DDataset(BaseDataset):
14 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
15 | split_list=None, patch_num=256, keys=None, for_test_index=None):
16 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)
17 |
18 | if logger is None:
19 | logger = get_logger()
20 | self.root_dir = root_dir
21 |
22 | split_dir = os.path.join(root_dir, 'split')
23 | label_dir = os.path.join(root_dir, 'label')
24 | img_dir = os.path.join(root_dir, 'image')
25 |
26 | if split_list is None:
27 | with open(os.path.join(split_dir, f"{mode}.txt"), 'r') as f:
28 | split_list = [x.rstrip().split() for x in f]
29 |
30 | split_list.sort()
31 | if for_test_index is not None:
32 | split_list = split_list[:for_test_index]
33 |
34 | self.data = []
35 | invalid_num = 0
36 | for name in split_list:
37 | name = "_".join(name)
38 | img_path = os.path.join(img_dir, f"{name}.png")
39 | label_path = os.path.join(label_dir, f"{name}.json")
40 |
41 | if not os.path.exists(img_path):
42 | logger.warning(f"{img_path} not exists")
43 | invalid_num += 1
44 | continue
45 | if not os.path.exists(label_path):
46 | logger.warning(f"{label_path} not exists")
47 | invalid_num += 1
48 | continue
49 |
50 | with open(label_path, 'r') as f:
51 | label = json.load(f)
52 |
53 | if self.max_wall_num >= 10:
54 | if label['layoutWalls']['num'] < self.max_wall_num:
55 | invalid_num += 1
56 | continue
57 | elif self.max_wall_num != 0 and label['layoutWalls']['num'] != self.max_wall_num:
58 | invalid_num += 1
59 | continue
60 |
61 | # print(label['layoutWalls']['num'])
62 | self.data.append([img_path, label_path])
63 |
64 | logger.info(
65 | f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}")
66 |
67 | def __getitem__(self, idx):
68 | rgb_path, label_path = self.data[idx]
69 | label = read_label(label_path, data_type='MP3D')
70 | image = read_image(rgb_path, self.shape)
71 | output = self.process_data(label, image, self.patch_num)
72 | return output
73 |
74 |
75 | if __name__ == "__main__":
76 | import numpy as np
77 | from PIL import Image
78 |
79 | from tqdm import tqdm
80 | from visualization.boundary import draw_boundaries
81 | from visualization.floorplan import draw_floorplan
82 | from utils.boundary import depth2boundaries
83 | from utils.conversion import uv2xyz
84 |
85 | modes = ['test', 'val']
86 | for i in range(1):
87 | for mode in modes:
88 | print(mode)
89 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode=mode, aug={
90 | 'STRETCH': True,
91 | 'ROTATE': True,
92 | 'FLIP': True,
93 | 'GAMMA': True
94 | })
95 | save_dir = f'../src/dataset/mp3d/visualization/{mode}'
96 | if not os.path.isdir(save_dir):
97 | os.makedirs(save_dir)
98 |
99 | bar = tqdm(mp3d_dataset, ncols=100)
100 | for data in bar:
101 | bar.set_description(f"Processing {data['id']}")
102 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
103 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
104 | Image.fromarray((pano_img * 255).astype(np.uint8)).save(
105 | os.path.join(save_dir, f"{data['id']}_boundary.png"))
106 |
107 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True,
108 | marker_color=None, center_color=0.8, show_radius=None)
109 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
110 | os.path.join(save_dir, f"{data['id']}_floorplan.png"))
111 |
--------------------------------------------------------------------------------
/dataset/build.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/18
3 | @description:
4 | """
5 | import numpy as np
6 | import torch.utils.data
7 | from dataset.mp3d_dataset import MP3DDataset
8 | from dataset.pano_s2d3d_dataset import PanoS2D3DDataset
9 | from dataset.pano_s2d3d_mix_dataset import PanoS2D3DMixDataset
10 | from dataset.zind_dataset import ZindDataset
11 |
12 |
13 | def build_loader(config, logger):
14 | name = config.DATA.DATASET
15 | ddp = config.WORLD_SIZE > 1
16 | train_dataset = None
17 | train_data_loader = None
18 | if config.MODE == 'train':
19 | train_dataset = build_dataset(mode='train', config=config, logger=logger)
20 |
21 | val_dataset = build_dataset(mode=config.VAL_NAME if config.MODE != 'test' else 'test', config=config, logger=logger)
22 |
23 | train_sampler = None
24 | val_sampler = None
25 | if ddp:
26 | if train_dataset:
27 | train_sampler = torch.utils.data.DistributedSampler(train_dataset, shuffle=True)
28 | val_sampler = torch.utils.data.DistributedSampler(val_dataset, shuffle=False)
29 |
30 | batch_size = config.DATA.BATCH_SIZE
31 | num_workers = 0 if config.DEBUG else config.DATA.NUM_WORKERS
32 | pin_memory = config.DATA.PIN_MEMORY
33 | if train_dataset:
34 | logger.info(f'Train data loader batch size: {batch_size}')
35 | train_data_loader = torch.utils.data.DataLoader(
36 | train_dataset, sampler=train_sampler,
37 | batch_size=batch_size,
38 | shuffle=True,
39 | num_workers=num_workers,
40 | pin_memory=pin_memory,
41 | drop_last=True,
42 | )
43 | batch_size = batch_size - (len(val_dataset) % np.arange(batch_size, 0, -1)).tolist().index(0)  # largest batch size <= the configured one that divides len(val_dataset) evenly
44 | logger.info(f'Val data loader batch size: {batch_size}')
45 | val_data_loader = torch.utils.data.DataLoader(
46 | val_dataset, sampler=val_sampler,
47 | batch_size=batch_size,
48 | shuffle=False,
49 | num_workers=num_workers,
50 | pin_memory=pin_memory,
51 | drop_last=False
52 | )
53 | logger.info(f'Build data loader: num_workers:{num_workers} pin_memory:{pin_memory}')
54 | return train_data_loader, val_data_loader
55 |
56 |
57 | def build_dataset(mode, config, logger):
58 | name = config.DATA.DATASET
59 | if name == 'mp3d':
60 | dataset = MP3DDataset(
61 | root_dir=config.DATA.DIR,
62 | mode=mode,
63 | shape=config.DATA.SHAPE,
64 | max_wall_num=config.DATA.WALL_NUM,
65 | aug=config.DATA.AUG if mode == 'train' else None,
66 | camera_height=config.DATA.CAMERA_HEIGHT,
67 | logger=logger,
68 | for_test_index=config.DATA.FOR_TEST_INDEX,
69 | keys=config.DATA.KEYS
70 | )
71 | elif name == 'pano_s2d3d':
72 | dataset = PanoS2D3DDataset(
73 | root_dir=config.DATA.DIR,
74 | mode=mode,
75 | shape=config.DATA.SHAPE,
76 | max_wall_num=config.DATA.WALL_NUM,
77 | aug=config.DATA.AUG if mode == 'train' else None,
78 | camera_height=config.DATA.CAMERA_HEIGHT,
79 | logger=logger,
80 | for_test_index=config.DATA.FOR_TEST_INDEX,
81 | subset=config.DATA.SUBSET,
82 | keys=config.DATA.KEYS
83 | )
84 | elif name == 'pano_s2d3d_mix':
85 | dataset = PanoS2D3DMixDataset(
86 | root_dir=config.DATA.DIR,
87 | mode=mode,
88 | shape=config.DATA.SHAPE,
89 | max_wall_num=config.DATA.WALL_NUM,
90 | aug=config.DATA.AUG if mode == 'train' else None,
91 | camera_height=config.DATA.CAMERA_HEIGHT,
92 | logger=logger,
93 | for_test_index=config.DATA.FOR_TEST_INDEX,
94 | subset=config.DATA.SUBSET,
95 | keys=config.DATA.KEYS
96 | )
97 | elif name == 'zind':
98 | dataset = ZindDataset(
99 | root_dir=config.DATA.DIR,
100 | mode=mode,
101 | shape=config.DATA.SHAPE,
102 | max_wall_num=config.DATA.WALL_NUM,
103 | aug=config.DATA.AUG if mode == 'train' else None,
104 | camera_height=config.DATA.CAMERA_HEIGHT,
105 | logger=logger,
106 | for_test_index=config.DATA.FOR_TEST_INDEX,
107 | is_simple=True,
108 | is_ceiling_flat=False,
109 | keys=config.DATA.KEYS,
110 | vp_align=config.EVAL.POST_PROCESSING is not None and 'manhattan' in config.EVAL.POST_PROCESSING
111 | )
112 | else:
113 | raise NotImplementedError(f"Unknown dataset: {name}")
114 |
115 | return dataset
116 |
--------------------------------------------------------------------------------
/dataset/pano_s2d3d_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/16
3 | @description:
4 | """
5 | import math
6 | import os
7 | import numpy as np
8 |
9 | from dataset.communal.read import read_image, read_label
10 | from dataset.communal.base_dataset import BaseDataset
11 | from utils.logger import get_logger
12 |
13 |
14 | class PanoS2D3DDataset(BaseDataset):
15 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
16 | split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None):
17 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)
18 |
19 | if logger is None:
20 | logger = get_logger()
21 | self.root_dir = root_dir
22 |
23 | if mode is None:
24 | return
25 | label_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'label_cor')
26 | img_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'img')
27 |
28 | if split_list is None:
29 | split_list = [name.split('.')[0] for name in os.listdir(label_dir) if
30 | not name.startswith('.') and name.endswith('txt')]
31 |
32 | split_list.sort()
33 |
34 | assert subset == 'pano' or subset == 's2d3d' or subset is None, "subset must be 'pano', 's2d3d' or None"
35 | if subset == 'pano':
36 | split_list = [name for name in split_list if 'pano_' in name]
37 | logger.info(f"Use PanoContext Dataset")
38 | elif subset == 's2d3d':
39 | split_list = [name for name in split_list if 'camera_' in name]
40 | logger.info(f"Use Stanford2D3D Dataset")
41 |
42 | if for_test_index is not None:
43 | split_list = split_list[:for_test_index]
44 |
45 | self.data = []
46 | invalid_num = 0
47 | for name in split_list:
48 | img_path = os.path.join(img_dir, f"{name}.png")
49 | label_path = os.path.join(label_dir, f"{name}.txt")
50 |
51 | if not os.path.exists(img_path):
52 | logger.warning(f"{img_path} not exists")
53 | invalid_num += 1
54 | continue
55 | if not os.path.exists(label_path):
56 | logger.warning(f"{label_path} not exists")
57 | invalid_num += 1
58 | continue
59 |
60 | with open(label_path, 'r') as f:
61 | lines = [line for line in f.readlines() if
62 | len([c for c in line.split(' ') if c and c[0].isnumeric()]) > 1]
63 | if len(lines) % 2 != 0:
64 | invalid_num += 1
65 | continue
66 | self.data.append([img_path, label_path])
67 |
68 | logger.info(
69 | f"Build dataset mode: {self.mode} valid: {len(self.data)} invalid: {invalid_num}")
70 |
71 | def __getitem__(self, idx):
72 | rgb_path, label_path = self.data[idx]
73 | label = read_label(label_path, data_type='Pano_S2D3D')
74 | image = read_image(rgb_path, self.shape)
75 | output = self.process_data(label, image, self.patch_num)
76 | return output
77 |
78 |
79 | if __name__ == '__main__':
80 |
81 | modes = ['test', 'val', 'train']
82 | for i in range(1):
83 | for mode in modes:
84 | print(mode)
85 | mp3d_dataset = PanoS2D3DDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={
86 | # 'STRETCH': True,
87 | # 'ROTATE': True,
88 | # 'FLIP': True,
89 | # 'GAMMA': True
90 | })
91 | continue
92 | save_dir = f'../src/dataset/pano_s2d3d/visualization/{mode}'
93 | if not os.path.isdir(save_dir):
94 | os.makedirs(save_dir)
95 |
96 | bar = tqdm(mp3d_dataset, ncols=100)
97 | for data in bar:
98 | bar.set_description(f"Processing {data['id']}")
99 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
100 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False)
101 | Image.fromarray((pano_img * 255).astype(np.uint8)).save(
102 | os.path.join(save_dir, f"{data['id']}_boundary.png"))
103 |
104 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False,
105 | marker_color=None, center_color=0.8, show_radius=None)
106 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
107 | os.path.join(save_dir, f"{data['id']}_floorplan.png"))
108 |
--------------------------------------------------------------------------------
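Note on the __main__ block of pano_s2d3d_dataset.py above: everything after the `continue` statement is dead code and references names (tqdm, Image, depth2boundaries, draw_boundaries, draw_floorplan, uv2xyz) that this file never imports. A minimal sketch of the imports it would need, mirroring the __main__ block of pano_s2d3d_mix_dataset.py later in this repository:

from PIL import Image
from tqdm import tqdm
from visualization.boundary import draw_boundaries
from visualization.floorplan import draw_floorplan
from utils.boundary import depth2boundaries
from utils.conversion import uv2xyz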
/utils/height.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/30
3 | @description:
4 | """
5 | import numpy as np
6 | from typing import List
7 |
8 | from utils.boundary import *
9 | from scipy.optimize import least_squares
10 | from functools import partial
11 |
12 |
13 | def lsq_fit(ceil_norm, floor_norm):
14 | """
15 | Least Squares
16 | :param ceil_norm:
17 | :param floor_norm:
18 | :return:
19 | """
20 |
21 | def error_fun(ratio, ceil_norm, floor_norm):
22 | error = np.abs(ratio * ceil_norm - floor_norm)
23 | return error
24 |
25 | init_ratio = np.mean(floor_norm / ceil_norm, axis=-1)
26 | error_func = partial(error_fun, ceil_norm=ceil_norm, floor_norm=floor_norm)
27 | ret = least_squares(error_func, init_ratio, verbose=0)
28 | ratio = ret.x[0]
29 | return ratio
30 |
31 |
32 | def mean_percentile_fit(ceil_norm, floor_norm, p1=25, p2=75):
33 | """
34 | :param ceil_norm:
35 | :param floor_norm:
36 | :param p1:
37 | :param p2:
38 | :return:
39 | """
40 | ratio = floor_norm / ceil_norm
41 | r_min = np.percentile(ratio, p1)
42 | r_max = np.percentile(ratio, p2)
43 | return ratio[(r_min <= ratio) & (ratio <= r_max)].mean()
44 |
45 |
46 | def calc_ceil_ratio(boundaries: List[np.array], mode='lsq'):
47 | """
48 | :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ]
49 | :param mode: 'lsq' or 'mean'
50 | :return:
51 | """
52 | assert len(boundaries[0].shape) < 4 and len(boundaries[1].shape) < 4, 'error shape'
53 | if not is_normal_layout(boundaries):
54 | return 0
55 |
56 | ceil_boundary = boundaries[0]
57 | floor_boundary = boundaries[1]
58 | assert ceil_boundary.shape[-2] == floor_boundary.shape[-2], "boundary need same length"
59 |
60 | ceil_xyz = uv2xyz(ceil_boundary, -1)
61 | floor_xyz = uv2xyz(floor_boundary, 1)
62 |
63 | ceil_xz = ceil_xyz[..., ::2]
64 | floor_xz = floor_xyz[..., ::2]
65 |
66 | ceil_norm = np.linalg.norm(ceil_xz, axis=-1)
67 | floor_norm = np.linalg.norm(floor_xz, axis=-1)
68 |
69 | if mode == "lsq":
70 | if len(ceil_norm.shape) == 2:
71 | ratio = np.array([lsq_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])])
72 | else:
73 | ratio = lsq_fit(ceil_norm, floor_norm)
74 | else:
75 | if len(ceil_norm.shape) == 2:
76 | ratio = np.array([mean_percentile_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])])
77 | else:
78 | ratio = mean_percentile_fit(ceil_norm, floor_norm)
79 |
80 | return ratio
81 |
82 |
83 | def calc_ceil_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float:
84 | """
85 | :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ]
86 | :param camera_height:
87 | :param mode:
88 | :return:
89 | """
90 | ratio = calc_ceil_ratio(boundaries, mode)
91 | ceil_height = camera_height * ratio
92 | return ceil_height
93 |
94 |
95 | def calc_room_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float:
96 | """
97 |     :param boundaries: boundaries (or corners), format: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ],
98 |     index 0 denotes the ceiling, index 1 denotes the floor
99 |     :param camera_height: actual camera height, which determines the scale
100 |     :param mode: fitting method, 'lsq' or 'mean'
101 | :return:
102 | """
103 | ceil_height = calc_ceil_height(boundaries, camera_height, mode)
104 | room_height = camera_height + ceil_height
105 | return room_height
106 |
107 |
108 | def height2ratio(height, camera_height=1.6):
109 | ceil_height = height - camera_height
110 | ratio = ceil_height / camera_height
111 | return ratio
112 |
113 |
114 | def ratio2height(ratio, camera_height=1.6):
115 | ceil_height = camera_height * ratio
116 | room_height = camera_height + ceil_height
117 | return room_height
118 |
119 |
120 | if __name__ == '__main__':
121 | from dataset.mp3d_dataset import MP3DDataset
122 |
123 | dataset = MP3DDataset(root_dir="../src/dataset/mp3d", mode="train")
124 | for data in dataset:
125 | ceil_corners = data['corners'][::2]
126 | floor_corners = data['corners'][1::2]
127 | # ceil_boundary = corners2boundary(ceil_corners, length=1024)
128 | # floor_boundary = corners2boundary(floor_corners, length=1024)
129 | room_height1 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='mean')
130 | room_height2 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='lsq')
131 | print(room_height1, room_height2, data['cameraCeilingHeight'] + 1.6)
132 |
--------------------------------------------------------------------------------
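A minimal usage sketch for utils/height.py above (illustrative only; it assumes the repository root is on PYTHONPATH and the dependencies of utils.boundary are installed). With synthetic horizontal distances where the floor boundary is exactly 1.25x farther than the ceiling boundary, both fitting modes recover ratio = ceil_height / camera_height = 1.25:

import numpy as np
from utils.height import lsq_fit, mean_percentile_fit, ratio2height

ceil_norm = np.array([1.0, 2.0, 1.5, 2.5])
floor_norm = 1.25 * ceil_norm                      # ratio = floor_norm / ceil_norm = 1.25
print(lsq_fit(ceil_norm, floor_norm))              # ~1.25
print(mean_percentile_fit(ceil_norm, floor_norm))  # 1.25
print(ratio2height(1.25, camera_height=1.6))       # 1.6 + 1.6 * 1.25 = 3.6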
/postprocessing/dula/layout_old.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/10/06
3 | @description: Use the approach proposed by DuLa-Net
4 | """
5 | import cv2
6 | import numpy as np
7 | import math
8 | import matplotlib.pyplot as plt
9 |
10 | from visualization.floorplan import draw_floorplan
11 |
12 |
13 | def merge_near(lst, diag):
14 | group = [[0, ]]
15 | for i in range(1, len(lst)):
16 | if lst[i] - np.mean(group[-1]) < diag * 0.02:
17 | group[-1].append(lst[i])
18 | else:
19 | group.append([lst[i], ])
20 | if len(group) == 1:
21 | group = [lst[0], lst[-1]]
22 | else:
23 | group = [int(np.mean(x)) for x in group]
24 | return group
25 |
26 |
27 | def fit_layout_old(floor_xz, need_cube=False, show=False, block_eps=0.05):
28 | show_radius = np.linalg.norm(floor_xz, axis=-1).max()
29 | side_l = 512
30 | floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8)
31 | center = np.array([side_l / 2, side_l / 2])
32 | polys = cv2.findContours(floorplan, 1, 2)
33 | if isinstance(polys, tuple):
34 | if len(polys) == 3:
35 | # opencv 3
36 | polys = list(polys[1])
37 | else:
38 | polys = list(polys[0])
39 | polys.sort(key=lambda x: cv2.contourArea(x), reverse=True)
40 | poly = polys[0]
41 | sub_x, sub_y, w, h = cv2.boundingRect(poly)
42 | floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w]
43 | sub_center = center - np.array([sub_x, sub_y])
44 | polys = cv2.findContours(floorplan_sub, 1, 2)
45 | if isinstance(polys, tuple):
46 | if len(polys) == 3:
47 | polys = polys[1]
48 | else:
49 | polys = polys[0]
50 | poly = polys[0]
51 | epsilon = 0.005 * cv2.arcLength(poly, True)
52 | poly = cv2.approxPolyDP(poly, epsilon, True)
53 |
54 | x_lst = [0, ]
55 | y_lst = [0, ]
56 | for i in range(len(poly)):
57 | p1 = poly[i][0]
58 | p2 = poly[(i + 1) % len(poly)][0]
59 |
60 | if (p2[0] - p1[0]) == 0:
61 | slope = 10
62 | else:
63 | slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0]))
64 |
65 | if slope <= 1:
66 | s = int((p1[1] + p2[1]) / 2)
67 | y_lst.append(s)
68 | elif slope > 1:
69 | s = int((p1[0] + p2[0]) / 2)
70 | x_lst.append(s)
71 |
72 | x_lst.append(floorplan_sub.shape[1])
73 | y_lst.append(floorplan_sub.shape[0])
74 | x_lst.sort()
75 | y_lst.sort()
76 |
77 | diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2))
78 | x_lst = merge_near(x_lst, diag)
79 | y_lst = merge_near(y_lst, diag)
80 | if need_cube and len(x_lst) > 2:
81 | x_lst = [x_lst[0], x_lst[-1]]
82 | if need_cube and len(y_lst) > 2:
83 | y_lst = [y_lst[0], y_lst[-1]]
84 |
85 | ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1]))
86 | for i in range(len(x_lst) - 1):
87 | for j in range(len(y_lst) - 1):
88 | sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]]
89 | score = 0 if sample.size == 0 else sample.mean()
90 | if score >= 0.3:
91 | ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1
92 |
93 | pred = np.uint8(ans)
94 | pred_polys = cv2.findContours(pred, 1, 3)
95 | if isinstance(pred_polys, tuple):
96 | if len(pred_polys) == 3:
97 | pred_polys = pred_polys[1]
98 | else:
99 | pred_polys = pred_polys[0]
100 |
101 | polygon = [(p[0][1], p[0][0]) for p in pred_polys[0][::-1]]
102 |
103 | v = np.array([p[0] + sub_y for p in polygon])
104 | u = np.array([p[1] + sub_x for p in polygon])
105 | # side_l
106 | # v<-----------|o
107 | # | | |
108 | # | ----|----z | side_l
109 | # | | |
110 | # | x \|/
111 | # |------------u
112 | side_l = floorplan.shape[0]
113 | pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1)
114 |
115 | pred_xz = pred_xz * show_radius / (side_l // 2)
116 | if show:
117 | draw_floorplan(pred_xz, show_radius=show_radius, show=show)
118 | return pred_xz
119 |
120 |
121 | if __name__ == '__main__':
122 | from utils.conversion import uv2xyz
123 |
124 | pano_img = np.zeros([512, 1024, 3])
125 | corners = np.array([[0.1, 0.7],
126 | [0.4, 0.7],
127 | [0.3, 0.6],
128 | [0.6, 0.6],
129 | [0.8, 0.7]])
130 | xz = uv2xyz(corners)[..., ::2]
131 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)
132 |
133 | xz = fit_layout_old(xz)
134 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)
135 |
--------------------------------------------------------------------------------
/dataset/pano_s2d3d_mix_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/16
3 | @description:
4 | """
5 |
6 | import os
7 |
8 | from dataset.pano_s2d3d_dataset import PanoS2D3DDataset
9 | from utils.logger import get_logger
10 |
11 |
12 | class PanoS2D3DMixDataset(PanoS2D3DDataset):
13 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
14 | split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None):
15 | assert subset == 's2d3d' or subset == 'pano', 'error subset'
16 | super().__init__(root_dir, None, shape, max_wall_num, aug, camera_height, logger,
17 | split_list, patch_num, keys, None, subset)
18 | if logger is None:
19 | logger = get_logger()
20 | self.mode = mode
21 | if mode == 'train':
22 | if subset == 'pano':
23 | s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
24 | split_list, patch_num, keys, None, 's2d3d').data
25 | s2d3d_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger,
26 | split_list, patch_num, keys, None, 's2d3d').data
27 | s2d3d_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger,
28 | split_list, patch_num, keys, None, 's2d3d').data
29 | s2d3d_all_data = s2d3d_train_data + s2d3d_val_data + s2d3d_test_data
30 |
31 | pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
32 | split_list, patch_num, keys, None, 'pano').data
33 | self.data = s2d3d_all_data + pano_train_data
34 | elif subset == 's2d3d':
35 | pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
36 | split_list, patch_num, keys, None, 'pano').data
37 | pano_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger,
38 | split_list, patch_num, keys, None, 'pano').data
39 | pano_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger,
40 | split_list, patch_num, keys, None, 'pano').data
41 | pano_all_data = pano_train_data + pano_val_data + pano_test_data
42 |
43 | s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
44 | split_list, patch_num, keys, None, 's2d3d').data
45 | self.data = pano_all_data + s2d3d_train_data
46 | else:
47 | self.data = PanoS2D3DDataset(root_dir, mode, shape, max_wall_num, aug, camera_height, logger,
48 | split_list, patch_num, keys, None, subset).data
49 |
50 | if for_test_index is not None:
51 | self.data = self.data[:for_test_index]
52 | logger.info(f"Build dataset mode: {self.mode} valid: {len(self.data)}")
53 |
54 |
55 | if __name__ == '__main__':
56 | import numpy as np
57 | from PIL import Image
58 |
59 | from tqdm import tqdm
60 | from visualization.boundary import draw_boundaries
61 | from visualization.floorplan import draw_floorplan
62 | from utils.boundary import depth2boundaries
63 | from utils.conversion import uv2xyz
64 |
65 | modes = ['test', 'val', 'train']
66 | for i in range(1):
67 | for mode in modes:
68 | print(mode)
69 | mp3d_dataset = PanoS2D3DMixDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={
70 | # 'STRETCH': True,
71 | # 'ROTATE': True,
72 | # 'FLIP': True,
73 | # 'GAMMA': True
74 | }, subset='pano')
75 | continue
76 | save_dir = f'../src/dataset/pano_s2d3d/visualization1/{mode}'
77 | if not os.path.isdir(save_dir):
78 | os.makedirs(save_dir)
79 |
80 | bar = tqdm(mp3d_dataset, ncols=100)
81 | for data in bar:
82 | bar.set_description(f"Processing {data['id']}")
83 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
84 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False)
85 | Image.fromarray((pano_img * 255).astype(np.uint8)).save(
86 | os.path.join(save_dir, f"{data['id']}_boundary.png"))
87 |
88 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False,
89 | marker_color=None, center_color=0.8, show_radius=None)
90 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
91 | os.path.join(save_dir, f"{data['id']}_floorplan.png"))
92 |
--------------------------------------------------------------------------------
/dataset/communal/base_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/26
3 | @description:
4 | """
5 | import numpy as np
6 | import torch
7 |
8 | from utils.boundary import corners2boundary, visibility_corners, get_heat_map
9 | from utils.conversion import xyz2depth, uv2xyz, uv2pixel
10 | from dataset.communal.data_augmentation import PanoDataAugmentation
11 |
12 |
13 | class BaseDataset(torch.utils.data.Dataset):
14 | def __init__(self, mode, shape=None, max_wall_num=999, aug=None, camera_height=1.6, patch_num=256, keys=None):
15 | if keys is None or len(keys) == 0:
16 | keys = ['image', 'depth', 'ratio', 'id', 'corners']
17 | if shape is None:
18 | shape = [512, 1024]
19 |
20 | assert mode == 'train' or mode == 'val' or mode == 'test' or mode is None, 'unknown mode!'
21 | self.mode = mode
22 | self.keys = keys
23 | self.shape = shape
24 | self.pano_aug = None if aug is None or mode == 'val' else PanoDataAugmentation(aug)
25 | self.camera_height = camera_height
26 | self.max_wall_num = max_wall_num
27 | self.patch_num = patch_num
28 | self.data = None
29 |
30 | def __len__(self):
31 | return len(self.data)
32 |
33 | @staticmethod
34 | def get_depth(corners, plan_y=1, length=256, visible=True):
35 | visible_floor_boundary = corners2boundary(corners, length=length, visible=visible)
36 | # The horizon-depth relative to plan_y
37 | visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, plan_y), plan_y)
38 | return visible_depth
39 |
40 | def process_data(self, label, image, patch_num):
41 | """
42 | :param label:
43 | :param image:
44 | :param patch_num:
45 | :return:
46 | """
47 | corners = label['corners']
48 | if self.pano_aug is not None:
49 | corners, image = self.pano_aug.execute_aug(corners, image if 'image' in self.keys else None)
50 | eps = 1e-3
51 | corners[:, 1] = np.clip(corners[:, 1], 0.5+eps, 1-eps)
52 |
53 | output = {}
54 | if 'image' in self.keys:
55 | image = image.transpose(2, 0, 1)
56 | output['image'] = image
57 |
58 | visible_corners = None
59 | if 'corner_class' in self.keys or 'depth' in self.keys:
60 | visible_corners = visibility_corners(corners)
61 |
62 | if 'depth' in self.keys:
63 | depth = self.get_depth(visible_corners, length=patch_num, visible=False)
64 | assert len(depth) == patch_num, f"{label['id']}, {len(depth)}, {self.pano_aug.parameters}, {corners}"
65 | output['depth'] = depth
66 |
67 | if 'ratio' in self.keys:
68 |             # Why use ratio? Because when floor_height = plan_y = 1, we only need to predict ceil_height (the ratio).
69 | output['ratio'] = label['ratio']
70 |
71 | if 'id' in self.keys:
72 | output['id'] = label['id']
73 |
74 | if 'corners' in self.keys:
75 | # all corners for evaluating Full_IoU
76 |             assert len(label['corners']) <= 32, f"len(label['corners']): {len(label['corners'])}"
77 | output['corners'] = np.zeros((32, 2), dtype=np.float32)
78 | output['corners'][:len(label['corners'])] = label['corners']
79 |
80 | if 'corner_heat_map' in self.keys:
81 | output['corner_heat_map'] = get_heat_map(visible_corners[..., 0])
82 |
83 | if 'object' in self.keys and 'objects' in label:
84 | output[f'object_heat_map'] = np.zeros((3, patch_num), dtype=np.float32)
85 | output['object_size'] = np.zeros((3, patch_num), dtype=np.float32) # width, height, bottom_height
86 | for i, type in enumerate(label['objects']):
87 | if len(label['objects'][type]) == 0:
88 | continue
89 |
90 | u_s = []
91 | for obj in label['objects'][type]:
92 | center_u = obj['center_u']
93 | u_s.append(center_u)
94 | center_pixel_u = uv2pixel(np.array([center_u]), w=patch_num, axis=0)[0]
95 | output['object_size'][0, center_pixel_u] = obj['width_u']
96 | output['object_size'][1, center_pixel_u] = obj['height_v']
97 | output['object_size'][2, center_pixel_u] = obj['boundary_v']
98 | output[f'object_heat_map'][i] = get_heat_map(np.array(u_s))
99 |
100 | return output
101 |
102 |
103 | if __name__ == '__main__':
104 | from dataset.communal.read import read_image, read_label
105 | from visualization.boundary import draw_boundaries
106 | from utils.boundary import depth2boundaries
107 | from tqdm import trange
108 |
109 | # np.random.seed(0)
110 |     dataset = BaseDataset(mode=None)
111 | dataset.pano_aug = PanoDataAugmentation(aug={
112 | 'STRETCH': True,
113 | 'ROTATE': True,
114 | 'FLIP': True,
115 | })
116 | # pano_img = read_image("../src/demo.png")
117 | # label = read_label("../src/demo.json")
118 | pano_img_path = "../../src/dataset/mp3d/image/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.png"
119 | label_path = "../../src/dataset/mp3d/label/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.json"
120 | pano_img = read_image(pano_img_path)
121 | label = read_label(label_path)
122 |
123 | # batch test
124 | for i in trange(1):
125 | output = dataset.process_data(label, pano_img, 256)
126 | boundary_list = depth2boundaries(output['ratio'], output['depth'], step=None)
127 | draw_boundaries(output['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
128 |
--------------------------------------------------------------------------------
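A small sketch of BaseDataset.get_depth above, with hypothetical corner values; it assumes utils.boundary.corners2boundary returns a boundary of exactly `length` points, as the assert in process_data relies on. Four symmetric floor corners of a square room are given in uv coordinates with v > 0.5 (below the horizon):

import numpy as np
from dataset.communal.base_dataset import BaseDataset

corners = np.array([[0.125, 0.75], [0.375, 0.75], [0.625, 0.75], [0.875, 0.75]])
depth = BaseDataset.get_depth(corners, plan_y=1, length=256, visible=False)
print(depth.shape)  # expected: (256,), the horizon depth sampled at 256 image columns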
/src/demo/demo.json:
--------------------------------------------------------------------------------
1 | {
2 | "cameraHeight": 1.6,
3 | "layoutHeight": 2.9809624004364013,
4 | "layoutObj2ds": {
5 | "num": 0,
6 | "obj2ds": []
7 | },
8 | "layoutPoints": {
9 | "num": 6,
10 | "points": [
11 | {
12 | "coords": [
13 | 0.7081447345651483,
14 | 0.5
15 | ],
16 | "id": 0,
17 | "xyz": [
18 | 3.0078125,
19 | 0.0,
20 | -0.8097623087756155
21 | ]
22 | },
23 | {
24 | "coords": [
25 | 0.8447738331945455,
26 | 0.5
27 | ],
28 | "id": 0,
29 | "xyz": [
30 | 3.0078125,
31 | 0.0,
32 | 2.03786496
33 | ]
34 | },
35 | {
36 | "coords": [
37 | 0.009142142599636915,
38 | 0.5
39 | ],
40 | "id": 0,
41 | "xyz": [
42 | -0.1171875,
43 | 0.0,
44 | 2.03786496
45 | ]
46 | },
47 | {
48 | "coords": [
49 | 0.02702105153167117,
50 | 0.5
51 | ],
52 | "id": 0,
53 | "xyz": [
54 | -0.1171875,
55 | 0.0,
56 | 0.68359375
57 | ]
58 | },
59 | {
60 | "coords": [
61 | 0.20330907731820486,
62 | 0.5
63 | ],
64 | "id": 0,
65 | "xyz": [
66 | -2.26292525056,
67 | 0.0,
68 | 0.68359375
69 | ]
70 | },
71 | {
72 | "coords": [
73 | 0.304692157890135,
74 | 0.5
75 | ],
76 | "id": 0,
77 | "xyz": [
78 | -2.26292525056,
79 | 0.0,
80 | -0.8097623087756155
81 | ]
82 | }
83 | ]
84 | },
85 | "layoutWalls": {
86 | "num": 6,
87 | "walls": [
88 | {
89 | "id": 0,
90 | "normal": [
91 | 1.0,
92 | 0.0,
93 | -0.0
94 | ],
95 | "planeEquation": [
96 | 1.0,
97 | 0.0,
98 | -0.0,
99 | -3.0078125
100 | ],
101 | "pointsIdx": [
102 | 0,
103 | 1
104 | ],
105 | "width": 2.8476272687756152
106 | },
107 | {
108 | "id": 0,
109 | "normal": [
110 | 0.0,
111 | 0.0,
112 | 1.0
113 | ],
114 | "planeEquation": [
115 | 0.0,
116 | 0.0,
117 | 1.0,
118 | -2.03786496
119 | ],
120 | "pointsIdx": [
121 | 1,
122 | 2
123 | ],
124 | "width": 3.125
125 | },
126 | {
127 | "id": 0,
128 | "normal": [
129 | -1.0,
130 | -0.0,
131 | -0.0
132 | ],
133 | "planeEquation": [
134 | -1.0,
135 | -0.0,
136 | -0.0,
137 | -0.1171875
138 | ],
139 | "pointsIdx": [
140 | 2,
141 | 3
142 | ],
143 | "width": 1.3542712099999998
144 | },
145 | {
146 | "id": 0,
147 | "normal": [
148 | 0.0,
149 | 0.0,
150 | 1.0
151 | ],
152 | "planeEquation": [
153 | 0.0,
154 | 0.0,
155 | 1.0,
156 | -0.68359375
157 | ],
158 | "pointsIdx": [
159 | 3,
160 | 4
161 | ],
162 | "width": 2.14573775056
163 | },
164 | {
165 | "id": 0,
166 | "normal": [
167 | -1.0,
168 | -0.0,
169 | -0.0
170 | ],
171 | "planeEquation": [
172 | -1.0,
173 | -0.0,
174 | -0.0,
175 | -2.26292525056
176 | ],
177 | "pointsIdx": [
178 | 4,
179 | 5
180 | ],
181 | "width": 1.4933560587756154
182 | },
183 | {
184 | "id": 0,
185 | "normal": [
186 | 0.0,
187 | 0.0,
188 | -1.0
189 | ],
190 | "planeEquation": [
191 | 0.0,
192 | 0.0,
193 | -1.0,
194 | -0.8097623087756155
195 | ],
196 | "pointsIdx": [
197 | 5,
198 | 0
199 | ],
200 | "width": 5.27073775056
201 | }
202 | ]
203 | },
204 | "panoId": "nothing"
205 | }
--------------------------------------------------------------------------------
/dataset/zind_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/09/22
3 | @description:
4 | """
5 | import os
6 | import json
7 | import math
8 | import numpy as np
9 |
10 | from dataset.communal.read import read_image, read_label, read_zind
11 | from dataset.communal.base_dataset import BaseDataset
12 | from utils.logger import get_logger
13 | from preprocessing.filter import filter_center, filter_boundary, filter_self_intersection
14 | from utils.boundary import calc_rotation
15 |
16 |
17 | class ZindDataset(BaseDataset):
18 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
19 | split_list=None, patch_num=256, keys=None, for_test_index=None,
20 | is_simple=True, is_ceiling_flat=False, vp_align=False):
21 | # if keys is None:
22 | # keys = ['image', 'depth', 'ratio', 'id', 'corners', 'corner_heat_map', 'object']
23 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)
24 | if logger is None:
25 | logger = get_logger()
26 | self.root_dir = root_dir
27 | self.vp_align = vp_align
28 |
29 | data_dir = os.path.join(root_dir)
30 | img_dir = os.path.join(root_dir, 'image')
31 |
32 | pano_list = read_zind(partition_path=os.path.join(data_dir, f"zind_partition.json"),
33 | simplicity_path=os.path.join(data_dir, f"room_shape_simplicity_labels.json"),
34 | data_dir=data_dir, mode=mode, is_simple=is_simple, is_ceiling_flat=is_ceiling_flat)
35 |
36 | if for_test_index is not None:
37 | pano_list = pano_list[:for_test_index]
38 | if split_list:
39 | pano_list = [pano for pano in pano_list if pano['id'] in split_list]
40 | self.data = []
41 | invalid_num = 0
42 | for pano in pano_list:
43 | if not os.path.exists(pano['img_path']):
44 | logger.warning(f"{pano['img_path']} not exists")
45 | invalid_num += 1
46 | continue
47 |
48 | if not filter_center(pano['corners']):
49 | # logger.warning(f"{pano['id']} camera center not in layout")
50 | # invalid_num += 1
51 | continue
52 |
53 | if self.max_wall_num >= 10:
54 | if len(pano['corners']) < self.max_wall_num:
55 | invalid_num += 1
56 | continue
57 | elif self.max_wall_num != 0 and len(pano['corners']) != self.max_wall_num:
58 | invalid_num += 1
59 | continue
60 |
61 | if not filter_boundary(pano['corners']):
62 | logger.warning(f"{pano['id']} boundary cross")
63 | invalid_num += 1
64 | continue
65 |
66 | if not filter_self_intersection(pano['corners']):
67 | logger.warning(f"{pano['id']} self_intersection")
68 | invalid_num += 1
69 | continue
70 |
71 | self.data.append(pano)
72 |
73 | logger.info(
74 | f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}")
75 |
76 | def __getitem__(self, idx):
77 | pano = self.data[idx]
78 | rgb_path = pano['img_path']
79 | label = pano
80 | image = read_image(rgb_path, self.shape)
81 |
82 | if self.vp_align:
83 | # Equivalent to vanishing point alignment step
84 | rotation = calc_rotation(corners=label['corners'])
85 | shift = math.modf(rotation / (2 * np.pi) + 1)[0]
86 | image = np.roll(image, round(shift * self.shape[1]), axis=1)
87 | label['corners'][:, 0] = np.modf(label['corners'][:, 0] + shift)[0]
88 |
89 | output = self.process_data(label, image, self.patch_num)
90 | return output
91 |
92 |
93 | if __name__ == "__main__":
94 | import numpy as np
95 | from PIL import Image
96 |
97 | from tqdm import tqdm
98 | from visualization.boundary import draw_boundaries, draw_object
99 | from visualization.floorplan import draw_floorplan
100 | from utils.boundary import depth2boundaries, calc_rotation
101 | from utils.conversion import uv2xyz
102 | from models.other.init_env import init_env
103 |
104 | init_env(123)
105 |
106 | modes = ['val']
107 | for i in range(1):
108 | for mode in modes:
109 | print(mode)
110 | mp3d_dataset = ZindDataset(root_dir='../src/dataset/zind', mode=mode, aug={
111 | 'STRETCH': False,
112 | 'ROTATE': False,
113 | 'FLIP': False,
114 | 'GAMMA': False
115 | })
116 | # continue
117 | # save_dir = f'../src/dataset/zind/visualization/{mode}'
118 | # if not os.path.isdir(save_dir):
119 | # os.makedirs(save_dir)
120 |
121 | bar = tqdm(mp3d_dataset, ncols=100)
122 | for data in bar:
123 | # if data['id'] != '1079_pano_18':
124 | # continue
125 | bar.set_description(f"Processing {data['id']}")
126 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
127 |
128 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
129 | # Image.fromarray((pano_img * 255).astype(np.uint8)).save(
130 | # os.path.join(save_dir, f"{data['id']}_boundary.png"))
131 | # draw_object(pano_img, heat_maps=data['object_heat_map'], depth=data['depth'],
132 | # size=data['object_size'], show=True)
133 | # pass
134 | #
135 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True,
136 | marker_color=None, center_color=0.2)
137 | # Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
138 | # os.path.join(save_dir, f"{data['id']}_floorplan.png"))
139 |
--------------------------------------------------------------------------------
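A standalone sketch of the vp_align step in __getitem__ above: a yaw rotation in radians becomes a fractional shift of the u axis, the panorama is rolled by the equivalent number of columns, and corner u coordinates are wrapped modulo 1. The rotation value here is hypothetical; in the dataset it comes from calc_rotation:

import math
import numpy as np

rotation = math.radians(30)                        # hypothetical vanishing-point rotation
width = 1024
shift = math.modf(rotation / (2 * np.pi) + 1)[0]   # fraction of a full turn in [0, 1)
image = np.zeros([512, width, 3])
image = np.roll(image, round(shift * width), axis=1)
corners_u = np.array([0.1, 0.4, 0.7, 0.95])
corners_u = np.modf(corners_u + shift)[0]          # wrap around the panorama seam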
/evaluation/iou.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/29
3 | @description:
4 | The methods with the "_floorplan" suffix are only for comparison; they follow the rasterized calculation used in LED2-Net.
5 | However, the floorplan result depends on show_radius: setting it too large reduces accuracy,
6 | while setting it too small makes the calculation fail for points beyond the displayed range.
7 | """
8 | import numpy as np
9 | from shapely.geometry import Polygon
10 |
11 |
12 | def calc_inter_area(dt_xz, gt_xz):
13 | """
14 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
15 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
16 | :return:
17 | """
18 | dt_polygon = Polygon(dt_xz)
19 | gt_polygon = Polygon(gt_xz)
20 |
21 | dt_area = dt_polygon.area
22 | gt_area = gt_polygon.area
23 | inter_area = dt_polygon.intersection(gt_polygon).area
24 | return dt_area, gt_area, inter_area
25 |
26 |
27 | def calc_IoU_2D(dt_xz, gt_xz):
28 | """
29 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
30 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
31 | :return:
32 | """
33 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
34 | iou_2d = inter_area / (gt_area + dt_area - inter_area)
35 | return iou_2d
36 |
37 |
38 | def calc_IoU_3D(dt_xz, gt_xz, dt_height, gt_height):
39 | """
40 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
41 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
42 | :param dt_height:
43 | :param gt_height:
44 | :return:
45 | """
46 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
47 | dt_volume = dt_area * dt_height
48 | gt_volume = gt_area * gt_height
49 | inter_volume = inter_area * min(dt_height, gt_height)
50 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
51 | return iou_3d
52 |
53 |
54 | def calc_IoU(dt_xz, gt_xz, dt_height, gt_height):
55 | """
56 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
57 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...]
58 | :param dt_height:
59 | :param gt_height:
60 | :return:
61 | """
62 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
63 | iou_2d = inter_area / (gt_area + dt_area - inter_area)
64 |
65 | dt_volume = dt_area * dt_height
66 | gt_volume = gt_area * gt_height
67 | inter_volume = inter_area * min(dt_height, gt_height)
68 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
69 |
70 | return iou_2d, iou_3d
71 |
72 |
73 | def calc_Iou_height(dt_height, gt_height):
74 | return min(dt_height, gt_height) / max(dt_height, gt_height)
75 |
76 |
77 | # the following is for testing only
78 | def calc_inter_area_floorplan(dt_floorplan, gt_floorplan):
79 | intersect = np.sum(np.logical_and(dt_floorplan, gt_floorplan))
80 | dt_area = np.sum(dt_floorplan)
81 | gt_area = np.sum(gt_floorplan)
82 | return dt_area, gt_area, intersect
83 |
84 |
85 | def calc_IoU_2D_floorplan(dt_floorplan, gt_floorplan):
86 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
87 | iou_2d = inter_area / (gt_area + dt_area - inter_area)
88 | return iou_2d
89 |
90 |
91 | def calc_IoU_3D_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height):
92 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
93 | dt_volume = dt_area * dt_height
94 | gt_volume = gt_area * gt_height
95 | inter_volume = inter_area * min(dt_height, gt_height)
96 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
97 | return iou_3d
98 |
99 |
100 | def calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height):
101 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
102 | iou_2d = inter_area / (gt_area + dt_area - inter_area)
103 |
104 | dt_volume = dt_area * dt_height
105 | gt_volume = gt_area * gt_height
106 | inter_volume = inter_area * min(dt_height, gt_height)
107 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
108 | return iou_2d, iou_3d
109 |
110 |
111 | if __name__ == '__main__':
112 | from visualization.floorplan import draw_floorplan, draw_iou_floorplan
113 | from visualization.boundary import draw_boundaries, corners2boundaries
114 | from utils.conversion import uv2xyz
115 | from utils.height import height2ratio
116 |
117 | # dummy data
118 | dt_floor_corners = np.array([[0.2, 0.7],
119 | [0.4, 0.7],
120 | [0.6, 0.7],
121 | [0.8, 0.7]])
122 | dt_height = 2.8
123 |
124 | gt_floor_corners = np.array([[0.3, 0.7],
125 | [0.5, 0.7],
126 | [0.7, 0.7],
127 | [0.9, 0.7]])
128 | gt_height = 3.2
129 |
130 | dt_xz = uv2xyz(dt_floor_corners)[..., ::2]
131 | gt_xz = uv2xyz(gt_floor_corners)[..., ::2]
132 |
133 | dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=1)
134 | gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=1)
135 | # dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=2)
136 | # gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=2)
137 |
138 | iou_2d, iou_3d = calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height)
139 | print('use floor plan image:', iou_2d, iou_3d)
140 |
141 | iou_2d, iou_3d = calc_IoU(dt_xz, gt_xz, dt_height, gt_height)
142 | print('use floor plan polygon:', iou_2d, iou_3d)
143 |
144 | draw_iou_floorplan(dt_xz, gt_xz, show=True, iou_2d=iou_2d, iou_3d=iou_3d)
145 | pano_bd = draw_boundaries(np.zeros([512, 1024, 3]), corners_list=[dt_floor_corners],
146 | boundary_color=[0, 0, 1], ratio=height2ratio(dt_height), draw_corners=False)
147 | pano_bd = draw_boundaries(pano_bd, corners_list=[gt_floor_corners],
148 | boundary_color=[0, 1, 0], ratio=height2ratio(gt_height), show=True, draw_corners=False)
149 |
--------------------------------------------------------------------------------
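A hand-checkable sketch of the polygon-based IoU above (made-up coordinates): two axis-aligned unit squares overlapping by half, with heights 2 m and 3 m:

import numpy as np
from evaluation.iou import calc_IoU

dt_xz = np.array([[0, 0], [1, 0], [1, 1], [0, 1]])
gt_xz = np.array([[0.5, 0], [1.5, 0], [1.5, 1], [0.5, 1]])
iou_2d, iou_3d = calc_IoU(dt_xz, gt_xz, dt_height=2.0, gt_height=3.0)
print(iou_2d)  # 0.5 / (1 + 1 - 0.5)         = 1/3
print(iou_3d)  # (0.5 * 2) / (2 + 3 - 0.5*2) = 0.25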
/visualization/floorplan.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/6/29
3 | @description:
4 | """
5 | import cv2
6 |
7 |
8 | import matplotlib.pyplot as plt
9 |
10 | from PIL import Image
11 | from utils.boundary import *
12 |
13 |
14 | def draw_floorplan(xz, fill_color=None, border_color=None, side_l=512, show_radius=None, show=False, marker_color=None,
15 | center_color=None, scale=1.5):
16 | """
17 | :param scale:
18 | :param center_color:
19 | :param marker_color: for corners marking
20 | :param fill_color:
21 | :param border_color: boundary color
22 | :param xz: [[x1, z1], [x2, z2], ....]
23 | :param side_l: side length (pixel) of the output result
24 |     :param show_radius: maximum displayed radius in meters (on the plan_y projection plane of xz);
25 |     e.g. a value of 1 means that side_l/2 pixels represent 1 m. If not set, it is chosen so that all points are shown
26 | :param show:
27 | :return:
28 | """
29 | if fill_color is None:
30 | fill_color = [1]
31 |
32 |     board = np.zeros([side_l, side_l, len(fill_color)], dtype=float)  # np.float was removed in NumPy >= 1.24
33 |
34 | if show_radius is None:
35 | show_radius = np.linalg.norm(xz, axis=-1).max()
36 |
37 | xz = xz * side_l / (2*scale) / show_radius
38 | # v<-----------|o
39 | # | | |
40 | # | ----|----z |
41 | # | | |
42 | # | x \|/
43 | # |------------u
44 | xz[:, 1] = -xz[:, 1]
45 | xz += side_l // 2 # moving to center
46 |     xz = xz.astype(int)  # np.int was removed in NumPy >= 1.24
47 | cv2.fillPoly(board, [xz], fill_color)
48 | if border_color:
49 | cv2.drawContours(board, [xz], 0, border_color, 2)
50 |
51 | if marker_color is not None:
52 | for p in xz:
53 | cv2.drawMarker(board, tuple(p), marker_color, markerType=0, markerSize=10, thickness=2)
54 | if center_color is not None:
55 | cv2.drawMarker(board, tuple([side_l // 2, side_l // 2]), center_color, markerType=0, markerSize=10, thickness=2)
56 |
57 | if show:
58 | # plt.rcParams['figure.dpi'] = 300
59 | plt.axis('off')
60 | plt.imshow(board[..., 0] if board.shape[-1] == 1 else board)
61 | plt.show()
62 |
63 | return board
64 |
65 |
66 | def draw_iou_floorplan(dt_xz, gt_xz, show_radius=None, show=False, side_l=512,
67 | iou_2d=None, iou_3d=None, dt_board_color=None, gt_board_color=None):
68 | """
69 | :param gt_board_color:
70 | :param dt_board_color:
71 | :param dt_xz: [[x1, z1], [x2, z2], ....]
72 | :param gt_xz: [[x1, z1], [x2, z2], ....]
73 | :param show:
74 | :param side_l: side length (pixel) of the output result
75 |     :param show_radius: maximum displayed radius in meters (on the plan_y projection plane of xz);
76 |     e.g. a value of 1 means that side_l/2 pixels represent 1 m. If not set, it is chosen so that all points are shown
77 | :param iou_2d:
78 | :param iou_3d:
79 | :return:
80 | """
81 | if dt_board_color is None:
82 | dt_board_color = [0, 1, 0, 1]
83 | if gt_board_color is None:
84 | gt_board_color = [0, 0, 1, 1]
85 | center_color = [1, 0, 0, 1]
86 | fill_color = [0.2, 0.2, 0.2, 0.2]
87 |
88 | if show_radius is None:
89 |         # uniform scale
90 | gt_radius = np.linalg.norm(gt_xz, axis=-1).max()
91 | dt_radius = np.linalg.norm(dt_xz, axis=-1).max()
92 | show_radius = gt_radius if gt_radius > dt_radius else dt_radius
93 |
94 | dt_floorplan = draw_floorplan(dt_xz, show_radius=show_radius, fill_color=fill_color,
95 | border_color=dt_board_color, side_l=side_l, show=False)
96 | gt_floorplan = draw_floorplan(gt_xz, show_radius=show_radius, fill_color=fill_color,
97 | border_color=gt_board_color, side_l=side_l, show=False,
98 | center_color=[1, 0, 0, 1])
99 |
100 | dt_floorplan = Image.fromarray((dt_floorplan * 255).astype(np.uint8), mode='RGBA')
101 | gt_floorplan = Image.fromarray((gt_floorplan * 255).astype(np.uint8), mode='RGBA')
102 | iou_floorplan = Image.alpha_composite(gt_floorplan, dt_floorplan)
103 |
104 |     back = np.zeros([side_l, side_l, len(fill_color)], dtype=float)
105 | back[..., :] = [0.8, 0.8, 0.8, 1]
106 | back = Image.fromarray((back * 255).astype(np.uint8), mode='RGBA')
107 |
108 | iou_floorplan = Image.alpha_composite(back, iou_floorplan).convert("RGB")
109 | iou_floorplan = np.array(iou_floorplan) / 255.0
110 |
111 | if iou_2d is not None:
112 | cv2.putText(iou_floorplan, f'2d:{iou_2d * 100:.2f}', (10, 30), 2, 1, (0, 0, 0), 1)
113 | if iou_3d is not None:
114 | cv2.putText(iou_floorplan, f'3d:{iou_3d * 100:.2f}', (10, 60), 2, 1, (0, 0, 0), 1)
115 |
116 | if show:
117 | plt.axis('off')
118 | plt.imshow(iou_floorplan)
119 | plt.show()
120 | return iou_floorplan
121 |
122 |
123 | if __name__ == '__main__':
124 | import numpy as np
125 | from dataset.mp3d_dataset import MP3DDataset
126 | from utils.boundary import depth2boundaries
127 | from utils.conversion import uv2xyz
128 | from visualization.boundary import draw_boundaries
129 |
130 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
131 | gt = mp3d_dataset.__getitem__(0)
132 |
133 | # boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None)
134 | # pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
135 | # draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, marker_color=None, center_color=0.8)
136 | # draw_floorplan(depth2xyz(gt['depth'])[..., ::2], show=True, marker_color=None, center_color=0.8)
137 |
138 | corners = gt['corners'][gt['corners'][..., 0] + gt['corners'][..., 1] != 0]
139 | dt_corners = corners + 0.1
140 | # img = draw_floorplan(uv2xyz(corners)[..., ::2], show=True, fill_color=[0.8, 0.8, 0.8, 0.2],
141 | # marker_color=None, center_color=[1, 0, 0, 1], border_color=[0, 0, 1, 1])
142 | # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8))
143 |
144 | img = draw_iou_floorplan(uv2xyz(dt_corners)[..., ::2], uv2xyz(corners)[..., ::2], side_l=512, show=True)
145 | img[..., 0:3] = img[..., 0:3][..., ::-1]
146 | # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8))
147 |
148 |
--------------------------------------------------------------------------------
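A numeric sketch of the world-to-pixel mapping used by draw_floorplan above (arbitrary example values): with side_l=512, scale=1.5 and show_radius=5, a point 5 m from the camera lands side_l/(2*scale) ≈ 170.7 px from the image center, and the z axis is flipped so it points up in the plot:

import numpy as np

side_l, scale, show_radius = 512, 1.5, 5.0
xz = np.array([[5.0, 0.0], [0.0, 5.0]])
px = xz * side_l / (2 * scale) / show_radius
px[:, 1] = -px[:, 1]
px += side_l // 2
print(px.astype(int))  # approximately [[426 256] [256  85]]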
/models/base_model.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/17
3 | @description:
4 | """
5 | import os
6 | import torch
7 | import torch.nn as nn
8 | import datetime
9 |
10 |
11 | class BaseModule(nn.Module):
12 | def __init__(self, ckpt_dir=None):
13 | super().__init__()
14 |
15 | self.ckpt_dir = ckpt_dir
16 |
17 |         if ckpt_dir and not os.path.exists(ckpt_dir):
18 |             os.makedirs(ckpt_dir)
19 |         # always define model_lst so load() works even with a fresh checkpoint directory
20 |         self.model_lst = [] if not ckpt_dir else \
21 |             [x for x in sorted(os.listdir(self.ckpt_dir)) if x.endswith('.pkl')]
22 |
23 | self.last_model_path = None
24 | self.best_model_path = None
25 | self.best_accuracy = -float('inf')
26 | self.acc_d = {}
27 |
28 | def show_parameter_number(self, logger):
29 | total = sum(p.numel() for p in self.parameters())
30 | trainable = sum(p.numel() for p in self.parameters() if p.requires_grad)
31 | logger.info('{} parameter total:{:,}, trainable:{:,}'.format(self._get_name(), total, trainable))
32 |
33 | def load(self, device, logger, optimizer=None, best=False):
34 | if len(self.model_lst) == 0:
35 | logger.info('*'*50)
36 | logger.info("Empty model folder! Using initial weights")
37 | logger.info('*'*50)
38 | return 0
39 |
40 | last_model_lst = list(filter(lambda n: '_last_' in n, self.model_lst))
41 | best_model_lst = list(filter(lambda n: '_best_' in n, self.model_lst))
42 |
43 | if len(last_model_lst) == 0 and len(best_model_lst) == 0:
44 | logger.info('*'*50)
45 | ckpt_path = os.path.join(self.ckpt_dir, self.model_lst[0])
46 | logger.info(f"Load: {ckpt_path}")
47 | checkpoint = torch.load(ckpt_path, map_location=torch.device(device))
48 | self.load_state_dict(checkpoint, strict=False)
49 | logger.info('*'*50)
50 | return 0
51 |
52 | checkpoint = None
53 | if len(last_model_lst) > 0:
54 | self.last_model_path = os.path.join(self.ckpt_dir, last_model_lst[-1])
55 | checkpoint = torch.load(self.last_model_path, map_location=torch.device(device))
56 | self.best_accuracy = checkpoint['accuracy']
57 | self.acc_d = checkpoint['acc_d']
58 |
59 | if len(best_model_lst) > 0:
60 | self.best_model_path = os.path.join(self.ckpt_dir, best_model_lst[-1])
61 | best_checkpoint = torch.load(self.best_model_path, map_location=torch.device(device))
62 | self.best_accuracy = best_checkpoint['accuracy']
63 | self.acc_d = best_checkpoint['acc_d']
64 | if best:
65 | checkpoint = best_checkpoint
66 |
67 | for k in self.acc_d:
68 | if isinstance(self.acc_d[k], float):
69 | self.acc_d[k] = {
70 | 'acc': self.acc_d[k],
71 | 'epoch': checkpoint['epoch']
72 | }
73 |
74 | if checkpoint is None:
75 | logger.error("Invalid checkpoint")
76 | return
77 |
78 | self.load_state_dict(checkpoint['net'], strict=False)
79 |         if optimizer and not best:  # when loading the best checkpoint, a new optimizer may be used (e.g. switching from adam to sgd)
80 | logger.info('Load optimizer')
81 | optimizer.load_state_dict(checkpoint['optimizer'])
82 | for state in optimizer.state.values():
83 | for k, v in state.items():
84 | if torch.is_tensor(v):
85 | state[k] = v.to(device)
86 |
87 | logger.info('*'*50)
88 | if best:
89 |             logger.info(f"Load best: {self.best_model_path}")
90 | else:
91 |             logger.info(f"Load last: {self.last_model_path}")
92 |
93 | logger.info(f"Best accuracy: {self.best_accuracy}")
94 | logger.info(f"Last epoch: {checkpoint['epoch'] + 1}")
95 | logger.info('*'*50)
96 | return checkpoint['epoch'] + 1
97 |
98 | def update_acc(self, acc_d, epoch, logger):
99 | logger.info("-" * 100)
100 | for k in acc_d:
101 | if k not in self.acc_d.keys() or acc_d[k] > self.acc_d[k]['acc']:
102 | self.acc_d[k] = {
103 | 'acc': acc_d[k],
104 | 'epoch': epoch
105 | }
106 | logger.info(f"Update ACC: {k} {self.acc_d[k]['acc']:.4f}({self.acc_d[k]['epoch']}-{epoch})")
107 | logger.info("-" * 100)
108 |
109 | def save(self, optim, epoch, accuracy, logger, replace=True, acc_d=None, config=None):
110 | """
111 |
112 | :param config:
113 | :param optim:
114 | :param epoch:
115 | :param accuracy:
116 | :param logger:
117 | :param replace:
118 |         :param acc_d: other evaluation metrics, e.g. visible_2/3d, full_2/3d, rmse ...
119 | :return:
120 | """
121 | if acc_d:
122 | self.update_acc(acc_d, epoch, logger)
123 | name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S_last_{:.4f}_{}'.format(accuracy, epoch))
124 | name = f"model_{name}.pkl"
125 | checkpoint = {
126 | 'net': self.state_dict(),
127 | 'optimizer': optim.state_dict(),
128 | 'epoch': epoch,
129 | 'accuracy': accuracy,
130 | 'acc_d': acc_d
131 | }
132 |         # FIXME: remove the always-true condition
133 | if (True or config.MODEL.SAVE_LAST) and epoch % config.TRAIN.SAVE_FREQ == 0:
134 | if replace and self.last_model_path and os.path.exists(self.last_model_path):
135 | os.remove(self.last_model_path)
136 | self.last_model_path = os.path.join(self.ckpt_dir, name)
137 | torch.save(checkpoint, self.last_model_path)
138 | logger.info(f"Saved last model: {self.last_model_path}")
139 |
140 | if accuracy > self.best_accuracy:
141 | self.best_accuracy = accuracy
142 |             # FIXME: remove the always-true condition
143 | if True or config.MODEL.SAVE_BEST:
144 | if replace and self.best_model_path and os.path.exists(self.best_model_path):
145 | os.remove(self.best_model_path)
146 | self.best_model_path = os.path.join(self.ckpt_dir, name.replace('last', 'best'))
147 | torch.save(checkpoint, self.best_model_path)
148 | logger.info("#" * 100)
149 | logger.info(f"Saved best model: {self.best_model_path}")
150 | logger.info("#" * 100)
--------------------------------------------------------------------------------
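For reference, the checkpoint dict written by BaseModule.save() above can be inspected outside the training loop; a minimal sketch (the file path here is hypothetical):

import torch

ckpt = torch.load('checkpoints/LGT_Net/model_best.pkl', map_location='cpu')
print(ckpt['epoch'], ckpt['accuracy'])
state_dict = ckpt['net']       # model weights
acc_d = ckpt.get('acc_d')      # other evaluation metrics, may be None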
/app.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: Zhigang Jiang
3 | @time: 2022/05/23
4 | @description:
5 | '''
6 |
7 | import gradio as gr
8 | import numpy as np
9 | import os
10 | import torch
11 |
12 | from PIL import Image
13 |
14 | from utils.logger import get_logger
15 | from config.defaults import get_config
16 | from inference import preprocess, run_one_inference
17 | from models.build import build_model
18 | from argparse import Namespace
19 | import gdown
20 |
21 |
22 | def down_ckpt(model_cfg, ckpt_dir):
23 | model_ids = [
24 | ['src/config/mp3d.yaml', '1o97oAmd-yEP5bQrM0eAWFPLq27FjUDbh'],
25 | ['src/config/zind.yaml', '1PzBj-dfDfH_vevgSkRe5kczW0GVl_43I'],
26 | ['src/config/pano.yaml', '1JoeqcPbm_XBPOi6O9GjjWi3_rtyPZS8m'],
27 | ['src/config/s2d3d.yaml', '1PfJzcxzUsbwwMal7yTkBClIFgn8IdEzI'],
28 | ['src/config/ablation_study/full.yaml', '1U16TxUkvZlRwJNaJnq9nAUap-BhCVIha']
29 | ]
30 |
31 | for model_id in model_ids:
32 | if model_id[0] != model_cfg:
33 | continue
34 | path = os.path.join(ckpt_dir, 'best.pkl')
35 | if not os.path.exists(path):
36 | logger.info(f"Downloading {model_id}")
37 | os.makedirs(ckpt_dir, exist_ok=True)
38 | gdown.download(f"https://drive.google.com/uc?id={model_id[1]}", path, False)
39 |
40 |
41 | def greet(img_path, pre_processing, weight_name, post_processing, visualization, mesh_format, mesh_resolution):
42 | args.pre_processing = pre_processing
43 | args.post_processing = post_processing
44 | if weight_name == 'mp3d':
45 | model = mp3d_model
46 | elif weight_name == 'zind':
47 | model = zind_model
48 | else:
49 | logger.error("unknown pre-trained weight name")
50 | raise NotImplementedError
51 |
52 | img_name = os.path.basename(img_path).split('.')[0]
53 | img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
54 |
55 | vp_cache_path = 'src/demo/default_vp.txt'
56 | if args.pre_processing:
57 | vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt')
58 | logger.info("pre-processing ...")
59 | img, vp = preprocess(img, vp_cache_path=vp_cache_path)
60 |
61 | img = (img / 255.0).astype(np.float32)
62 | run_one_inference(img, model, args, img_name,
63 | logger=logger, show=False,
64 | show_depth='depth-normal-gradient' in visualization,
65 | show_floorplan='2d-floorplan' in visualization,
66 | mesh_format=mesh_format, mesh_resolution=int(mesh_resolution))
67 |
68 | return [os.path.join(args.output_dir, f"{img_name}_pred.png"),
69 | os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
70 | os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
71 | vp_cache_path,
72 | os.path.join(args.output_dir, f"{img_name}_pred.json")]
73 |
74 |
75 | def get_model(args):
76 | config = get_config(args)
77 | down_ckpt(args.cfg, config.CKPT.DIR)
78 | if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available():
79 | logger.info(f'The {args.device} is not available, will use cpu ...')
80 | config.defrost()
81 | args.device = "cpu"
82 | config.TRAIN.DEVICE = "cpu"
83 | config.freeze()
84 | model, _, _, _ = build_model(config, logger)
85 | return model
86 |
87 |
88 | if __name__ == '__main__':
89 | logger = get_logger()
90 | args = Namespace(device='cuda', output_dir='src/output', visualize_3d=False, output_3d=True)
91 | os.makedirs(args.output_dir, exist_ok=True)
92 |
93 | args.cfg = 'src/config/mp3d.yaml'
94 | mp3d_model = get_model(args)
95 |
96 | args.cfg = 'src/config/zind.yaml'
97 | zind_model = get_model(args)
98 |
99 |     description = "This is a demo of the project " \
100 | "LGT-Net. " \
101 |                   "It uses the Geometry-Aware Transformer Network to predict the 3D room layout from an RGB panorama."
102 |
103 | demo = gr.Interface(fn=greet,
104 | inputs=[gr.Image(type='filepath', label='input rgb panorama', value='src/demo/pano_demo1.png'),
105 | gr.Checkbox(label='pre-processing', value=True),
106 | gr.Radio(['mp3d', 'zind'],
107 | label='pre-trained weight',
108 | value='mp3d'),
109 | gr.Radio(['manhattan', 'atalanta', 'original'],
110 | label='post-processing method',
111 | value='manhattan'),
112 | gr.CheckboxGroup(['depth-normal-gradient', '2d-floorplan'],
113 | label='2d-visualization',
114 | value=['depth-normal-gradient', '2d-floorplan']),
115 | gr.Radio(['.gltf', '.obj', '.glb'],
116 | label='output format of 3d mesh',
117 | value='.gltf'),
118 | gr.Radio(['128', '256', '512', '1024'],
119 | label='output resolution of 3d mesh',
120 | value='256'),
121 | ],
122 | outputs=[gr.Image(label='predicted result 2d-visualization', type='filepath'),
123 | gr.Model3D(label='3d mesh reconstruction', clear_color=[1.0, 1.0, 1.0, 1.0]),
124 | gr.File(label='3d mesh file'),
125 | gr.File(label='vanishing point information'),
126 | gr.File(label='layout json')],
127 | examples=[
128 | ['src/demo/pano_demo1.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
129 | ['src/demo/mp3d_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
130 | ['src/demo/mp3d_demo2.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
131 | ['src/demo/mp3d_demo3.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
132 | ['src/demo/zind_demo1.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
133 | ['src/demo/zind_demo2.png', False, 'zind', 'atalanta', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
134 | ['src/demo/zind_demo3.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
135 | ['src/demo/other_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
136 | ['src/demo/other_demo2.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
137 | ], title='LGT-Net', allow_flagging="never", cache_examples=False, description=description)
138 |
139 | demo.launch(debug=True, enable_queue=False)
140 |
--------------------------------------------------------------------------------
/visualization/boundary.py:
--------------------------------------------------------------------------------
1 | """
2 | @date: 2021/06/19
3 | @description:
4 | """
5 |
6 | import matplotlib.pyplot as plt
7 | import cv2
8 | import numpy as np
9 | from utils.conversion import uv2pixel
10 | from utils.boundary import corners2boundary, corners2boundaries, find_peaks, connect_corners_uv, get_object_cor, \
11 | visibility_corners
12 |
13 |
14 | def draw_boundary(pano_img, corners: np.ndarray = None, boundary: np.ndarray = None, draw_corners=True, show=False,
15 | step=0.01, length=None, boundary_color=None, marker_color=None, title=None, visible=True):
16 | if marker_color is None:
17 | marker_color = [0, 0, 1]
18 | if boundary_color is None:
19 | boundary_color = [0, 1, 0]
20 |
21 | assert corners is not None or boundary is not None, "corners or boundary error"
22 |
23 | shape = sorted(pano_img.shape)
24 | assert len(shape) > 1, "pano_img shape error"
25 | w = shape[-1]
26 | h = shape[-2]
27 |
28 | pano_img = pano_img.copy()
29 | if (corners is not None and len(corners) > 2) or \
30 | (boundary is not None and len(boundary) > 2):
31 |         if isinstance(boundary_color, (list, np.ndarray)):  # np.array is a function, not a type
32 | if boundary is None:
33 | boundary = corners2boundary(corners, step, length, visible)
34 |
35 | boundary = uv2pixel(boundary, w, h)
36 | pano_img[boundary[:, 1], boundary[:, 0]] = boundary_color
37 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), boundary[:, 0]] = boundary_color
38 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), boundary[:, 0]] = boundary_color
39 |
40 | if pano_img.shape[1] > 512:
41 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color
42 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color
43 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color
44 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color
45 |
46 | pano_img[boundary[:, 1], np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color
47 | pano_img[boundary[:, 1], np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color
48 |
49 | if corners is not None and draw_corners:
50 | if visible:
51 | corners = visibility_corners(corners)
52 | corners = uv2pixel(corners, w, h)
53 | for corner in corners:
54 | cv2.drawMarker(pano_img, tuple(corner), marker_color, markerType=0, markerSize=10, thickness=2)
55 |
56 | if show:
57 | plt.figure(figsize=(10, 5))
58 | if title is not None:
59 | plt.title(title)
60 |
61 | plt.axis('off')
62 | plt.imshow(pano_img)
63 | plt.show()
64 |
65 | return pano_img
66 |
67 |
68 | def draw_boundaries(pano_img, corners_list: list = None, boundary_list: list = None, draw_corners=True, show=False,
69 | step=0.01, length=None, boundary_color=None, marker_color=None, title=None, ratio=None, visible=True):
70 | """
71 |
72 | :param visible:
73 | :param pano_img:
74 | :param corners_list:
75 | :param boundary_list:
76 | :param draw_corners:
77 | :param show:
78 | :param step:
79 | :param length:
80 | :param boundary_color: RGB color
81 | :param marker_color: RGB color
82 | :param title:
83 | :param ratio: ceil_height/camera_height
84 | :return:
85 | """
86 | assert corners_list is not None or boundary_list is not None, "corners_list or boundary_list error"
87 |
88 | if corners_list is not None:
89 | if ratio is not None and len(corners_list) == 1:
90 | corners_list = corners2boundaries(ratio, corners_uv=corners_list[0], step=None, visible=visible)
91 |
92 | for i, corners in enumerate(corners_list):
93 | pano_img = draw_boundary(pano_img, corners=corners, draw_corners=draw_corners,
94 | show=show if i == len(corners_list) - 1 else False,
95 | step=step, length=length, boundary_color=boundary_color, marker_color=marker_color,
96 | title=title, visible=visible)
97 | elif boundary_list is not None:
98 | if ratio is not None and len(boundary_list) == 1:
99 | boundary_list = corners2boundaries(ratio, corners_uv=boundary_list[0], step=None, visible=visible)
100 |
101 | for i, boundary in enumerate(boundary_list):
102 | pano_img = draw_boundary(pano_img, boundary=boundary, draw_corners=draw_corners,
103 | show=show if i == len(boundary_list) - 1 else False,
104 | step=step, length=length, boundary_color=boundary_color, marker_color=marker_color,
105 | title=title, visible=visible)
106 |
107 | return pano_img
108 |
109 |
110 | def draw_object(pano_img, heat_maps, size, depth, window_width=15, show=False):
111 | # window, door, opening
112 | colors = [[1, 0, 0], [1, 1, 0], [0, 0, 1]]
113 | for i, heat_map in enumerate(heat_maps):
114 | pk_u_s, _ = find_peaks(heat_map, size=window_width*2+1)
115 | for pk_u in pk_u_s:
116 | uv, xyz = get_object_cor(depth, size, center_u=pk_u, patch_num=len(heat_map))
117 |
118 | bottom_poly = connect_corners_uv(uv[0], uv[1], length=pano_img.shape[1])
119 | top_poly = connect_corners_uv(uv[2], uv[3], length=pano_img.shape[1])[::-1]
120 |
121 | bottom_max_index = bottom_poly[..., 0].argmax()
122 | if bottom_max_index != len(bottom_poly)-1:
123 | top_max_index = top_poly[..., 0].argmax()
124 | poly1 = np.concatenate([bottom_poly[:bottom_max_index+1], top_poly[top_max_index:]])
125 | poly1 = uv2pixel(poly1, w=pano_img.shape[1], h=pano_img.shape[0])
126 | poly1 = poly1[:, None, :]
127 |
128 | poly2 = np.concatenate([bottom_poly[bottom_max_index+1:], top_poly[:top_max_index]])
129 | poly2 = uv2pixel(poly2, w=pano_img.shape[1], h=pano_img.shape[0])
130 | poly2 = poly2[:, None, :]
131 |
132 | poly = [poly1, poly2]
133 | else:
134 | poly = np.concatenate([bottom_poly, top_poly])
135 | poly = uv2pixel(poly, w=pano_img.shape[1], h=pano_img.shape[0])
136 | poly = poly[:, None, :]
137 | poly = [poly]
138 |
139 | cv2.drawContours(pano_img, poly, -1, colors[i], 1)
140 | #
141 | # boundary_center_xyz = uv2xyz(np.array([pk_u, pk_v]))
142 | #
143 | # l_b_xyz =
144 | if show:
145 | plt.imshow(pano_img)
146 | plt.show()
147 |
148 |
149 | if __name__ == '__main__':
150 | from visualization.floorplan import draw_floorplan
151 | from utils.conversion import uv2xyz
152 |
153 | pano_img = np.zeros([512, 1024, 3])
154 | corners = np.array([[0.2, 0.7],
155 | [0.4, 0.7],
156 | [0.3, 0.6],
157 | [0.6, 0.6],
158 | [0.8, 0.7]])
159 | # draw_boundary(pano_img, corners, show=True)
160 | draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=1.2)
161 | draw_floorplan(uv2xyz(corners)[..., ::2], show=True, marker_color=None, center_color=0.8)
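162 | 
163 |     # Note (added): the corners above are (u, v) coordinates normalized to [0, 1],
164 |     # and ratio = ceil_height / camera_height, so ratio=1.2 puts the ceiling 1.2
165 |     # camera-heights above the camera (total room height 2.2 camera-heights).
166 |     # Because only one corners_list entry is passed together with ratio,
167 |     # draw_boundaries derives the matching ceiling boundary from these floor
168 |     # corners via corners2boundaries, so both boundaries get drawn.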
--------------------------------------------------------------------------------
/visualization/visualizer/Viewer/Utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from visualization.visualizer.earcut import earcut
4 |
5 | def xz2lines(wall_xz, h):
6 |     cch = h - 1.6  # camera-to-ceiling height (y points down: ceiling at -cch, floor at +1.6)
7 |     ch = 1.6  # camera-to-floor height (labels are rescaled so the camera sits 1.6 above the floor)
8 | strips_ceiling = []
9 | strips_wall = []
10 | strips_floor = []
11 | for i in range(wall_xz.shape[0] // 2):
12 | pts1 = wall_xz[i*2, :]
13 | pts2 = wall_xz[i*2+1, :]
14 |
15 | a = [[pts1[0], -cch, pts1[1]]]
16 | b = [[pts2[0], -cch, pts2[1]]]
17 | c = [[pts2[0], ch, pts2[1]]]
18 | d = [[pts1[0], ch, pts1[1]]]
19 | #strip = np.concatenate([a, b, b, c, c, d, d, a], axis=0)
20 | ceiling = np.concatenate([a, b], axis=0)
21 | wall = np.concatenate([b, c, d, a], axis=0)
22 | floor = np.concatenate([c, d], axis=0)
23 |
24 | strips_ceiling.append(ceiling)
25 | strips_wall.append(wall)
26 | strips_floor.append(floor)
27 |
28 | strips_ceiling = np.concatenate(strips_ceiling, axis=0).astype(np.float32)
29 | strips_wall = np.concatenate(strips_wall, axis=0).astype(np.float32)
30 | strips_floor = np.concatenate(strips_floor, axis=0).astype(np.float32)
31 |
32 | return strips_ceiling, strips_wall, strips_floor
33 |
34 |
35 | def Label2Mesh(label, reverse=False):
36 | scale = 1.6 / label['cameraHeight']
37 | layout_height = scale * label['layoutHeight']
38 | if 'cameraCeilingHeight' not in label:
39 | label['cameraCeilingHeight'] = label['layoutHeight'] - label['cameraHeight']
40 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight']
41 | xyz = np.asarray(label['points'], np.float32)
42 | xyz *= scale
43 | point_idxs = label['order']
44 |
45 | wall_xz = [np.concatenate((xyz[:, ::2][i[0], :][None, ...], xyz[:, ::2][i[1], :][None, ...]), axis=0) for i in point_idxs]
46 | wall_xz = np.concatenate(wall_xz, axis=0).astype(np.float32)
47 | wall_num = wall_xz.shape[0] // 2
48 | lines_ceiling, lines_wall, lines_floor = xz2lines(wall_xz, layout_height)
49 |
50 | def ProcessOneWall(coord, idx, h):
51 | cch = h - 1.6
52 | ch = 1.6
53 |
54 | A = coord[idx[0], :]
55 | B = coord[idx[1], :]
56 |
57 | a = A.copy()
58 | b = B.copy()
59 | c = B.copy()
60 | a[1] = -cch
61 | b[1] = -cch
62 | c[1] = ch
63 | tmp1 = np.concatenate([a[None, ...], c[None, ...], b[None, ...]], axis=0)
64 |
65 | a = A.copy()
66 | b = A.copy()
67 | c = B.copy()
68 | a[1] = -cch
69 | b[1] = ch
70 | c[1] = ch
71 | tmp2 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0)
72 |
73 | return np.concatenate([tmp1[None, ...], tmp2[None, ...]], axis=0)
74 | mesh = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] for x in range(len(point_idxs))]
75 | mesh = np.concatenate(mesh, axis=0).reshape([-1, 3])
76 | top_xz = []
77 | for i, j in point_idxs:
78 | if not reverse:
79 | tmp = np.concatenate([xyz[i, ::2], xyz[j, ::2]])
80 | else:
81 | tmp = np.concatenate([xyz[j, ::2], xyz[i, ::2]])
82 | top_xz += tmp.tolist()
83 | try:
84 | indices = np.asarray(earcut(top_xz)).reshape([-1, 3])
85 | top_xz = np.asarray(top_xz).reshape([-1, 2])
86 | tmp = []
87 | for i in range(indices.shape[0]):
88 | a = indices[i, 0]
89 | b = indices[i, 1]
90 | c = indices[i, 2]
91 | tmp.append(np.concatenate([top_xz[a:a+1, :], top_xz[b:b+1, :], top_xz[c:c+1, :]], axis=0))
92 | tmp = np.concatenate(tmp, axis=0)
93 | ceiling_mesh = np.zeros([tmp.shape[0], 3], np.float32)
94 | ceiling_mesh[:, ::2] = tmp.copy()
95 | ceiling_mesh[:, 1] = -(layout_height - 1.6)
96 | floor_mesh = ceiling_mesh.copy()
97 | floor_mesh[:, 1] = 1.6
98 | #mesh = np.concatenate([mesh, ceiling_mesh, floor_mesh], axis=0)
99 | mesh = np.concatenate([mesh, floor_mesh], axis=0)
100 |     except Exception:  # earcut can fail on degenerate layouts; keep the walls-only mesh
101 |         pass
102 | '''
103 | print (top_xz)
104 | top_xz = top_xz[:6]
105 | a = np.zeros([256, 256], np.uint8)
106 | b = ((top_xz - top_xz.min()) * 20).astype(int) + 5
107 | for i in range(0, b.shape[0]-1, 2):
108 | cv2.line(a, (b[i, 0], b[i, 1]), ((b[i+1, 0], b[i+1, 1])), color=255)
109 | import matplotlib.pyplot as plt
110 | plt.imshow(a)
111 | plt.show()
112 | exit()
113 | '''
114 | return wall_num, wall_xz, [lines_ceiling, lines_wall, lines_floor], mesh
115 |
116 | def Label2Points(label):
117 | scale = 1.6 / label['cameraHeight']
118 | layout_height = scale * label['layoutHeight']
119 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight']
120 | xyz = np.asarray(label['points'], np.float32)
121 | point_idxs = label['order']
122 | def ProcessOneWall(coord, idx, h):
123 | cch = h - 1.6
124 | ch = 1.6
125 |
126 | a = coord[idx[0], ...].copy()
127 | b = coord[idx[1], ...].copy()
128 | a[1] = -cch
129 | b[1] = ch
130 | return np.concatenate([a[None, ...], b[None, ...]], axis=0)
131 | pts = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] for x in range(len(point_idxs))]
132 | pts = np.concatenate(pts, axis=0)
133 | return pts
134 |
135 |
136 | def OldFormat2Mine(label):
137 | scale = 1.6 / label['cameraHeight']
138 | layout_height = scale * label['layoutHeight']
139 | if 'cameraCeilingHeight' not in label:
140 | label['cameraCeilingHeight'] = label['layoutHeight'] - label['cameraHeight']
141 |
142 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight']
143 | xyz = []
144 | planes = []
145 | point_idxs = []
146 | R_180 = cv2.Rodrigues(np.array([0, -1*np.pi, 0], np.float32))[0]
147 | for one in label['layoutPoints']['points']:
148 | xyz.append(one['xyz'])
149 | for one in label['layoutWalls']['walls']:
150 | planes.append(one['planeEquation'])
151 | point_idxs.append(one['pointsIdx'])
152 | xyz = np.asarray(xyz)
153 | xyz[:, 0] *= -1
154 | xyz = xyz.dot(R_180.T)
155 | xyz[:, 1] = 0
156 | xyz *= scale
157 |
158 | data = {
159 | 'cameraHeight': scale*label['cameraHeight'],
160 | 'cameraCeilingHeight': scale*label['cameraCeilingHeight'],
161 | 'layoutHeight': scale*label['layoutHeight'],
162 | 'points': xyz.tolist(),
163 | 'order': point_idxs
164 | }
165 | return data
166 |
167 | def Label2Mesh_oldformat(label):
168 | scale = 1.6 / label['cameraHeight']
169 | layout_height = scale * label['layoutHeight']
170 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight']
171 | xyz = []
172 | planes = []
173 | point_idxs = []
174 | R_180 = cv2.Rodrigues(np.array([0, -1*np.pi, 0], np.float32))[0]
175 | for one in label['layoutPoints']['points']:
176 | xyz.append(one['xyz'])
177 | for one in label['layoutWalls']['walls']:
178 | planes.append(one['planeEquation'])
179 | point_idxs.append(one['pointsIdx'])
180 | xyz = np.asarray(xyz)
181 | xyz[:, 0] *= -1
182 | xyz = xyz.dot(R_180.T)
183 | def ProcessOneWall(coord, idx, h):
184 | cch = h - 1.6
185 | ch = 1.6
186 |
187 | A = coord[idx[0], :]
188 | B = coord[idx[1], :]
189 |
190 | a = A.copy()
191 | b = B.copy()
192 | c = B.copy()
193 | a[1] = -cch
194 | b[1] = -cch
195 | c[1] = ch
196 | tmp1 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0)
197 |
198 | a = A.copy()
199 | b = A.copy()
200 | c = B.copy()
201 | a[1] = -cch
202 | b[1] = ch
203 | c[1] = ch
204 | tmp2 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0)
205 |
206 | return np.concatenate([tmp1[None, ...], tmp2[None, ...]], axis=0)
207 | mesh = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] for x in range(label['layoutPoints']['num'])]
208 | mesh = np.concatenate(mesh, axis=0).reshape([-1, 3])
209 | top_xz = []
210 | for i, j in point_idxs:
211 | tmp = np.concatenate([xyz[i, ::2], xyz[j, ::2]])
212 | top_xz += tmp.tolist()
213 | indices = np.asarray(earcut(top_xz)).reshape([-1, 3])
214 | top_xz = np.asarray(top_xz).reshape([-1, 2])
215 | tmp = []
216 | for i in range(indices.shape[0]):
217 | a = indices[i, 0]
218 | b = indices[i, 1]
219 | c = indices[i, 2]
220 | tmp.append(np.concatenate([top_xz[a:a+1, :], top_xz[b:b+1, :], top_xz[c:c+1, :]], axis=0))
221 | tmp = np.concatenate(tmp, axis=0)
222 | ceiling_mesh = np.zeros([tmp.shape[0], 3], np.float32)
223 | ceiling_mesh[:, ::2] = tmp.copy()
224 | ceiling_mesh[:, 1] = -(layout_height - 1.6)
225 | floor_mesh = ceiling_mesh.copy()
226 | floor_mesh[:, 1] = 1.6
227 | #mesh = np.concatenate([mesh, ceiling_mesh, floor_mesh], axis=0)
228 | mesh = np.concatenate([mesh, floor_mesh], axis=0)
229 | return mesh
230 |
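231 | 
232 | if __name__ == '__main__':
233 |     # Minimal usage sketch (added): the dict below is an illustrative, hand-made
234 |     # label -- a square 4 m x 4 m room, 2.6 m high, camera 1.6 m above the floor.
235 |     example_label = {
236 |         'cameraHeight': 1.6,
237 |         'layoutHeight': 2.6,
238 |         'points': [[-2.0, 0.0, -2.0], [2.0, 0.0, -2.0], [2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
239 |         'order': [[0, 1], [1, 2], [2, 3], [3, 0]],
240 |     }
241 |     wall_num, wall_xz, lines, mesh = Label2Mesh(example_label)
242 |     print(wall_num, wall_xz.shape, mesh.shape)  # 4 walls, (8, 2) xz pairs, (N, 3) triangle vertices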
--------------------------------------------------------------------------------
/dataset/communal/read.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/07/28
3 | @description:
4 | """
5 | import os
6 | import numpy as np
7 | import cv2
8 | import json
9 | from PIL import Image
10 | from utils.conversion import xyz2uv, pixel2uv
11 | from utils.height import calc_ceil_ratio
12 |
13 |
14 | def read_image(image_path, shape=None):
15 | if shape is None:
16 | shape = [512, 1024]
17 | img = np.array(Image.open(image_path)).astype(np.float32) / 255
18 | if img.shape[0] != shape[0] or img.shape[1] != shape[1]:
19 | img = cv2.resize(img, dsize=tuple(shape[::-1]), interpolation=cv2.INTER_AREA)
20 |
21 | return np.array(img)
22 |
23 |
24 | def read_label(label_path, data_type='MP3D'):
25 |
26 | if data_type == 'MP3D':
27 | with open(label_path, 'r') as f:
28 | label = json.load(f)
29 | point_idx = [one['pointsIdx'][0] for one in label['layoutWalls']['walls']]
30 | camera_height = label['cameraHeight']
31 | room_height = label['layoutHeight']
32 | camera_ceiling_height = room_height - camera_height
33 | ratio = camera_ceiling_height / camera_height
34 |
35 | xyz = [one['xyz'] for one in label['layoutPoints']['points']]
36 | assert len(xyz) == len(point_idx), "len(xyz) != len(point_idx)"
37 | xyz = [xyz[i] for i in point_idx]
38 | xyz = np.asarray(xyz, dtype=np.float32)
39 | xyz[:, 2] *= -1
40 | xyz[:, 1] = camera_height
41 | corners = xyz2uv(xyz)
42 | elif data_type == 'Pano_S2D3D':
43 | with open(label_path, 'r') as f:
44 | lines = [line for line in f.readlines() if
45 |                      len([c for c in line.split(' ') if c and c[0].isnumeric()]) > 1]
46 |
47 | corners_list = np.array([line.strip().split() for line in lines], np.float32)
48 | uv_list = pixel2uv(corners_list)
49 | ceil_uv = uv_list[::2]
50 | floor_uv = uv_list[1::2]
51 | ratio = calc_ceil_ratio([ceil_uv, floor_uv], mode='mean')
52 | corners = floor_uv
53 | else:
54 | return None
55 |
56 | output = {
57 | 'ratio': np.array([ratio], dtype=np.float32),
58 | 'corners': corners,
59 | 'id': os.path.basename(label_path).split('.')[0]
60 | }
61 | return output
62 |
63 |
64 | def move_not_simple_image(data_dir, simple_panos):
65 | import shutil
66 | for house_index in os.listdir(data_dir):
67 | house_path = os.path.join(data_dir, house_index)
68 | if not os.path.isdir(house_path) or house_index == 'visualization':
69 | continue
70 |
71 | floor_plan_path = os.path.join(house_path, 'floor_plans')
72 | if os.path.exists(floor_plan_path):
73 | print(f'move:{floor_plan_path}')
74 | dst_floor_plan_path = floor_plan_path.replace('zind', 'zind2')
75 | os.makedirs(dst_floor_plan_path, exist_ok=True)
76 | shutil.move(floor_plan_path, dst_floor_plan_path)
77 |
78 | panos_path = os.path.join(house_path, 'panos')
79 | for pano in os.listdir(panos_path):
80 | pano_path = os.path.join(panos_path, pano)
81 | pano_index = '_'.join(pano.split('.')[0].split('_')[-2:])
82 | if f'{house_index}_{pano_index}' not in simple_panos and os.path.exists(pano_path):
83 | print(f'move:{pano_path}')
84 | dst_pano_path = pano_path.replace('zind', 'zind2')
85 | os.makedirs(os.path.dirname(dst_pano_path), exist_ok=True)
86 | shutil.move(pano_path, dst_pano_path)
87 |
88 |
89 | def read_zind(partition_path, simplicity_path, data_dir, mode, is_simple=True,
90 | layout_type='layout_raw', is_ceiling_flat=False, plan_y=1):
91 | with open(simplicity_path, 'r') as f:
92 | simple_tag = json.load(f)
93 | simple_panos = {}
94 | for k in simple_tag.keys():
95 | if not simple_tag[k]:
96 | continue
97 | split = k.split('_')
98 | house_index = split[0]
99 | pano_index = '_'.join(split[-2:])
100 | simple_panos[f'{house_index}_{pano_index}'] = True
101 |
102 | # move_not_simple_image(data_dir, simple_panos)
103 |
104 | pano_list = []
105 | with open(partition_path, 'r') as f1:
106 | house_list = json.load(f1)[mode]
107 |
108 | for house_index in house_list:
109 |         with open(os.path.join(data_dir, house_index, "zind_data.json"), 'r') as f2:
110 | data = json.load(f2)
111 |
112 | panos = []
113 | merger = data['merger']
114 | for floor in merger.values():
115 | for complete_room in floor.values():
116 | for partial_room in complete_room.values():
117 | for pano_index in partial_room:
118 | pano = partial_room[pano_index]
119 | pano['index'] = pano_index
120 | panos.append(pano)
121 |
122 | for pano in panos:
123 | if layout_type not in pano:
124 | continue
125 | pano_index = pano['index']
126 |
127 | if is_simple and f'{house_index}_{pano_index}' not in simple_panos.keys():
128 | continue
129 |
130 | if is_ceiling_flat and not pano['is_ceiling_flat']:
131 | continue
132 |
133 | layout = pano[layout_type]
134 | # corners
135 | corner_xz = np.array(layout['vertices'])
136 | corner_xz[..., 0] = -corner_xz[..., 0]
137 | corner_xyz = np.insert(corner_xz, 1, pano['camera_height'], axis=1)
138 | corners = xyz2uv(corner_xyz).astype(np.float32)
139 |
140 | # ratio
141 | ratio = np.array([(pano['ceiling_height'] - pano['camera_height']) / pano['camera_height']], dtype=np.float32)
142 |
143 |             # Our future work: detecting windows, doors and openings
144 | objects = {
145 | 'windows': [],
146 | 'doors': [],
147 | 'openings': [],
148 | }
149 | for label_index, wdo_type in enumerate(["windows", "doors", "openings"]):
150 | if wdo_type not in layout:
151 | continue
152 |
153 | wdo_vertices = np.array(layout[wdo_type])
154 | if len(wdo_vertices) == 0:
155 | continue
156 |
157 | assert len(wdo_vertices) % 3 == 0
158 |
159 | for i in range(0, len(wdo_vertices), 3):
160 |                     # In the ZInD dataset the camera height is 1, and the default camera height in our
161 |                     # code is also 1, so the xyz coordinates here can be used directly.
162 |                     # Because the horizontal axis is negated above, the order of left and right is swapped here.
163 |
164 | left_bottom_xyz = np.array(
165 | [-wdo_vertices[i + 1][0], -wdo_vertices[i + 2][0], wdo_vertices[i + 1][1]])
166 | right_bottom_xyz = np.array(
167 | [-wdo_vertices[i][0], -wdo_vertices[i + 2][0], wdo_vertices[i][1]])
168 | center_bottom_xyz = (left_bottom_xyz + right_bottom_xyz) / 2
169 |
170 | center_top_xyz = center_bottom_xyz.copy()
171 | center_top_xyz[1] = -wdo_vertices[i + 2][1]
172 |
173 | center_boundary_xyz = center_bottom_xyz.copy()
174 | center_boundary_xyz[1] = plan_y
175 |
176 | uv = xyz2uv(np.array([left_bottom_xyz, right_bottom_xyz,
177 | center_bottom_xyz, center_top_xyz,
178 | center_boundary_xyz]))
179 |
180 | left_bottom_uv = uv[0]
181 | right_bottom_uv = uv[1]
182 | width_u = abs(right_bottom_uv[0] - left_bottom_uv[0])
183 |                     width_u = 1 - width_u if width_u > 0.5 else width_u  # handle wrap-around across the panorama seam
184 | assert width_u > 0, width_u
185 |
186 | center_bottom_uv = uv[2]
187 | center_top_uv = uv[3]
188 | height_v = center_bottom_uv[1] - center_top_uv[1]
189 |
190 | if height_v < 0:
191 | continue
192 |
193 | center_boundary_uv = uv[4]
194 | boundary_v = center_boundary_uv[1] - center_bottom_uv[1] if wdo_type == 'windows' else 0
195 | boundary_v = 0 if boundary_v < 0 else boundary_v
196 |
197 | center_u = center_bottom_uv[0]
198 |
199 | objects[wdo_type].append({
200 | 'width_u': width_u,
201 | 'height_v': height_v,
202 | 'boundary_v': boundary_v,
203 | 'center_u': center_u
204 | })
205 |
206 | pano_list.append({
207 | 'img_path': os.path.join(data_dir, house_index, pano['image_path']),
208 | 'corners': corners,
209 | 'objects': objects,
210 | 'ratio': ratio,
211 | 'id': f'{house_index}_{pano_index}',
212 | 'is_inside': pano['is_inside']
213 | })
214 | return pano_list
215 |
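216 | 
217 | # Note (added): every branch above returns 'ratio' = camera_ceiling_height / camera_height.
218 | # Worked example for MP3D: layoutHeight = 2.6 and cameraHeight = 1.6 give
219 | # ratio = (2.6 - 1.6) / 1.6 = 0.625. For ZInD it is computed directly from each
220 | # panorama's 'ceiling_height' and 'camera_height'; for Pano_S2D3D it is estimated
221 | # from the ceiling/floor boundaries via calc_ceil_ratio.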
--------------------------------------------------------------------------------
/models/lgt_net.py:
--------------------------------------------------------------------------------
1 | import torch.nn
2 | import torch
3 | import torch.nn as nn
4 | import models.modules as modules
5 | import numpy as np
6 |
7 | from models.base_model import BaseModule
8 | from models.modules.horizon_net_feature_extractor import HorizonNetFeatureExtractor
9 | from models.modules.patch_feature_extractor import PatchFeatureExtractor
10 | from utils.conversion import uv2depth, get_u, lonlat2depth, get_lon, lonlat2uv
11 | from utils.height import calc_ceil_ratio
12 | from utils.misc import tensor2np
13 |
14 |
15 | class LGT_Net(BaseModule):
16 | def __init__(self, ckpt_dir=None, backbone='resnet50', dropout=0.0, output_name='LGT',
17 | decoder_name='Transformer', win_size=8, depth=6,
18 | ape=None, rpe=None, corner_heat_map=False, rpe_pos=1):
19 | super().__init__(ckpt_dir)
20 |
21 | self.patch_num = 256
22 | self.patch_dim = 1024
23 | self.decoder_name = decoder_name
24 | self.output_name = output_name
25 | self.corner_heat_map = corner_heat_map
26 | self.dropout_d = dropout
27 |
28 | if backbone == 'patch':
29 | self.feature_extractor = PatchFeatureExtractor(patch_num=self.patch_num, input_shape=[3, 512, 1024])
30 | else:
31 | # feature extractor
32 | self.feature_extractor = HorizonNetFeatureExtractor(backbone)
33 |
34 | if 'Transformer' in self.decoder_name:
35 | # transformer encoder
36 | transformer_dim = self.patch_dim
37 | transformer_layers = depth
38 | transformer_heads = 8
39 | transformer_head_dim = transformer_dim // transformer_heads
40 | transformer_ff_dim = 2048
41 | rpe = None if rpe == 'None' else rpe
42 | self.transformer = getattr(modules, decoder_name)(dim=transformer_dim, depth=transformer_layers,
43 | heads=transformer_heads, dim_head=transformer_head_dim,
44 | mlp_dim=transformer_ff_dim, win_size=win_size,
45 | dropout=self.dropout_d, patch_num=self.patch_num,
46 | ape=ape, rpe=rpe, rpe_pos=rpe_pos)
47 | elif self.decoder_name == 'LSTM':
48 | self.bi_rnn = nn.LSTM(input_size=self.feature_extractor.c_last,
49 | hidden_size=self.patch_dim // 2,
50 | num_layers=2,
51 | dropout=self.dropout_d,
52 | batch_first=False,
53 | bidirectional=True)
54 | self.drop_out = nn.Dropout(self.dropout_d)
55 | else:
56 |             raise NotImplementedError("Only *Transformer and LSTM decoders are supported")
57 |
58 | if self.output_name == 'LGT':
59 | # omnidirectional-geometry aware output
60 | self.linear_depth_output = nn.Linear(in_features=self.patch_dim, out_features=1)
61 | self.linear_ratio = nn.Linear(in_features=self.patch_dim, out_features=1)
62 | self.linear_ratio_output = nn.Linear(in_features=self.patch_num, out_features=1)
63 | elif self.output_name == 'LED' or self.output_name == 'Horizon':
64 | # horizon-depth or latitude output
65 | self.linear = nn.Linear(in_features=self.patch_dim, out_features=2)
66 | else:
67 | raise NotImplementedError("Unknown output")
68 |
69 | if self.corner_heat_map:
70 | # corners heat map output
71 | self.linear_corner_heat_map_output = nn.Linear(in_features=self.patch_dim, out_features=1)
72 |
73 | self.name = f"{self.decoder_name}_{self.output_name}_Net"
74 |
75 | def lgt_output(self, x):
76 | """
77 | :param x: [ b, 256(patch_num), 1024(d)]
78 | :return: {
79 | 'depth': [b, 256(patch_num & d)]
80 | 'ratio': [b, 1(d)]
81 | }
82 | """
83 | depth = self.linear_depth_output(x) # [b, 256(patch_num), 1(d)]
84 | depth = depth.view(-1, self.patch_num) # [b, 256(patch_num & d)]
85 |
86 |         # ratio encodes the room height (ceiling-to-camera height / camera height)
87 | ratio = self.linear_ratio(x) # [b, 256(patch_num), 1(d)]
88 | ratio = ratio.view(-1, self.patch_num) # [b, 256(patch_num & d)]
89 | ratio = self.linear_ratio_output(ratio) # [b, 1(d)]
90 | output = {
91 | 'depth': depth,
92 | 'ratio': ratio
93 | }
94 | return output
95 |
96 | def led_output(self, x):
97 | """
98 | :param x: [ b, 256(patch_num), 1024(d)]
99 | :return: {
100 | 'depth': [b, 256(patch_num)]
101 | 'ceil_depth': [b, 256(patch_num)]
102 | 'ratio': [b, 1(d)]
103 | }
104 | """
105 | bon = self.linear(x) # [b, 256(patch_num), 2(d)]
106 | bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)]
107 | bon = torch.sigmoid(bon)
108 |
109 |         ceil_v = bon[:, 0, :] * -0.5 + 0.5  # [b, 256(patch_num)], v in (0, 0.5): upper half of the panorama
110 |         floor_v = bon[:, 1, :] * 0.5 + 0.5  # [b, 256(patch_num)], v in (0.5, 1): lower half of the panorama
111 | u = get_u(w=self.patch_num, is_np=False, b=ceil_v.shape[0]).to(ceil_v.device)
112 | ceil_boundary = torch.stack((u, ceil_v), axis=-1) # [b, 256(patch_num), 2]
113 | floor_boundary = torch.stack((u, floor_v), axis=-1) # [b, 256(patch_num), 2]
114 | output = {
115 | 'depth': uv2depth(floor_boundary), # [b, 256(patch_num)]
116 | 'ceil_depth': uv2depth(ceil_boundary), # [b, 256(patch_num)]
117 | }
118 | # print(output['depth'].mean())
119 | if not self.training:
120 | # [b, 1(d)]
121 | output['ratio'] = calc_ceil_ratio([tensor2np(ceil_boundary), tensor2np(floor_boundary)], mode='lsq').reshape(-1, 1)
122 | return output
123 |
124 | def horizon_output(self, x):
125 | """
126 | :param x: [ b, 256(patch_num), 1024(d)]
127 | :return: {
128 | 'floor_boundary': [b, 256(patch_num)]
129 | 'ceil_boundary': [b, 256(patch_num)]
130 | }
131 | """
132 | bon = self.linear(x) # [b, 256(patch_num), 2(d)]
133 | bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)]
134 |
135 | output = {
136 | 'boundary': bon
137 | }
138 | if not self.training:
139 | lon = get_lon(w=self.patch_num, is_np=False, b=bon.shape[0]).to(bon.device)
140 | floor_lat = torch.clip(bon[:, 0, :], 1e-4, np.pi / 2)
141 | ceil_lat = torch.clip(bon[:, 1, :], -np.pi / 2, -1e-4)
142 | floor_lonlat = torch.stack((lon, floor_lat), axis=-1) # [b, 256(patch_num), 2]
143 | ceil_lonlat = torch.stack((lon, ceil_lat), axis=-1) # [b, 256(patch_num), 2]
144 | output['depth'] = lonlat2depth(floor_lonlat)
145 | output['ratio'] = calc_ceil_ratio([tensor2np(lonlat2uv(ceil_lonlat)),
146 | tensor2np(lonlat2uv(floor_lonlat))], mode='mean').reshape(-1, 1)
147 | return output
148 |
149 | def forward(self, x):
150 | """
151 | :param x: [b, 3(d), 512(h), 1024(w)]
152 | :return: {
153 | 'depth': [b, 256(patch_num & d)]
154 | 'ratio': [b, 1(d)]
155 | }
156 | """
157 |
158 | # feature extractor
159 | x = self.feature_extractor(x) # [b 1024(d) 256(w)]
160 |
161 | if 'Transformer' in self.decoder_name:
162 | # transformer decoder
163 | x = x.permute(0, 2, 1) # [b 256(patch_num) 1024(d)]
164 | x = self.transformer(x) # [b 256(patch_num) 1024(d)]
165 | elif self.decoder_name == 'LSTM':
166 | # lstm decoder
167 | x = x.permute(2, 0, 1) # [256(patch_num), b, 1024(d)]
168 | self.bi_rnn.flatten_parameters()
169 | x, _ = self.bi_rnn(x) # [256(patch_num & seq_len), b, 1024(d)]
170 | x = x.permute(1, 0, 2) # [b, 256(patch_num), 1024(d)]
171 | x = self.drop_out(x)
172 |
173 | output = None
174 | if self.output_name == 'LGT':
175 |             # lgt output
176 | output = self.lgt_output(x)
177 |
178 | elif self.output_name == 'LED':
179 | # led output
180 | output = self.led_output(x)
181 |
182 | elif self.output_name == 'Horizon':
183 |             # horizon output
184 | output = self.horizon_output(x)
185 |
186 | if self.corner_heat_map:
187 | corner_heat_map = self.linear_corner_heat_map_output(x) # [b, 256(patch_num), 1]
188 | corner_heat_map = corner_heat_map.view(-1, self.patch_num)
189 | corner_heat_map = torch.sigmoid(corner_heat_map)
190 | output['corner_heat_map'] = corner_heat_map
191 |
192 | return output
193 |
194 |
195 | if __name__ == '__main__':
196 | from PIL import Image
197 | import numpy as np
198 | from models.other.init_env import init_env
199 |
200 | init_env(0, deterministic=True)
201 |
202 | net = LGT_Net()
203 |
204 | total = sum(p.numel() for p in net.parameters())
205 | trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
206 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))
207 |
208 |     img = np.array(Image.open("../src/demo.png")).astype(np.float32)[..., :3].transpose((2, 0, 1)) / 255  # keep RGB and normalize to [0, 1] as in read_image
209 |     input_tensor = torch.Tensor([img])  # 1 3 512 1024
210 |     output = net(input_tensor)
211 |
212 | print(output['depth'].shape) # 1 256
213 | print(output['ratio'].shape) # 1 1
214 |
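215 |     # Note (added): with the default output_name='LGT', output['depth'] holds one
216 |     # horizon-depth value per image column (256 patches) for the floor boundary,
217 |     # and output['ratio'] is the ceiling-to-camera / camera height ratio, the same
218 |     # 'ratio' convention used by draw_boundaries to recover the ceiling boundary
219 |     # from the floor boundary.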
--------------------------------------------------------------------------------
/postprocessing/dula/layout.py:
--------------------------------------------------------------------------------
1 | """
2 | @Date: 2021/10/06
3 | @description: Use the approach proposed by DuLa-Net
4 | """
5 | import cv2
6 | import numpy as np
7 | import math
8 | import matplotlib.pyplot as plt
9 | import sys
10 | import os.path as osp
11 | sys.path.append(osp.abspath(osp.join(__file__, '../../../')))
12 |
13 | from visualization.floorplan import draw_floorplan
14 |
15 | def calc_angle(v1: np.ndarray, v2: np.ndarray):
16 |     norm = np.linalg.norm(v1) * np.linalg.norm(v2)
17 |     theta = np.arccos(np.clip(np.dot(v1, v2) / norm, -1, 1))  # clip guards against floating-point overshoot
18 |     return theta
19 |
20 | def merge_near(lst, diag, min_value):
21 |     group = [[min_value, ]]
22 | for i in range(1, len(lst)):
23 | if lst[i][1] == 0 and lst[i][0] - np.mean(group[-1]) < diag * 0.02:
24 | group[-1].append(lst[i][0])
25 | else:
26 | group.append([lst[i][0], ])
27 | if len(group) == 1:
28 | group = [lst[0][0], lst[-1][0]]
29 | else:
30 | group = [int(np.mean(x)) for x in group]
31 | return group
32 |
33 |
34 | def fit_layout(floor_xz, need_cube=False, show=False, block_eps=5):
35 | show_radius = np.linalg.norm(floor_xz, axis=-1).max()
36 | side_l = 512
37 | floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8)
38 | center = np.array([side_l / 2, side_l / 2])
39 | polys = cv2.findContours(floorplan, 1, 2)
40 | if isinstance(polys, tuple):
41 | if len(polys) == 3:
42 | # opencv 3
43 | polys = list(polys[1])
44 | else:
45 | polys = list(polys[0])
46 | polys.sort(key=lambda x: cv2.contourArea(x), reverse=True)
47 | poly = polys[0]
48 | sub_x, sub_y, w, h = cv2.boundingRect(poly)
49 | floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w]
50 | sub_center = center - np.array([sub_x, sub_y])
51 | polys = cv2.findContours(floorplan_sub, 1, 2)
52 | if isinstance(polys, tuple):
53 | if len(polys) == 3:
54 | polys = polys[1]
55 | else:
56 | polys = polys[0]
57 | poly = polys[0]
58 | epsilon = 0.005 * cv2.arcLength(poly, True)
59 | poly = cv2.approxPolyDP(poly, epsilon, True)
60 |
61 | x_lst = [[poly[:, 0, 0].min(), 0], ]
62 | y_lst = [[poly[:, 0, 1].min(), 0], ]
63 |
64 | ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1]))
65 |
66 | for i in range(len(poly)):
67 | p1 = poly[i][0]
68 | p2 = poly[(i + 1) % len(poly)][0]
69 | # We added occlusion detection
70 | cp1 = p1 - sub_center
71 | cp2 = p2 - sub_center
72 | p12 = p2 - p1
73 | l1 = np.linalg.norm(cp1)
74 | l2 = np.linalg.norm(cp2)
75 | l3 = np.linalg.norm(p12)
76 |         # occlusion test: the segment subtends a tiny angle from the camera and is nearly parallel to the view ray
77 | is_block1 = np.rad2deg(calc_angle(cp1, cp2)) < block_eps
78 | is_block2 = np.rad2deg(calc_angle(cp2, p12)) < block_eps*2
79 | is_block3 = np.rad2deg(calc_angle(cp2, -p12)) < block_eps*2
80 | is_block = is_block1 and (is_block2 or is_block3)
81 |
82 | if (p2[0] - p1[0]) == 0:
83 | slope = 10
84 | else:
85 | slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0]))
86 |
87 | if is_block:
88 | s = p1[1] if l1 < l2 else p2[1]
89 | y_lst.append([s, 1])
90 | s = p1[0] if l1 < l2 else p2[0]
91 | x_lst.append([s, 1])
92 |
93 | left = p1[0] if p1[0] < p2[0] else p2[0]
94 | right = p1[0] if p1[0] > p2[0] else p2[0]
95 | top = p1[1] if p1[1] < p2[1] else p2[1]
96 | bottom = p1[1] if p1[1] > p2[1] else p2[1]
97 | sample = floorplan_sub[top:bottom, left:right]
98 | score = 0 if sample.size == 0 else sample.mean()
99 | if score >= 0.3:
100 | ans[top:bottom, left:right] = 1
101 |
102 | else:
103 | if slope <= 1:
104 | s = int((p1[1] + p2[1]) / 2)
105 | y_lst.append([s, 0])
106 | elif slope > 1:
107 | s = int((p1[0] + p2[0]) / 2)
108 | x_lst.append([s, 0])
109 |
110 | debug_show = False
111 | if debug_show:
112 | plt.figure(dpi=300)
113 | plt.axis('off')
114 | a = cv2.drawMarker(floorplan_sub.copy()*0.5, tuple(sub_center.astype(int)), [1], markerType=0, markerSize=10, thickness=2)
115 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
116 | plt.savefig('src/1.png', bbox_inches='tight', transparent=True, pad_inches=0)
117 | plt.show()
118 |
119 | plt.figure(dpi=300)
120 | plt.axis('off')
121 | a = cv2.drawMarker(ans.copy()*0.5, tuple(sub_center.astype(int)), [1], markerType=0, markerSize=10, thickness=2)
122 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
123 | # plt.show()
124 | plt.savefig('src/2.png', bbox_inches='tight', transparent=True, pad_inches=0)
125 | plt.show()
126 |
127 | x_lst.append([poly[:, 0, 0].max(), 0])
128 | y_lst.append([poly[:, 0, 1].max(), 0])
129 | x_lst.sort(key=lambda x: x[0])
130 | y_lst.sort(key=lambda x: x[0])
131 |
132 | diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2))
133 | x_lst = merge_near(x_lst, diag, poly[:, 0, 0].min())
134 | y_lst = merge_near(y_lst, diag, poly[:, 0, 1].min())
135 | if need_cube and len(x_lst) > 2:
136 | x_lst = [x_lst[0], x_lst[-1]]
137 | if need_cube and len(y_lst) > 2:
138 | y_lst = [y_lst[0], y_lst[-1]]
139 |
140 | for i in range(len(x_lst) - 1):
141 | for j in range(len(y_lst) - 1):
142 | sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]]
143 | score = 0 if sample.size == 0 else sample.mean()
144 | if score >= 0.3:
145 | ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1
146 |
147 | if debug_show:
148 | plt.figure(dpi=300)
149 | plt.axis('off')
150 | a = cv2.drawMarker(ans.copy() * 0.5, tuple(sub_center.astype(int)), [1],
151 | markerType=0, markerSize=10, thickness=2)
152 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
153 | # plt.show()
154 | plt.savefig('src/3.png', bbox_inches='tight', transparent=True, pad_inches=0)
155 | plt.show()
156 |
157 | pred = np.uint8(ans)
158 | pred_polys = cv2.findContours(pred, 1, 3)
159 |     if isinstance(pred_polys, tuple):
160 |         if len(pred_polys) == 3:
161 |             pred_polys = list(pred_polys[1])  # OpenCV 3 returns (img, contours, hierarchy)
162 |         else:
163 |             pred_polys = list(pred_polys[0])  # OpenCV 4 returns (contours, hierarchy)
164 |
165 | pred_polys.sort(key=lambda x: cv2.contourArea(x), reverse=True)
166 | pred_poly = pred_polys[0]
167 |     # findContours may return segments that are not strictly axis-aligned; force each edge to be horizontal or vertical here
168 | for i in range(len(pred_poly)):
169 | p1 = pred_poly[i][0]
170 | p2 = pred_poly[(i+1)%len(pred_poly)][0]
171 | if abs(p1[0] - p2[0]) < abs(p1[1] - p2[1]):
172 | p1[0] = p2[0]
173 | else:
174 | p1[1] = p2[1]
175 |
176 | if debug_show:
177 | plt.figure(dpi=300)
178 | plt.axis('off')
179 | a = cv2.drawMarker(ans.copy() * 0.5, tuple(sub_center.astype(int)), [1],
180 | markerType=0, markerSize=10, thickness=2)
181 | a = cv2.drawContours(a, [poly], 0, 0.8, 1)
182 | a = cv2.drawContours(a, [pred_poly], 0, 1, 1)
183 | plt.imshow(a)
184 | # plt.show()
185 | plt.savefig('src/4.png', bbox_inches='tight', transparent=True, pad_inches=0)
186 | plt.show()
187 |
188 | polygon = [(p[0][1], p[0][0]) for p in pred_poly[::-1]]
189 |
190 | v = np.array([p[0] + sub_y for p in polygon])
191 | u = np.array([p[1] + sub_x for p in polygon])
192 | # side_l
193 | # v<-----------|o
194 | # | | |
195 | # | ----|----z | side_l
196 | # | | |
197 | # | x \|/
198 | # |------------u
199 | side_l = floorplan.shape[0]
200 | pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1)
201 |
202 | pred_xz = pred_xz * show_radius / (side_l // 2)
203 | if show:
204 | draw_floorplan(pred_xz, show_radius=show_radius, show=show)
205 |
206 | show_process = False
207 | if show_process:
208 | img = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1], 3))
209 | for x in x_lst:
210 | cv2.line(img, (x, 0), (x, floorplan_sub.shape[0]), (0, 255, 0), 1)
211 | for y in y_lst:
212 | cv2.line(img, (0, y), (floorplan_sub.shape[1], y), (255, 0, 0), 1)
213 |
214 | fig = plt.figure()
215 | plt.axis('off')
216 | ax1 = fig.add_subplot(2, 2, 1)
217 | ax1.imshow(floorplan)
218 | ax3 = fig.add_subplot(2, 2, 2)
219 | ax3.imshow(floorplan_sub)
220 | ax4 = fig.add_subplot(2, 2, 3)
221 | ax4.imshow(img)
222 | ax5 = fig.add_subplot(2, 2, 4)
223 | ax5.imshow(ans)
224 | plt.show()
225 |
226 | return pred_xz
227 |
228 |
229 | if __name__ == '__main__':
230 | # processed_xz = fit_layout(floor_xz=np.load('/room_layout_estimation/lgt_net/floor_xz.npy'), need_cube=False, show=False)
231 |
232 | from utils.conversion import uv2xyz
233 |
234 | pano_img = np.zeros([512, 1024, 3])
235 | corners = np.array([[0.1, 0.7],
236 | [0.4, 0.7],
237 | [0.3, 0.6],
238 | [0.6, 0.6],
239 | [0.8, 0.7]])
240 | xz = uv2xyz(corners)[..., ::2]
241 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)
242 |
243 | xz = fit_layout(xz)
244 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)
245 |
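246 |     # Note (added): fit_layout rasterizes the floor polygon into a 512 x 512 floor
247 |     # plan, snaps wall segments to axis-aligned x/y lines (merging lines closer than
248 |     # 2% of the cropped floor plan's diagonal), and returns the Manhattan-aligned xz
249 |     # polygon; need_cube=True keeps only the outermost lines, forcing a 4-wall box.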
--------------------------------------------------------------------------------