├── dataset ├── __init__.py ├── communal │ ├── __init__.py │ ├── base_dataset.py │ └── read.py ├── mp3d_dataset.py ├── build.py ├── pano_s2d3d_dataset.py ├── pano_s2d3d_mix_dataset.py └── zind_dataset.py ├── visualization ├── visualizer │ ├── Viewer │ │ ├── __init__.py │ │ └── Utils.py │ ├── earcut │ │ └── __init__.py │ ├── __init__.py │ ├── .gitignore │ ├── src │ │ ├── demo.png │ │ ├── 3Dlayout.png │ │ └── example.jpg │ ├── requirements.txt │ ├── shader │ │ ├── __init__.py │ │ ├── vertex_line.py │ │ ├── vertex_pano.py │ │ ├── geometry_line.py │ │ ├── fragment_line.py │ │ ├── fragment_pano.py │ │ ├── vertex_line.glsl │ │ ├── vertex_pano.glsl │ │ ├── fragment_line.glsl │ │ ├── geometry_line.glsl │ │ └── fragment_pano.glsl │ ├── README.md │ ├── LICENSE │ └── visualizer.py ├── __init__.py ├── obj3d.py ├── grad.py ├── floorplan.py └── boundary.py ├── .gitignore ├── models ├── __init__.py ├── other │ ├── __init__.py │ ├── optimizer.py │ ├── init_env.py │ ├── scheduler.py │ └── criterion.py ├── modules │ ├── __init__.py │ ├── swin_transformer.py │ ├── transformer.py │ ├── patch_feature_extractor.py │ ├── swg_transformer.py │ └── conv_transformer.py ├── build.py ├── base_model.py └── lgt_net.py ├── utils ├── __init__.py ├── misc.py ├── time_watch.py ├── writer.py ├── logger.py └── height.py ├── config └── __init__.py ├── evaluation ├── __init__.py ├── eval_visible_iou.py ├── f1_score.py ├── analyse_layout_type.py └── iou.py ├── postprocessing ├── __init__.py ├── dula │ ├── __init__.py │ ├── layout_old.py │ └── layout.py └── post_process.py ├── preprocessing ├── __init__.py └── filter.py ├── src ├── demo │ ├── demo.png │ ├── demo1.png │ └── demo.json ├── fig │ ├── network.png │ ├── demo1_pred.png │ └── post_processing │ │ ├── img_0.png │ │ ├── img_1.png │ │ ├── img_2.png │ │ ├── img_3.png │ │ ├── original.png │ │ └── optimized.png └── config │ ├── other │ ├── led_net_zind.yaml │ ├── horizon_net_zind.yaml │ ├── led_net_mp3d.yaml │ └── horizon_net_mp3d.yaml │ ├── ablation_study │ ├── w_lstm.yaml │ ├── wo_height.yaml │ ├── wo_nomal_gradient.yaml │ ├── wo_pe.yaml │ ├── full.yaml │ ├── w_g_rpe1.yaml │ ├── w_g_rpe2.yaml │ ├── w_ape.yaml │ ├── wo_global.yaml │ ├── wo_window.yaml │ ├── wo_gradient.yaml │ ├── w_vit.yaml │ └── w_vit_zind.yaml │ ├── zind.yaml │ ├── mp3d.yaml │ ├── pano.yaml │ ├── s2d3d.yaml │ └── mp3d_scheduler.yaml ├── loss ├── __init__.py ├── led_loss.py ├── object_loss.py ├── boundary_loss.py └── grad_loss.py ├── requirements.txt ├── LICENSE ├── Post-Porcessing.md ├── convert_ckpt.py └── app.py /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /visualization/visualizer/Viewer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/output 2 | checkpoints 3 | flagged 4 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from models.lgt_net import LGT_Net 2 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/06/19 3 | @description: 4 | """ 
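To make the directory tree above easier to navigate, here is a minimal, hypothetical sketch of how the visible pieces fit together. It only uses names that appear elsewhere in this dump (`merge_from_file` as imported by `convert_ckpt.py`, `init_env`, `build_optimizer`, `get_logger`, and the `LGT_Net` export in `models/__init__.py`); the config path is just one of the YAMLs under `src/config`, and unpacking `MODEL.ARGS[0]` into the constructor is an assumption about what `models/build.py` does, not the repository's actual entry point.

```python
# Hypothetical wiring sketch -- not the repository's actual training script.
from config.defaults import merge_from_file        # same helper convert_ckpt.py imports
from models import LGT_Net                         # re-exported in models/__init__.py
from models.other.init_env import init_env
from models.other.optimizer import build_optimizer
from utils.logger import get_logger

config = merge_from_file('src/config/mp3d.yaml')   # any YAML under src/config
init_env(config.SEED, config.TRAIN.DETERMINISTIC,  # seeds + cuDNN determinism
         loader_work_num=config.DATA.NUM_WORKERS)

logger = get_logger()                              # cached "LGTNet" logger
model = LGT_Net(**config.MODEL.ARGS[0]).to(config.TRAIN.DEVICE)  # assumption: ARGS[0] holds constructor kwargs
optimizer = build_optimizer(config, model, logger) # e.g. Adam at TRAIN.BASE_LR
```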
-------------------------------------------------------------------------------- /visualization/visualizer/earcut/__init__.py: -------------------------------------------------------------------------------- 1 | from .earcut import * -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/17 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/29 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /visualization/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/06/19 3 | @description: 4 | """ -------------------------------------------------------------------------------- /models/other/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/18 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /postprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/10/06 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/7/5 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /dataset/communal/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/09/22 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /postprocessing/dula/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/10/06 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /src/demo/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/demo/demo.png -------------------------------------------------------------------------------- /src/demo/demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/demo/demo1.png -------------------------------------------------------------------------------- /visualization/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/11/06 3 | @description: 4 | """ 5 | -------------------------------------------------------------------------------- /src/fig/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/network.png -------------------------------------------------------------------------------- /src/fig/demo1_pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/demo1_pred.png 
-------------------------------------------------------------------------------- /visualization/visualizer/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | __pycache__ 4 | .DS_store 5 | data*/ 6 | paper_tools/*/ 7 | -------------------------------------------------------------------------------- /src/fig/post_processing/img_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_0.png -------------------------------------------------------------------------------- /src/fig/post_processing/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_1.png -------------------------------------------------------------------------------- /src/fig/post_processing/img_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_2.png -------------------------------------------------------------------------------- /src/fig/post_processing/img_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/img_3.png -------------------------------------------------------------------------------- /src/fig/post_processing/original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/original.png -------------------------------------------------------------------------------- /src/fig/post_processing/optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/src/fig/post_processing/optimized.png -------------------------------------------------------------------------------- /visualization/visualizer/src/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/demo.png -------------------------------------------------------------------------------- /visualization/visualizer/src/3Dlayout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/3Dlayout.png -------------------------------------------------------------------------------- /visualization/visualizer/src/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhigangjiang/LGT-Net/HEAD/visualization/visualizer/src/example.jpg -------------------------------------------------------------------------------- /visualization/visualizer/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | PyQt5 3 | PyOpenGL 4 | pyglm==1.99.0 5 | numpy 6 | scipy 7 | matplotlib 8 | imageio 9 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/__init__.py: -------------------------------------------------------------------------------- 1 | from . import vertex_pano, fragment_pano 2 | from . 
import vertex_line, fragment_line 3 | from . import geometry_line 4 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/vertex_line.py: -------------------------------------------------------------------------------- 1 | import os 2 | dirname = os.path.dirname(os.path.abspath(__file__)) 3 | with open('%s/vertex_line.glsl'%(dirname), 'r') as f: 4 | src = f.read() 5 | 6 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/vertex_pano.py: -------------------------------------------------------------------------------- 1 | import os 2 | dirname = os.path.dirname(os.path.abspath(__file__)) 3 | with open('%s/vertex_pano.glsl'%(dirname), 'r') as f: 4 | src = f.read() 5 | 6 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/geometry_line.py: -------------------------------------------------------------------------------- 1 | import os 2 | dirname = os.path.dirname(os.path.abspath(__file__)) 3 | with open('%s/geometry_line.glsl'%(dirname), 'r') as f: 4 | src = f.read() 5 | 6 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/fragment_line.py: -------------------------------------------------------------------------------- 1 | import os 2 | dirname = os.path.dirname(os.path.abspath(__file__)) 3 | 4 | with open('%s/fragment_line.glsl'%(dirname), 'r') as f: 5 | src = f.read() 6 | 7 | 8 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/fragment_pano.py: -------------------------------------------------------------------------------- 1 | import os 2 | dirname = os.path.dirname(os.path.abspath(__file__)) 3 | 4 | with open('%s/fragment_pano.glsl'%(dirname), 'r') as f: 5 | src = f.read() 6 | 7 | 8 | -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/09/01 3 | @description: 4 | """ 5 | 6 | from models.modules.swin_transformer import Swin_Transformer 7 | from models.modules.swg_transformer import SWG_Transformer 8 | from models.modules.transformer import Transformer 9 | -------------------------------------------------------------------------------- /loss/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/7/19 3 | @description: 4 | """ 5 | 6 | from torch.nn import L1Loss 7 | from loss.led_loss import LEDLoss 8 | from loss.grad_loss import GradLoss 9 | from loss.boundary_loss import BoundaryLoss 10 | from loss.object_loss import ObjectLoss, HeatmapLoss 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | torch==1.7.1 3 | torchvision==0.8.2 4 | yacs==0.1.8 5 | einops==0.3.0 6 | opencv-python==4.5.3.56 7 | pylsd-nova==1.2.0 8 | tqdm==4.64.0 9 | scipy==1.8.1 10 | termcolor==1.1.0 11 | shapely==1.8.2 12 | imageio==2.19.2 13 | open3d==0.15.2 14 | gdown==4.4.0 15 | gradio==3.0.5 -------------------------------------------------------------------------------- /visualization/visualizer/shader/vertex_line.glsl: -------------------------------------------------------------------------------- 1 | #version 410 2 | 
layout(location=0) in vec3 iv3vertex; 3 | 4 | uniform mat4 um4p; 5 | uniform mat4 um4v; 6 | uniform mat4 um4m; 7 | 8 | void main(){ 9 | //gl_Position = um4p * um4v * um4m * vec4(iv3vertex[0], iv3vertex[1], iv3vertex[2], 1.0); 10 | gl_Position = vec4(iv3vertex*0.999, 1.0); 11 | } 12 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/vertex_pano.glsl: -------------------------------------------------------------------------------- 1 | #version 410 2 | layout(location = 0) in vec3 iv3vertex; 3 | 4 | uniform mat4 um4p; 5 | uniform mat4 um4v; 6 | uniform mat4 um4m; 7 | out vec3 modelPosition; 8 | 9 | void main(){ 10 | gl_Position = um4p * um4v * um4m * vec4(iv3vertex, 1.0); 11 | //gl_Position = vec4(iv3vertex, 1.0); 12 | modelPosition = iv3vertex; 13 | } 14 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/fragment_line.glsl: -------------------------------------------------------------------------------- 1 | #version 410 2 | #define pi 3.14159265359 3 | layout(location = 0) out vec4 fragColor; 4 | uniform int um4f; 5 | 6 | void main(){ 7 | if (um4f==0) 8 | fragColor = vec4(vec3(255, 250, 84)/255.0, 1.0); 9 | else if(um4f==1) 10 | fragColor = vec4(0, 0, 1, 1.0); 11 | else 12 | fragColor = vec4(vec3(154, 255, 154)/255.0, 1.0); 13 | 14 | fragColor = vec4(0.5, 0.5, 0.5, 1.0); 15 | } 16 | -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/8/4 3 | @description: 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def tensor2np(t: torch.Tensor) -> np.array: 10 | if isinstance(t, torch.Tensor): 11 | if t.device == 'cpu': 12 | return t.detach().numpy() 13 | else: 14 | return t.detach().cpu().numpy() 15 | else: 16 | return t 17 | 18 | 19 | def tensor2np_d(d: dict) -> dict: 20 | output = {} 21 | for k in d.keys(): 22 | output[k] = tensor2np(d[k]) 23 | return output 24 | -------------------------------------------------------------------------------- /utils/time_watch.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/18 3 | @description: 4 | """ 5 | import time 6 | 7 | 8 | class TimeWatch: 9 | def __init__(self, name="", logger=None): 10 | self.name = name 11 | self.start = time.time() 12 | self.logger = logger 13 | 14 | def __del__(self): 15 | end = time.time() 16 | output = f"{self.name} | time use {(end - self.start):.2f}s." 
17 | if self.logger: 18 | self.logger.info(output) 19 | else: 20 | print(output) 21 | 22 | 23 | if __name__ == '__main__': 24 | w = TimeWatch("__main__") 25 | time.sleep(2) -------------------------------------------------------------------------------- /src/config/other/led_net_zind.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Using our framework to implement LED2-Net, Training on ZInd' 2 | TAG: 'zind' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'LSTM', 8 | 'output_name': 'LED', 9 | 'dropout': 0.5, 10 | } ] 11 | TRAIN: 12 | DEVICE: 'cuda:0' 13 | SCRATCH: False 14 | DETERMINISTIC: True 15 | CRITERION: 16 | DEPTH: 17 | WEIGHT: 1.0 18 | LOSS: 'LEDLoss' 19 | NEED_ALL: True 20 | BASE_LR: 21 | 3e-4 22 | EPOCHS: 200 23 | RESUME_LAST: False 24 | OPTIMIZER: 25 | NAME: 'adam' 26 | DATA: 27 | DATASET: 'zind' 28 | DIR: 'src/dataset/zind' 29 | BATCH_SIZE: 6 30 | NUM_WORKERS: 6 31 | FOR_TEST_INDEX: None 32 | AUG: 33 | FLIP: True 34 | STRETCH: True 35 | ROTATE: True 36 | GAMMA: True 37 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/other/horizon_net_zind.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Using our framework to implement HorizonNet, Training on ZInd' 2 | TAG: 'zind' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'LSTM', 8 | 'output_name': 'Horizon', 9 | 'dropout': 0.5, 10 | } ] 11 | TRAIN: 12 | DEVICE: 'cuda:0' 13 | SCRATCH: False 14 | DETERMINISTIC: True 15 | CRITERION: 16 | DEPTH: 17 | WEIGHT: 1.0 18 | LOSS: 'BoundaryLoss' 19 | NEED_ALL: True 20 | BASE_LR: 21 | 3e-4 22 | EPOCHS: 200 23 | RESUME_LAST: True 24 | OPTIMIZER: 25 | NAME: 'adam' 26 | DATA: 27 | DATASET: 'zind' 28 | DIR: 'src/dataset/zind' 29 | BATCH_SIZE: 6 30 | NUM_WORKERS: 6 31 | FOR_TEST_INDEX: None 32 | AUG: 33 | FLIP: True 34 | STRETCH: True 35 | ROTATE: True 36 | GAMMA: True 37 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/other/led_net_mp3d.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Using our framework to implement LED2-Net, Training on MatterportLayout' 2 | TAG: 'mp3d' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'LSTM', 8 | 'output_name': 'LED', 9 | 'dropout': 0.5, 10 | } ] 11 | TRAIN: 12 | DEVICE: 'cuda:0' 13 | SCRATCH: False 14 | DETERMINISTIC: True 15 | CRITERION: 16 | DEPTH: 17 | WEIGHT: 1.0 18 | LOSS: 'LEDLoss' 19 | NEED_ALL: True 20 | BASE_LR: 21 | 3e-4 22 | EPOCHS: 1000 23 | RESUME_LAST: False 24 | OPTIMIZER: 25 | NAME: 'adam' 26 | DATA: 27 | DATASET: 'mp3d' 28 | DIR: 'src/dataset/mp3d' 29 | BATCH_SIZE: 6 30 | NUM_WORKERS: 6 31 | FOR_TEST_INDEX: None 32 | AUG: 33 | FLIP: True 34 | STRETCH: True 35 | ROTATE: True 36 | GAMMA: True 37 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/other/horizon_net_mp3d.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Using our framework to implement HorizonNet, Training on MatterportLayout' 2 | TAG: 'mp3d' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'LSTM', 8 | 'output_name': 'Horizon', 9 | 'dropout': 0.5, 10 | } ] 11 | TRAIN: 12 | DEVICE: 'cuda:0' 13 | SCRATCH: False 14 | DETERMINISTIC: True 15 | CRITERION: 16 | DEPTH: 17 | WEIGHT: 
1.0 18 | LOSS: 'BoundaryLoss' 19 | NEED_ALL: True 20 | BASE_LR: 21 | 3e-4 22 | EPOCHS: 1000 23 | RESUME_LAST: True 24 | OPTIMIZER: 25 | NAME: 'adam' 26 | DATA: 27 | DATASET: 'mp3d' 28 | DIR: 'src/dataset/mp3d' 29 | BATCH_SIZE: 6 30 | NUM_WORKERS: 6 31 | FOR_TEST_INDEX: None 32 | AUG: 33 | FLIP: True 34 | STRETCH: True 35 | ROTATE: True 36 | GAMMA: True 37 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /visualization/visualizer/README.md: -------------------------------------------------------------------------------- 1 | # 360LayoutVisualizer 2 | 3 | This repo is a visualization tool for 360 Manhattan layout based on PyQt5 and OpenGL. The layout format follows LayoutMP3D. 4 |

 5 | 6 | First, install the required packages with the following command. 7 | ```bash 8 | pip install -r requirements.txt 9 | ``` 10 | Then, run the script to visualize the provided example. 11 | ```bash 12 | python visualizer.py --img src/example.jpg --json src/example.json 13 | ``` 14 | You can use the mouse and keyboard to control the camera. 15 | ```yaml 16 | w, a, s, d: translate the camera 17 | left-click: rotate the camera 18 | scroll: zoom in/out 19 | ``` 20 |
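For reference, the layout JSON passed via `--json` can be generated with `xyz2json` from `utils/writer.py` in the parent LGT-Net repository, which emits LayoutMP3D-style keys such as `cameraHeight`, `layoutHeight`, `layoutPoints`, and `layoutWalls`. The snippet below is only an illustrative sketch: the corner coordinates are made up, the output filename simply matches the example command above, and the import assumes you run it from the LGT-Net root rather than from this sub-directory.

```python
# Hypothetical helper: write a LayoutMP3D-style JSON that this viewer can load.
# Corner values are illustrative; xyz2json comes from the parent repo's utils/writer.py.
import json
import numpy as np
from utils.writer import xyz2json

floor_corners = np.array([[ 1.0, 1.0,  1.0],   # (x, y, z) corners on the floor plane (y = 1)
                          [-1.0, 1.0,  1.0],
                          [-1.0, 1.0, -1.0],
                          [ 1.0, 1.0, -1.0]])
layout = xyz2json(floor_corners, ratio=0.8)     # ratio = ceiling height / camera height
with open('src/example.json', 'w') as f:
    json.dump(layout, f, indent=2)
```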

21 | -------------------------------------------------------------------------------- /src/config/ablation_study/w_lstm.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/ Bi-LSTM on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_lstm' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [{ 8 | 'decoder_name': 'LSTM', 9 | 'output_name': 'LGT', 10 | 'dropout': 0.5, 11 | }] 12 | TRAIN: 13 | DEVICE: 'cuda:0' 14 | SCRATCH: False 15 | DETERMINISTIC: True 16 | CRITERION: 17 | DEPTH: 18 | WEIGHT: 0.9 19 | RATIO: 20 | WEIGHT: 0.1 21 | GRAD: 22 | WEIGHT: 0.1 23 | WEIGHTS: [ 1.0, 1.0 ] 24 | BASE_LR: 25 | 1e-04 26 | RESUME_LAST: False 27 | OPTIMIZER: 28 | NAME: 'adam' 29 | EPOCHS: 1000 30 | DATA: 31 | DATASET: 'mp3d' 32 | DIR: 'src/dataset/mp3d' 33 | BATCH_SIZE: 6 34 | NUM_WORKERS: 6 35 | FOR_TEST_INDEX: None 36 | AUG: 37 | FLIP: True 38 | STRETCH: True 39 | ROTATE: True 40 | GAMMA: True 41 | AMP_OPT_LEVEL: 'O0' 42 | -------------------------------------------------------------------------------- /src/config/zind.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Training on ZInd' 2 | TAG: 'zind' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'SWG_Transformer', 8 | 'win_size': 16, 9 | 'rpe': 'lr_parameter_mirror', 10 | 'dropout': 0.0, 11 | 'depth': 6, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'zind' 34 | DIR: 'src/dataset/zind' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/wo_height.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o Height on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_height' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LED' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 1.0 21 | LOSS: 'LEDLoss' 22 | NEED_ALL: True 23 | BASE_LR: 24 | 1e-4 25 | RESUME_LAST: False 26 | OPTIMIZER: 27 | NAME: 'sgd' 28 | EPOCHS: 1000 29 | DATA: 30 | DATASET: 'mp3d' 31 | DIR: 'src/dataset/mp3d' 32 | BATCH_SIZE: 6 33 | NUM_WORKERS: 6 34 | FOR_TEST_INDEX: None 35 | AUG: 36 | FLIP: True 37 | STRETCH: True 38 | ROTATE: True 39 | GAMMA: True 40 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/mp3d.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Training on MatterportLayout' 2 | TAG: 'mp3d' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'SWG_Transformer', 8 | 'win_size': 16, 9 | 'rpe': 'lr_parameter_mirror', 10 | 'dropout': 0.0, 11 | 'depth': 8, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | 
SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/wo_nomal_gradient.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o Normal+Gradient on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_normal_gradient' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | EPOCHS: 1000 24 | BASE_LR: 25 | 1e-4 26 | RESUME_LAST: False 27 | OPTIMIZER: 28 | NAME: 'adam' 29 | DATA: 30 | DATASET: 'mp3d' 31 | DIR: 'src/dataset/mp3d' 32 | BATCH_SIZE: 6 33 | NUM_WORKERS: 6 34 | FOR_TEST_INDEX: None 35 | AUG: 36 | FLIP: True 37 | STRETCH: True 38 | ROTATE: True 39 | GAMMA: True 40 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/ablation_study/wo_pe.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o PE on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_pe' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': None, 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/full.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: Ours (full) on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_full' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 
38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/w_g_rpe1.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w G-RPE1 on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_g_rpe1' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/w_g_rpe2.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w G-RPE2 on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_g_rpe2' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_half', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/w_ape.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w APE on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_ape' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'ape': 'lr_parameter', 11 | 'rpe': None, 12 | 'dropout': 0.0, 13 | 'output_name': 'LGT' 14 | } ] 15 | TRAIN: 16 | DEVICE: 'cuda:0' 17 | SCRATCH: False 18 | DETERMINISTIC: True 19 | CRITERION: 20 | DEPTH: 21 | WEIGHT: 0.9 22 | RATIO: 23 | WEIGHT: 0.1 24 | GRAD: 25 | WEIGHT: 0.1 26 | WEIGHTS: [ 1.0, 1.0 ] 27 | BASE_LR: 28 | 1e-4 29 | RESUME_LAST: False 30 | OPTIMIZER: 31 | NAME: 'adam' 32 | EPOCHS: 1000 33 | DATA: 34 | DATASET: 'mp3d' 35 | DIR: 'src/dataset/mp3d' 36 | BATCH_SIZE: 6 37 | NUM_WORKERS: 6 38 | FOR_TEST_INDEX: None 39 | AUG: 40 | FLIP: True 41 | STRETCH: True 42 | ROTATE: True 43 | GAMMA: True 44 | AMP_OPT_LEVEL: 'O0' 45 | -------------------------------------------------------------------------------- /src/config/pano.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Training on 
PanoContext(train)+Stanford2D-3D(whole)' 2 | TAG: 'pano' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'SWG_Transformer', 8 | 'win_size': 16, 9 | 'rpe': 'lr_parameter_mirror', 10 | 'dropout': 0.0, 11 | 'depth': 6, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'pano_s2d3d_mix' 34 | DIR: 'src/dataset/pano_s2d3d' 35 | SUBSET: 'pano' 36 | BATCH_SIZE: 6 37 | NUM_WORKERS: 6 38 | FOR_TEST_INDEX: None 39 | AUG: 40 | FLIP: True 41 | STRETCH: True 42 | ROTATE: True 43 | GAMMA: True 44 | AMP_OPT_LEVEL: 'O0' 45 | -------------------------------------------------------------------------------- /src/config/ablation_study/wo_global.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o Global Block on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_global' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [{ 8 | 'decoder_name': 'Swin_Transformer', 9 | 'win_size': 16, 10 | 'rpe':'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | }] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | -------------------------------------------------------------------------------- /src/config/ablation_study/wo_window.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o Window Block on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_window' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [{ 8 | 'decoder_name': 'Transformer', 9 | 'win_size': 16, 10 | 'rpe':'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | }] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' 44 | 45 | -------------------------------------------------------------------------------- /src/config/s2d3d.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Training on Stanford2D-3D(train)+PanoContext(whole)' 2 | TAG: 's2d3d' 3 | SEED: 123 4 | MODEL: 5 | NAME: 'LGT_Net' 6 | ARGS: [ { 7 | 'decoder_name': 'SWG_Transformer', 8 | 'win_size': 16, 9 | 'rpe': 'lr_parameter_mirror', 10 | 'dropout': 0.0, 11 | 'depth': 6, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:2' 
16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 1.0 ] 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'pano_s2d3d_mix' 34 | DIR: 'src/dataset/pano_s2d3d' 35 | SUBSET: 's2d3d' 36 | BATCH_SIZE: 6 37 | NUM_WORKERS: 6 38 | FOR_TEST_INDEX: None 39 | AUG: 40 | FLIP: True 41 | STRETCH: True 42 | ROTATE: True 43 | GAMMA: True 44 | AMP_OPT_LEVEL: 'O0' 45 | -------------------------------------------------------------------------------- /src/config/ablation_study/wo_gradient.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/o Gradient on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_wo_gradient' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'output_name': 'LGT' 13 | } ] 14 | TRAIN: 15 | DEVICE: 'cuda:0' 16 | SCRATCH: False 17 | DETERMINISTIC: True 18 | CRITERION: 19 | DEPTH: 20 | WEIGHT: 0.9 21 | RATIO: 22 | WEIGHT: 0.1 23 | GRAD: 24 | WEIGHT: 0.1 25 | WEIGHTS: [ 1.0, 0.0 ] # only normal loss 26 | BASE_LR: 27 | 1e-4 28 | RESUME_LAST: False 29 | OPTIMIZER: 30 | NAME: 'adam' 31 | EPOCHS: 1000 32 | DATA: 33 | DATASET: 'mp3d' 34 | DIR: 'src/dataset/mp3d' 35 | BATCH_SIZE: 6 36 | NUM_WORKERS: 6 37 | FOR_TEST_INDEX: None 38 | AUG: 39 | FLIP: True 40 | STRETCH: True 41 | ROTATE: True 42 | GAMMA: True 43 | AMP_OPT_LEVEL: 'O0' -------------------------------------------------------------------------------- /src/config/ablation_study/w_vit.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/ ViT on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_vit' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'backbone': 'patch', 10 | 'win_size': 16, 11 | 'rpe': 'lr_parameter_mirror', 12 | 'dropout': 0.0, 13 | 'depth': 16, 14 | 'output_name': 'LGT' 15 | } ] 16 | TRAIN: 17 | DEVICE: 'cuda:0' 18 | SCRATCH: False 19 | DETERMINISTIC: True 20 | CRITERION: 21 | DEPTH: 22 | WEIGHT: 0.9 23 | RATIO: 24 | WEIGHT: 0.1 25 | GRAD: 26 | WEIGHT: 0.1 27 | WEIGHTS: [ 1.0, 1.0 ] 28 | BASE_LR: 29 | 1e-4 30 | RESUME_LAST: False 31 | OPTIMIZER: 32 | NAME: 'adam' 33 | EPOCHS: 1000 34 | DATA: 35 | DATASET: 'mp3d' 36 | DIR: 'src/dataset/mp3d' 37 | BATCH_SIZE: 6 38 | NUM_WORKERS: 6 39 | FOR_TEST_INDEX: None 40 | AUG: 41 | FLIP: True 42 | STRETCH: True 43 | ROTATE: True 44 | GAMMA: True 45 | AMP_OPT_LEVEL: 'O0' 46 | 47 | -------------------------------------------------------------------------------- /src/config/ablation_study/w_vit_zind.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Ablation Study: w/ ViT on ZInd' 2 | VAL_NAME: 'test' 3 | TAG: 'ablation_study_w_vit_zind' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'backbone': 'patch', 10 | 'win_size': 16, 11 | 'rpe': 'lr_parameter_mirror', 12 | 'dropout': 0.0, 13 | 'depth': 16, 14 | 'output_name': 'LGT' 15 | } ] 16 | TRAIN: 17 | DEVICE: 'cuda:0' 18 | SCRATCH: False 19 | DETERMINISTIC: True 20 | CRITERION: 21 | DEPTH: 22 | WEIGHT: 0.9 23 | RATIO: 24 | WEIGHT: 0.1 25 | GRAD: 26 | WEIGHT: 0.1 27 | WEIGHTS: [ 1.0, 1.0 ] 28 | BASE_LR: 29 | 1e-4 
30 | RESUME_LAST: False 31 | OPTIMIZER: 32 | NAME: 'adam' 33 | EPOCHS: 200 34 | DATA: 35 | DATASET: 'zind' 36 | DIR: 'src/dataset/zind' 37 | BATCH_SIZE: 6 38 | NUM_WORKERS: 6 39 | FOR_TEST_INDEX: None 40 | AUG: 41 | FLIP: True 42 | STRETCH: True 43 | ROTATE: True 44 | GAMMA: True 45 | AMP_OPT_LEVEL: 'O0' 46 | 47 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/geometry_line.glsl: -------------------------------------------------------------------------------- 1 | #version 410 2 | 3 | layout (lines) in; 4 | layout (triangle_strip, max_vertices = 4) out; 5 | 6 | uniform mat4 um4p; 7 | uniform mat4 um4v; 8 | uniform mat4 um4m; 9 | 10 | void main(){ 11 | float thickness = 0.04; 12 | float r = thickness / 2; 13 | mat4 mv = um4v * um4m; 14 | vec4 p1 = mv * gl_in[0].gl_Position; 15 | vec4 p2 = mv * gl_in[1].gl_Position; 16 | vec2 tmp = p2.xy - p1.xy; 17 | vec2 dir = normalize(p2.xy - p1.xy); 18 | if (tmp.x+tmp.y==0) 19 | dir = vec2(0, 1); 20 | vec2 normal = vec2(dir.y, -dir.x); 21 | vec4 offset1, offset2; 22 | offset1 = vec4(normal * r, 0, 0); 23 | offset2 = vec4(normal * r, 0, 0); 24 | 25 | vec4 coords[4]; 26 | coords[0] = p1 + offset1; 27 | coords[1] = p1 - offset1; 28 | coords[2] = p2 + offset2; 29 | coords[3] = p2 - offset2; 30 | for (int i = 0; i < 4; ++i) { 31 | coords[i] = um4p * coords[i]; 32 | gl_Position = coords[i]; 33 | EmitVertex(); 34 | } 35 | EndPrimitive(); 36 | } 37 | -------------------------------------------------------------------------------- /src/config/mp3d_scheduler.yaml: -------------------------------------------------------------------------------- 1 | COMMENT: 'Training on MatterportLayout' 2 | VAL_NAME: 'test' 3 | TAG: 'mp3d_scheduler' 4 | SEED: 123 5 | MODEL: 6 | NAME: 'LGT_Net' 7 | ARGS: [ { 8 | 'decoder_name': 'SWG_Transformer', 9 | 'win_size': 16, 10 | 'rpe': 'lr_parameter_mirror', 11 | 'dropout': 0.0, 12 | 'depth': 8, 13 | 'output_name': 'LGT' 14 | } ] 15 | TRAIN: 16 | DEVICE: 'cuda:0' 17 | SCRATCH: False 18 | DETERMINISTIC: True 19 | CRITERION: 20 | DEPTH: 21 | WEIGHT: 0.9 22 | RATIO: 23 | WEIGHT: 0.1 24 | GRAD: 25 | WEIGHT: 0.1 26 | WEIGHTS: [ 1.0, 1.0 ] 27 | BASE_LR: 28 | 3e-4 29 | RESUME_LAST: False 30 | OPTIMIZER: 31 | NAME: 'adam' 32 | EPOCHS: 1000 33 | LR_SCHEDULER: 34 | NAME: 'StepLR' 35 | ARGS: [ { 'step_size': 20, 'gamma': 0.9, 'last_epoch': -1} ] 36 | DATA: 37 | DATASET: 'mp3d' 38 | DIR: 'src/dataset/mp3d' 39 | BATCH_SIZE: 6 40 | NUM_WORKERS: 6 41 | FOR_TEST_INDEX: None 42 | AUG: 43 | FLIP: True 44 | STRETCH: True 45 | ROTATE: True 46 | GAMMA: True 47 | AMP_OPT_LEVEL: 'O0' 48 | -------------------------------------------------------------------------------- /models/other/optimizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/18 3 | @description: 4 | """ 5 | from torch import optim as optim 6 | 7 | 8 | def build_optimizer(config, model, logger): 9 | name = config.TRAIN.OPTIMIZER.NAME.lower() 10 | 11 | optimizer = None 12 | if name == 'sgd': 13 | optimizer = optim.SGD(model.parameters(), momentum=config.TRAIN.OPTIMIZER.MOMENTUM, nesterov=True, 14 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) 15 | elif name == 'adamw': 16 | optimizer = optim.AdamW(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS, 17 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) 18 | elif name == 'adam': 19 | optimizer = optim.Adam(model.parameters(), 
eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS, 20 | lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) 21 | 22 | logger.info(f"Build optimizer: {name}, lr:{config.TRAIN.BASE_LR}") 23 | 24 | return optimizer 25 | -------------------------------------------------------------------------------- /models/other/init_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/15 3 | @description: 4 | """ 5 | import random 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | import numpy as np 9 | import os 10 | import cv2 11 | 12 | 13 | def init_env(seed, deterministic=False, loader_work_num=0): 14 | # Fix seed 15 | # Python & NumPy 16 | np.random.seed(seed) 17 | random.seed(seed) 18 | os.environ['PYTHONHASHSEED'] = str(seed) 19 | 20 | # PyTorch 21 | torch.manual_seed(seed) # set the random seed for the CPU 22 | if torch.cuda.is_available(): 23 | torch.cuda.manual_seed(seed) # set the random seed for the current GPU 24 | torch.cuda.manual_seed_all(seed) # set the random seed for all GPUs 25 | 26 | # cuDNN 27 | if deterministic: 28 | # for reproducibility 29 | torch.backends.cudnn.benchmark = False 30 | torch.backends.cudnn.deterministic = True # with this flag set to True, cuDNN always returns the same deterministic (default) convolution algorithm 31 | else: 32 | cudnn.benchmark = True # set to True when the network's input sizes and types change little 33 | torch.backends.cudnn.deterministic = False 34 | 35 | # Using multiple threads in Opencv can cause deadlocks 36 | if loader_work_num != 0: 37 | cv2.setNumThreads(0) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 ZhiGang Jiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /visualization/visualizer/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Fu-En Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /postprocessing/post_process.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/10/08 3 | @description: 4 | """ 5 | import numpy as np 6 | import cv2 7 | 8 | from postprocessing.dula.layout import fit_layout 9 | from postprocessing.dula.layout_old import fit_layout_old 10 | from utils.conversion import depth2xyz, xyz2depth 11 | 12 | 13 | def post_process(b_depth, type_name='manhattan', need_cube=False): 14 | plan_y = 1 15 | b_xyz = depth2xyz(b_depth, plan_y) 16 | 17 | b_processed_xyz = [] 18 | for xyz in b_xyz: 19 | if type_name == 'manhattan': 20 | processed_xz = fit_layout(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False) 21 | elif type_name == 'manhattan_old': 22 | processed_xz = fit_layout_old(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False) 23 | elif type_name == 'atalanta': 24 | processed_xz = cv2.approxPolyDP(xyz[..., ::2].astype(np.float32), 0.1, False)[:, 0, :] 25 | else: 26 | raise NotImplementedError("Unknown post-processing type") 27 | 28 | if need_cube: 29 | assert len(processed_xz) == 4 30 | 31 | processed_xyz = np.insert(processed_xz, 1, plan_y, axis=1) 32 | b_processed_xyz.append(processed_xyz) 33 | 34 | return np.array(b_processed_xyz) -------------------------------------------------------------------------------- /loss/led_loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/12 3 | @description: 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class LEDLoss(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | self.loss = nn.L1Loss() 13 | 14 | def forward(self, gt, dt): 15 | camera_height = 1.6 16 | gt_depth = gt['depth'] * camera_height 17 | 18 | dt_ceil_depth = dt['ceil_depth'] * camera_height * gt['ratio'] 19 | dt_floor_depth = dt['depth'] * camera_height 20 | 21 | ceil_loss = self.loss(gt_depth, dt_ceil_depth) 22 | floor_loss = self.loss(gt_depth, dt_floor_depth) 23 | 24 | loss = floor_loss + ceil_loss 25 | 26 | return loss 27 | 28 | 29 | if __name__ == '__main__': 30 | import numpy as np 
31 | from dataset.mp3d_dataset import MP3DDataset 32 | 33 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train') 34 | gt = mp3d_dataset.__getitem__(0) 35 | 36 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 37 | gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1 38 | 39 | dummy_dt = { 40 | 'depth': gt['depth'].clone(), 41 | 'ceil_depth': gt['depth'] / gt['ratio'] 42 | } 43 | # dummy_dt['depth'][..., :20] *= 3 # some different 44 | 45 | led_loss = LEDLoss() 46 | loss = led_loss(gt, dummy_dt) 47 | print(loss) 48 | -------------------------------------------------------------------------------- /loss/object_loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/12 3 | @description: 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | from loss.grad_loss import GradLoss 8 | 9 | 10 | class ObjectLoss(nn.Module): 11 | def __init__(self): 12 | super().__init__() 13 | self.heat_map_loss = HeatmapLoss(reduction='mean') # FocalLoss(reduction='mean') 14 | self.l1_loss = nn.SmoothL1Loss() 15 | 16 | def forward(self, gt, dt): 17 | # TODO:: 18 | return 0 19 | 20 | 21 | class HeatmapLoss(nn.Module): 22 | def __init__(self, weight=None, alpha=2, beta=4, reduction='mean'): 23 | super(HeatmapLoss, self).__init__() 24 | self.alpha = alpha 25 | self.beta = beta 26 | self.reduction = reduction 27 | 28 | def forward(self, targets, inputs): 29 | center_id = (targets == 1.0).float() 30 | other_id = (targets != 1.0).float() 31 | center_loss = -center_id * (1.0 - inputs) ** self.alpha * torch.log(inputs + 1e-14) 32 | other_loss = -other_id * (1 - targets) ** self.beta * inputs ** self.alpha * torch.log(1.0 - inputs + 1e-14) 33 | loss = center_loss + other_loss 34 | 35 | batch_size = loss.size(0) 36 | if self.reduction == 'mean': 37 | loss = torch.sum(loss) / batch_size 38 | 39 | if self.reduction == 'sum': 40 | loss = torch.sum(loss) / batch_size 41 | 42 | return loss 43 | -------------------------------------------------------------------------------- /Post-Porcessing.md: -------------------------------------------------------------------------------- 1 | # Post-Processing 2 | 3 | ## Update 4 | - 2023.5.18 We found that it was unreasonable to use cross product to detect occlusion in manhattan post-processing [layout.py#L70](https://github.com/zhigangjiang/LGT-Net/blob/b642d6288e3a4bf265e54ab93eed3455e760402b/postprocessing/dula/layout.py#L70). We fixed this problem [here](https://github.com/zhigangjiang/LGT-Net/blob/aae66ab9a2f361bc4e16af564f244acff5ec8aee/postprocessing/dula/layout.py#LL80C4-L80C15). In addition, we add some optimizations to force the layout coordinates to align to satisfy the constraints based on the Manhattan assumption. 5 | 6 | ## Step 7 | 8 | 1. Simplify polygon by [DP algorithm](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm) 9 | 10 | ![img.png](src/fig/post_processing/img_0.png) 11 | 12 | 2. Detect occlusion, calculating box fill with 1 13 | 14 | ![img.png](src/fig/post_processing/img_1.png) 15 | 16 | 3. Fill in reasonable sampling section 17 | 18 | ![img.png](src/fig/post_processing/img_2.png) 19 | 20 | 4. 
Output processed polygon 21 | 22 | ![img.png](src/fig/post_processing/img_3.png) 23 | 24 | ## Performance 25 | It works; here is a performance comparison on the MatterportLayout dataset: 26 | 27 | | Method | 2D IoU(%) | 3D IoU(%) | RMSE | $\mathbf{\delta_{1}}$ | 28 | |--|--|--|--|--| 29 | | without post-proc | 83.52 | 81.11 | 0.204 | 0.951 | 30 | | original post-proc | 83.12 | 80.71 | 0.230 | 0.936 | 31 | | optimized post-proc | 83.48 | 81.08 | 0.214 | 0.940 | 32 | 33 | original: 34 | 35 | ![img.png](src/fig/post_processing/original.png) 36 | 37 | optimized: 38 | 39 | ![img.png](src/fig/post_processing/optimized.png) 40 | -------------------------------------------------------------------------------- /utils/writer.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/11/06 3 | @description: 4 | """ 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def xyz2json(xyz, ratio, camera_height=1.6): 10 | xyz = xyz * camera_height 11 | ceiling_height = camera_height * ratio 12 | layout_height = camera_height + ceiling_height 13 | data = { 14 | 'cameraHeight': camera_height, 15 | 'layoutHeight': layout_height, 16 | 'cameraCeilingHeight': ceiling_height, 17 | 'layoutObj2ds': { 18 | 'num': 0, 19 | 'obj2ds': [] 20 | }, 21 | 'layoutPoints': { 22 | 'num': xyz.shape[0], 23 | 'points': [] 24 | }, 25 | 'layoutWalls': { 26 | 'num': xyz.shape[0], 27 | 'walls': [] 28 | } 29 | } 30 | 31 | xyz = np.concatenate([xyz, xyz[0:1, :]], axis=0) 32 | R_180 = cv2.Rodrigues(np.array([0, -1 * np.pi, 0], np.float32))[0] 33 | for i in range(xyz.shape[0] - 1): 34 | a = np.dot(R_180, xyz[i, :]) 35 | a[0] *= -1 36 | b = np.dot(R_180, xyz[i + 1, :]) 37 | b[0] *= -1 38 | c = a.copy() 39 | c[1] = 0 40 | normal = np.cross(a - b, a - c) 41 | normal /= np.linalg.norm(normal) 42 | d = -np.sum(normal * a) 43 | plane = np.asarray([normal[0], normal[1], normal[2], d]) 44 | 45 | data['layoutPoints']['points'].append({'xyz': a.tolist(), 'id': i}) 46 | 47 | next_i = 0 if i + 1 >= (xyz.shape[0] - 1) else i + 1 48 | tmp = { 49 | 'normal': normal.tolist(), 50 | 'planeEquation': plane.tolist(), 51 | 'pointsIdx': [i, next_i] 52 | } 53 | data['layoutWalls']['walls'].append(tmp) 54 | 55 | return data 56 | 57 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/17 3 | @description: 4 | """ 5 | import os 6 | import sys 7 | import logging 8 | import functools 9 | from termcolor import colored 10 | 11 | 12 | def build_logger(config): 13 | output_dir = config.LOGGER.DIR 14 | local_rank = config.LOCAL_RANK 15 | name = config.MODEL.NAME 16 | logger = get_logger(output_dir, local_rank, name) 17 | return logger 18 | 19 | 20 | @functools.lru_cache() 21 | def get_logger(output_dir=None, local_rank=None, name="LGTNet"): 22 | if output_dir and not os.path.exists(output_dir): 23 | os.makedirs(output_dir) 24 | 25 | # create logger 26 | logger = logging.getLogger(name) 27 | logger.setLevel(logging.DEBUG) 28 | logger.propagate = False 29 | 30 | # create formatter 31 | fmt = f'[%(asctime)s %(name)s][%(levelname)1.1s](%(filename)s %(lineno)d): %(message)s' 32 | color_fmt = colored(f'[%(asctime)s %(name)s][%(levelname)1.1s][{local_rank}]', 'green') + colored( 33 | f'(%(filename)s %(lineno)d)', 34 | 'yellow') + ': %(message)s' 35 | if local_rank in [0] or local_rank is None: 36 | console_handler = logging.StreamHandler(sys.stdout) 37 | 
console_handler.setLevel(logging.DEBUG) 38 | console_handler.setFormatter( 39 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 40 | logger.addHandler(console_handler) 41 | 42 | if output_dir is not None: 43 | # create file handlers 44 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{local_rank}.log'), mode='a') 45 | file_handler.setLevel(logging.DEBUG) 46 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 47 | logger.addHandler(file_handler) 48 | 49 | return logger 50 | -------------------------------------------------------------------------------- /loss/boundary_loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/12 3 | @description: For HorizonNet, using latitudes to calculate loss. 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | from utils.conversion import depth2xyz, xyz2lonlat 8 | 9 | 10 | class BoundaryLoss(nn.Module): 11 | def __init__(self): 12 | super().__init__() 13 | self.loss = nn.L1Loss() 14 | 15 | def forward(self, gt, dt): 16 | gt_floor_xyz = depth2xyz(gt['depth']) 17 | gt_ceil_xyz = gt_floor_xyz.clone() 18 | gt_ceil_xyz[..., 1] = -gt['ratio'] 19 | 20 | gt_floor_boundary = xyz2lonlat(gt_floor_xyz)[..., -1:] 21 | gt_ceil_boundary = xyz2lonlat(gt_ceil_xyz)[..., -1:] 22 | 23 | gt_boundary = torch.cat([gt_floor_boundary, gt_ceil_boundary], dim=-1).permute(0, 2, 1) 24 | dt_boundary = dt['boundary'] 25 | 26 | loss = self.loss(gt_boundary, dt_boundary) 27 | return loss 28 | 29 | 30 | if __name__ == '__main__': 31 | import numpy as np 32 | from dataset.mp3d_dataset import MP3DDataset 33 | 34 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train') 35 | gt = mp3d_dataset.__getitem__(0) 36 | 37 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 38 | gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1 39 | 40 | dummy_dt = { 41 | 'depth': gt['depth'].clone(), 42 | 'boundary': torch.cat([ 43 | xyz2lonlat(depth2xyz(gt['depth']))[..., -1:], 44 | xyz2lonlat(depth2xyz(gt['depth'], plan_y=-gt['ratio']))[..., -1:] 45 | ], dim=-1).permute(0, 2, 1) 46 | } 47 | # dummy_dt['boundary'][:, :, :20] /= 1.2 # some different 48 | 49 | boundary_loss = BoundaryLoss() 50 | loss = boundary_loss(gt, dummy_dt) 51 | print(loss) 52 | -------------------------------------------------------------------------------- /models/other/scheduler.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/09/14 3 | @description: 4 | """ 5 | 6 | 7 | class WarmupScheduler: 8 | def __init__(self, optimizer, lr_pow, init_lr, warmup_lr, warmup_step, max_step, **kwargs): 9 | self.lr_pow = lr_pow 10 | self.init_lr = init_lr 11 | self.running_lr = init_lr 12 | self.warmup_lr = warmup_lr 13 | self.warmup_step = warmup_step 14 | self.max_step = max_step 15 | self.optimizer = optimizer 16 | 17 | def step_update(self, cur_step): 18 | if cur_step < self.warmup_step: 19 | frac = cur_step / self.warmup_step 20 | step = self.warmup_lr - self.init_lr 21 | self.running_lr = self.init_lr + step * frac 22 | else: 23 | frac = (float(cur_step) - self.warmup_step) / (self.max_step - self.warmup_step) 24 | scale_running_lr = max((1. - frac), 0.) 
** self.lr_pow 25 | self.running_lr = self.warmup_lr * scale_running_lr 26 | 27 | if self.optimizer is not None: 28 | for param_group in self.optimizer.param_groups: 29 | param_group['lr'] = self.running_lr 30 | 31 | 32 | if __name__ == '__main__': 33 | import matplotlib.pyplot as plt 34 | 35 | scheduler = WarmupScheduler(optimizer=None, 36 | lr_pow=4, 37 | init_lr=0.0000003, 38 | warmup_lr=0.00003, 39 | warmup_step=10000, 40 | max_step=100000) 41 | 42 | x = [] 43 | y = [] 44 | for i in range(100000): 45 | if i == 10000-1: 46 | print() 47 | scheduler.step_update(i) 48 | x.append(i) 49 | y.append(scheduler.running_lr) 50 | plt.plot(x, y, linewidth=1) 51 | plt.show() 52 | -------------------------------------------------------------------------------- /models/modules/swin_transformer.py: -------------------------------------------------------------------------------- 1 | from models.modules.transformer_modules import * 2 | 3 | 4 | class Swin_Transformer(nn.Module): 5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, 6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): 7 | super().__init__() 8 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, 9 | patch_num, ape) 10 | self.pos_dropout = nn.Dropout(dropout) 11 | self.layers = nn.ModuleList([]) 12 | for i in range(depth): 13 | self.layers.append(nn.ModuleList([ 14 | PreNorm(dim, WinAttention(dim, win_size=win_size, shift=0 if (i % 2 == 0) else win_size // 2, 15 | heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)), 16 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), 17 | ])) 18 | 19 | def forward(self, x): 20 | if self.absolute_pos_embed is not None: 21 | x = self.absolute_pos_embed(x) 22 | x = self.pos_dropout(x) 23 | for attn, ff in self.layers: 24 | x = attn(x) + x 25 | x = ff(x) + x 26 | return x 27 | 28 | 29 | if __name__ == '__main__': 30 | token_dim = 1024 31 | toke_len = 256 32 | 33 | transformer = Swin_Transformer(dim=token_dim, 34 | depth=6, 35 | heads=16, 36 | win_size=8, 37 | dim_head=64, 38 | mlp_dim=2048, 39 | dropout=0.1) 40 | 41 | input = torch.randn(1, toke_len, token_dim) 42 | output = transformer(input) 43 | print(output.shape) 44 | -------------------------------------------------------------------------------- /models/modules/transformer.py: -------------------------------------------------------------------------------- 1 | from models.modules.transformer_modules import * 2 | 3 | 4 | class Transformer(nn.Module): 5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, 6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): 7 | super().__init__() 8 | 9 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, 10 | patch_num, ape) 11 | self.pos_dropout = nn.Dropout(dropout) 12 | self.layers = nn.ModuleList([]) 13 | for _ in range(depth): 14 | self.layers.append(nn.ModuleList([ 15 | PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, patch_num=patch_num, 16 | rpe=rpe, rpe_pos=rpe_pos)), 17 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) 18 | ])) 19 | 20 | def forward(self, x): 21 | if self.absolute_pos_embed is not None: 22 | x = self.absolute_pos_embed(x) 23 | x = self.pos_dropout(x) 24 | for attn, ff in self.layers: 25 | x = attn(x) + x 26 | x = ff(x) + x 27 | return x 28 | 29 | 30 | if __name__ == '__main__': 31 | token_dim = 1024 32 | toke_len = 256 33 | 34 | transformer = Transformer(dim=token_dim, 
depth=6, heads=16, 35 | dim_head=64, mlp_dim=2048, dropout=0.1, 36 | patch_num=256, ape='lr_parameter', rpe='lr_parameter_mirror') 37 | 38 | total = sum(p.numel() for p in transformer.parameters()) 39 | trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad) 40 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) 41 | 42 | input = torch.randn(1, toke_len, token_dim) 43 | output = transformer(input) 44 | print(output.shape) 45 | -------------------------------------------------------------------------------- /models/modules/patch_feature_extractor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from einops.layers.torch import Rearrange 5 | 6 | 7 | class PatchFeatureExtractor(nn.Module): 8 | x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None]) 9 | x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None]) 10 | 11 | def __init__(self, patch_num=256, input_shape=None): 12 | super(PatchFeatureExtractor, self).__init__() 13 | 14 | if input_shape is None: 15 | input_shape = [3, 512, 1024] 16 | self.patch_dim = 1024 17 | self.patch_num = patch_num 18 | 19 | img_channel = input_shape[0] 20 | img_h = input_shape[1] 21 | img_w = input_shape[2] 22 | 23 | p_h, p_w = img_h, img_w // self.patch_num 24 | p_dim = p_h * p_w * img_channel 25 | 26 | self.patch_embedding = nn.Sequential( 27 | Rearrange('b c h (p_n p_w) -> b p_n (h p_w c)', p_w=p_w), 28 | nn.Linear(p_dim, self.patch_dim) 29 | ) 30 | 31 | self.x_mean.requires_grad = False 32 | self.x_std.requires_grad = False 33 | 34 | def _prepare_x(self, x): 35 | x = x.clone() 36 | if self.x_mean.device != x.device: 37 | self.x_mean = self.x_mean.to(x.device) 38 | self.x_std = self.x_std.to(x.device) 39 | x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std 40 | 41 | return x 42 | 43 | def forward(self, x): 44 | # x [b 3 512 1024] 45 | x = self._prepare_x(x) # [b 3 512 1024] 46 | x = self.patch_embedding(x) # [b 256(patch_num) 1024(d)] 47 | x = x.permute(0, 2, 1) # [b 1024(d) 256(patch_num)] 48 | return x 49 | 50 | 51 | if __name__ == '__main__': 52 | from PIL import Image 53 | extractor = PatchFeatureExtractor() 54 | img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1)) 55 | input = torch.Tensor([img]) # 1 3 512 1024 56 | feature = extractor(input) 57 | print(feature.shape) # 1, 1024, 256 58 | -------------------------------------------------------------------------------- /convert_ckpt.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/11/22 3 | @description: Conversion training ckpt into inference ckpt 4 | """ 5 | import argparse 6 | import os 7 | 8 | import torch 9 | 10 | from config.defaults import merge_from_file 11 | 12 | 13 | def parse_option(): 14 | parser = argparse.ArgumentParser(description='Conversion training ckpt into inference ckpt') 15 | parser.add_argument('--cfg', 16 | type=str, 17 | required=True, 18 | metavar='FILE', 19 | help='path of config file') 20 | 21 | parser.add_argument('--output_path', 22 | type=str, 23 | help='path of output ckpt') 24 | 25 | args = parser.parse_args() 26 | 27 | print("arguments:") 28 | for arg in vars(args): 29 | print(arg, ":", getattr(args, arg)) 30 | print("-" * 50) 31 | return args 32 | 33 | 34 | def convert_ckpt(): 35 | args = parse_option() 36 | config = merge_from_file(args.cfg) 37 | ck_dir = os.path.join("checkpoints", 
f"{config.MODEL.ARGS[0]['decoder_name']}_{config.MODEL.ARGS[0]['output_name']}_Net", 38 | config.TAG) 39 | print(f"Processing {ck_dir}") 40 | model_paths = [name for name in os.listdir(ck_dir) if '_best_' in name] 41 | if len(model_paths) == 0: 42 | print("Not find best ckpt") 43 | return 44 | model_path = os.path.join(ck_dir, model_paths[0]) 45 | print(f"Loading {model_path}") 46 | checkpoint = torch.load(model_path, map_location=torch.device('cuda:0')) 47 | net = checkpoint['net'] 48 | output_path = None 49 | if args.output_path is None: 50 | output_path = os.path.join(ck_dir, 'best.pkl') 51 | else: 52 | output_path = args.output_path 53 | if output_path is None: 54 | print("Output path is invalid") 55 | print(f"Save on: {output_path}") 56 | os.makedirs(os.path.dirname(output_path), exist_ok=True) 57 | torch.save(net, output_path) 58 | 59 | 60 | if __name__ == '__main__': 61 | convert_ckpt() 62 | -------------------------------------------------------------------------------- /models/modules/swg_transformer.py: -------------------------------------------------------------------------------- 1 | from models.modules.transformer_modules import * 2 | 3 | 4 | class SWG_Transformer(nn.Module): 5 | def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, 6 | dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): 7 | super().__init__() 8 | self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, 9 | patch_num, ape) 10 | self.pos_dropout = nn.Dropout(dropout) 11 | self.layers = nn.ModuleList([]) 12 | for i in range(depth): 13 | if i % 2 == 0: 14 | attention = WinAttention(dim, win_size=win_size, shift=0 if (i % 3 == 0) else win_size // 2, 15 | heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos) 16 | else: 17 | attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, 18 | patch_num=patch_num, rpe=rpe, rpe_pos=rpe_pos) 19 | 20 | self.layers.append(nn.ModuleList([ 21 | PreNorm(dim, attention), 22 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), 23 | ])) 24 | 25 | def forward(self, x): 26 | if self.absolute_pos_embed is not None: 27 | x = self.absolute_pos_embed(x) 28 | x = self.pos_dropout(x) 29 | for attn, ff in self.layers: 30 | x = attn(x) + x 31 | x = ff(x) + x 32 | return x 33 | 34 | 35 | if __name__ == '__main__': 36 | token_dim = 1024 37 | toke_len = 256 38 | 39 | transformer = SWG_Transformer(dim=token_dim, 40 | depth=6, 41 | heads=16, 42 | win_size=8, 43 | dim_head=64, 44 | mlp_dim=2048, 45 | dropout=0.1) 46 | 47 | input = torch.randn(1, toke_len, token_dim) 48 | output = transformer(input) 49 | print(output.shape) 50 | -------------------------------------------------------------------------------- /loss/grad_loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/12 3 | @description: 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | 10 | from visualization.grad import get_all 11 | 12 | 13 | class GradLoss(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.loss = nn.L1Loss() 17 | self.cos = nn.CosineSimilarity(dim=-1, eps=0) 18 | 19 | self.grad_conv = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=0, bias=False, padding_mode='circular') 20 | self.grad_conv.weight = nn.Parameter(torch.tensor([[[1, 0, -1]]]).float()) 21 | self.grad_conv.weight.requires_grad = False 22 | 23 | def forward(self, gt, dt): 24 | gt_direction, _, gt_angle_grad = get_all(gt['depth'], 
self.grad_conv) 25 | dt_direction, _, dt_angle_grad = get_all(dt['depth'], self.grad_conv) 26 | 27 | normal_loss = (1 - self.cos(gt_direction, dt_direction)).mean() 28 | grad_loss = self.loss(gt_angle_grad, dt_angle_grad) 29 | return [normal_loss, grad_loss] 30 | 31 | 32 | if __name__ == '__main__': 33 | from dataset.mp3d_dataset import MP3DDataset 34 | from utils.boundary import depth2boundaries 35 | from utils.conversion import uv2xyz 36 | from visualization.boundary import draw_boundaries 37 | from visualization.floorplan import draw_floorplan 38 | 39 | def show_boundary(image, depth, ratio): 40 | boundary_list = depth2boundaries(ratio, depth, step=None) 41 | draw_boundaries(image.transpose(1, 2, 0), boundary_list=boundary_list, show=True) 42 | draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, center_color=0.8) 43 | 44 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', patch_num=256) 45 | gt = mp3d_dataset.__getitem__(1) 46 | gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 47 | dummy_dt = { 48 | 'depth': gt['depth'].clone(), 49 | } 50 | # dummy_dt['depth'][..., 20] *= 3 # some different 51 | 52 | # show_boundary(gt['image'], gt['depth'][0].numpy(), gt['ratio']) 53 | # show_boundary(gt['image'], dummy_dt['depth'][0].numpy(), gt['ratio']) 54 | 55 | grad_loss = GradLoss() 56 | loss = grad_loss(gt, dummy_dt) 57 | print(loss) 58 | -------------------------------------------------------------------------------- /evaluation/eval_visible_iou.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/08/02 3 | @description: 4 | The 2DIoU for calculating the visible and full boundaries, such as the MP3D dataset, 5 | has the following data: {'train': 0.9775843958583535, 'test': 0.9828616219607289, 'val': 0.9883810438132491}, 6 | indicating that our best performance is limited to below 98.29% 2DIoU using our approach. 
7 | """ 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from tqdm import tqdm 12 | from evaluation.iou import calc_IoU_2D 13 | from visualization.floorplan import draw_iou_floorplan 14 | from utils.conversion import depth2xyz, uv2xyz 15 | 16 | 17 | def eval_dataset_visible_IoU(dataset, show=False): 18 | bar = tqdm(dataset, total=len(dataset), ncols=100) 19 | iou2ds = [] 20 | for data in bar: 21 | bar.set_description(f"Processing {data['id']}") 22 | corners = data['corners'] 23 | corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners 24 | all_xz = uv2xyz(corners)[..., ::2] 25 | visible_xz = depth2xyz(data['depth'])[..., ::2] 26 | iou2d = calc_IoU_2D(all_xz, visible_xz) 27 | iou2ds.append(iou2d) 28 | if show: 29 | layout_floorplan = draw_iou_floorplan(all_xz, visible_xz, iou2d=iou2d) 30 | plt.imshow(layout_floorplan) 31 | plt.show() 32 | 33 | mean_iou2d = np.array(iou2ds).mean() 34 | return mean_iou2d 35 | 36 | 37 | def execute_eval_dataset_visible_IoU(root_dir, dataset, modes=None): 38 | if modes is None: 39 | modes = ["train", "test", "valid"] 40 | 41 | iou2d_d = {} 42 | for mode in modes: 43 | print("mode: {}".format(mode)) 44 | iou2d = eval_dataset_visible_IoU(dataset(root_dir, mode, patch_num=1024, 45 | keys=['depth', 'visible_corners', 'corners', 'id']), show=False) 46 | iou2d_d[mode] = iou2d 47 | return iou2d_d 48 | 49 | 50 | if __name__ == '__main__': 51 | from dataset.mp3d_dataset import MP3DDataset 52 | 53 | iou2d_d = execute_eval_dataset_visible_IoU(root_dir='../src/dataset/mp3d', 54 | dataset=MP3DDataset, 55 | modes=['train', 'test', 'val']) 56 | print(iou2d_d) 57 | -------------------------------------------------------------------------------- /visualization/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import numpy as np 5 | from imageio import imread 6 | import json 7 | import argparse 8 | import visualization.visualizer.shader 9 | 10 | from PyQt5 import QtWidgets, QtGui, QtOpenGL 11 | from PyQt5.QtWidgets import * 12 | from PyQt5.QtGui import QIcon 13 | import PyQt5.QtCore as QtCore 14 | 15 | import glm 16 | from OpenGL.GL import * 17 | from OpenGL.GLU import * 18 | from OpenGL.GLUT import * 19 | 20 | from visualization.visualizer.Viewer import Utils 21 | from visualization.visualizer.Viewer import LayoutView 22 | 23 | 24 | class TopWindow(QMainWindow): 25 | def __init__(self, img, layout, floor_reverse=False, parent=None): 26 | super().__init__(parent) 27 | sizeObject = QtWidgets.QDesktopWidget().screenGeometry(-1) 28 | [self.h, self.w] = [sizeObject.height(), sizeObject.width()] 29 | ratio = 0.9 30 | self.h = int(self.h * ratio) 31 | self.w = int(self.w * ratio) 32 | self.setGeometry(20, 60, self.w, self.h) 33 | self.setWindowTitle("Layout Visualizer") 34 | self.centeralWidget = QWidget(self) 35 | 36 | self.layout = layout 37 | self.LayoutViewer = LayoutView.GLWindow(img, main=self, parent=self.centeralWidget) 38 | wallNum, wallPoints, lines, mesh = Utils.Label2Mesh(Utils.OldFormat2Mine(self.layout), floor_reverse) 39 | self.LayoutViewer.updateLayoutMesh(wallNum, wallPoints, lines, mesh) 40 | 41 | layout = QGridLayout() 42 | layout.setRowStretch(0, 1) 43 | layout.setColumnStretch(0, 1) 44 | layout.addWidget(self.LayoutViewer, 0, 0, 1, 1) 45 | self.centeralWidget.setLayout(layout) 46 | self.setCentralWidget(self.centeralWidget) 47 | 48 | def enterEvent(self, event): 49 | self.setFocus(True) 50 | 51 | 52 | def 
visualize_3d(layout, img): 53 | app = QtWidgets.QApplication(sys.argv) 54 | window = TopWindow(img, layout=layout) 55 | window.show() 56 | # cv2.waitKey() 57 | sys.exit(app.exec_()) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser(description='360 Layout Visualizer', 62 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 63 | parser.add_argument('--img', type=str, required=True, help='The panorama path') 64 | parser.add_argument('--json', type=str, required=True, help='The output json path') 65 | args = parser.parse_args() 66 | 67 | img = imread(args.img, pilmode='RGB') 68 | with open(args.json, 'r') as f: 69 | layout = json.load(f) 70 | 71 | visualize_3d(layout, img) 72 | -------------------------------------------------------------------------------- /models/other/criterion.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/7/19 3 | @description: 4 | """ 5 | import torch 6 | import loss 7 | 8 | from utils.misc import tensor2np 9 | 10 | 11 | def build_criterion(config, logger): 12 | criterion = {} 13 | device = config.TRAIN.DEVICE 14 | 15 | for k in config.TRAIN.CRITERION.keys(): 16 | sc = config.TRAIN.CRITERION[k] 17 | if sc.WEIGHT is None or float(sc.WEIGHT) == 0: 18 | continue 19 | criterion[sc.NAME] = { 20 | 'loss': getattr(loss, sc.LOSS)(), 21 | 'weight': float(sc.WEIGHT), 22 | 'sub_weights': sc.WEIGHTS, 23 | 'need_all': sc.NEED_ALL 24 | } 25 | 26 | criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].to(device) 27 | if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: 28 | criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].type(torch.float16) 29 | 30 | # logger.info(f"Build criterion:{sc.WEIGHT}_{sc.NAME}_{sc.LOSS}_{sc.WEIGHTS}") 31 | return criterion 32 | 33 | 34 | def calc_criterion(criterion, gt, dt, epoch_loss_d): 35 | loss = None 36 | postfix_d = {} 37 | for k in criterion.keys(): 38 | if criterion[k]['need_all']: 39 | single_loss = criterion[k]['loss'](gt, dt) 40 | ws_loss = None 41 | for i, sub_weight in enumerate(criterion[k]['sub_weights']): 42 | if sub_weight == 0: 43 | continue 44 | if ws_loss is None: 45 | ws_loss = single_loss[i] * sub_weight 46 | else: 47 | ws_loss = ws_loss + single_loss[i] * sub_weight 48 | single_loss = ws_loss if ws_loss is not None else single_loss 49 | else: 50 | assert k in gt.keys(), "ground label is None:" + k 51 | assert k in dt.keys(), "detection key is None:" + k 52 | if k == 'ratio' and gt[k].shape[-1] != dt[k].shape[-1]: 53 | gt[k] = gt[k].repeat(1, dt[k].shape[-1]) 54 | single_loss = criterion[k]['loss'](gt[k], dt[k]) 55 | 56 | postfix_d[k] = tensor2np(single_loss) 57 | if k not in epoch_loss_d.keys(): 58 | epoch_loss_d[k] = [] 59 | epoch_loss_d[k].append(postfix_d[k]) 60 | 61 | single_loss = single_loss * criterion[k]['weight'] 62 | if loss is None: 63 | loss = single_loss 64 | else: 65 | loss = loss + single_loss 66 | 67 | k = 'loss' 68 | postfix_d[k] = tensor2np(loss) 69 | if k not in epoch_loss_d.keys(): 70 | epoch_loss_d[k] = [] 71 | epoch_loss_d[k].append(postfix_d[k]) 72 | return loss, postfix_d, epoch_loss_d 73 | -------------------------------------------------------------------------------- /visualization/visualizer/shader/fragment_pano.glsl: -------------------------------------------------------------------------------- 1 | #version 410 2 | #define pi 3.14159265359 3 | layout(location = 0) out vec4 fragColor; 4 | in vec3 modelPosition; 5 | 6 | uniform sampler2D pano; 7 | uniform float alpha; 8 | uniform int wallNum; 9 | 
uniform vec2 wallPoints[100]; 10 | 11 | 12 | bool intersect1D(float a1, float a2, float b1, float b2) 13 | { 14 | if (a1 > a2) 15 | { 16 | float tmp = a1; 17 | a1 = a2; 18 | a2 = tmp; 19 | } 20 | if (b1 > b2) 21 | { 22 | float tmp = b1; 23 | b1 =b2; 24 | b2 = tmp; 25 | } 26 | return max(a1, b1) <= min(a2, b2); 27 | } 28 | float cross(vec2 o, vec2 a, vec2 b) 29 | { 30 | return (a.x-o.x) * (b.y-o.y) - (a.y-o.y) * (b.x-o.x); 31 | } 32 | 33 | bool intersect(vec2 a1, vec2 a2, vec2 b1, vec2 b2) 34 | { 35 | return intersect1D(a1.x, a2.x, b1.x, b2.x) 36 | && intersect1D(a1.y, a2.y, b1.y, b2.y) 37 | && cross(a1, a2, b1) * cross(a1, a2, b2) <= 0 38 | && cross(b1, b2, a1) * cross(b1, b2, a2) <= 0; 39 | } 40 | 41 | bool checkIntersectWalls(vec2 pts){ 42 | vec2 a = pts * 0.99; 43 | vec2 b = vec2(0, 0); 44 | for (int i=0; i=min(c.x, d.x) && min(a.y,b.y)<=max(c.y,d.y) && max(a.y, b.y)>=min(c.y, d.y)) 49 | if (intersect(a, b, c, d)) 50 | return true; 51 | /* 52 | float u=(c.x-a.x)*(b.y-a.y)-(b.x-a.x)*(c.y-a.y); 53 | float v=(d.x-a.x)*(b.y-a.y)-(b.x-a.x)*(d.y-a.y); 54 | float w=(a.x-c.x)*(d.y-c.y)-(d.x-c.x)*(a.y-c.y); 55 | float z=(b.x-c.x)*(d.y-c.y)-(d.x-c.x)*(b.y-c.y); 56 | return (u*v<=1e-5 && w*z<=1e-5); 57 | */ 58 | } 59 | 60 | return false; 61 | } 62 | 63 | 64 | void main(){ 65 | float x = modelPosition.x; 66 | float y = modelPosition.y; 67 | float z = modelPosition.z; 68 | float normXYZ = sqrt(pow(x, 2) + pow(y, 2) + pow(z, 2)); 69 | float normXZ = sqrt(pow(x, 2) + pow(z, 2)); 70 | float lon = (atan(x, z) / pi + 1) * 0.5; 71 | float lat = (asin(y / normXYZ) / (0.5*pi) + 1) * 0.5; 72 | vec2 coord = vec2(lon, lat); 73 | if (!checkIntersectWalls(vec2(x, z))) 74 | //if (true) 75 | fragColor = vec4(texture(pano, coord).xyz, alpha); 76 | else{ 77 | if (mod(y * 10, 10) < 5 ^^ mod(x * 10, 10) < 5 ^^ mod(z * 10, 10) < 5) 78 | fragColor = vec4(vec3(1.0, 1.0, 1.0), alpha); 79 | else 80 | fragColor = vec4(vec3(0.5, 0.5, 0.5), alpha); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /visualization/obj3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Zhigang Jiang 3 | @time: 2022/05/25 4 | @description: reference: https://github.com/sunset1995/PanoPlane360/blob/main/vis_planes.py 5 | """ 6 | import open3d 7 | import numpy as np 8 | from utils.conversion import pixel2lonlat 9 | 10 | 11 | def create_3d_obj(img, depth, save_path=None, mesh=True, mesh_show_back_face=False, show=False): 12 | assert img.shape[0] == depth.shape[0], "" 13 | h = img.shape[0] 14 | w = img.shape[1] 15 | # Project to 3d 16 | lon = pixel2lonlat(np.array(range(w)), w=w, axis=0)[None].repeat(h, axis=0) 17 | lat = pixel2lonlat(np.array(range(h)), h=h, axis=1)[..., None].repeat(w, axis=1) 18 | 19 | z = depth * np.sin(lat) 20 | x = depth * np.cos(lat) * np.cos(lon) 21 | y = depth * np.cos(lat) * np.sin(lon) 22 | pts_xyz = np.stack([x, -z, y], -1).reshape(-1, 3) 23 | pts_rgb = img.reshape(-1, 3) 24 | 25 | if mesh: 26 | pid = np.arange(len(pts_xyz)).reshape(h, w) 27 | faces = np.concatenate([ 28 | np.stack([ 29 | pid[:-1, :-1], pid[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1], 30 | ], -1), 31 | np.stack([ 32 | pid[1:, :-1], np.roll(pid, -1, axis=1)[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1], 33 | ], -1) 34 | ]).reshape(-1, 3).tolist() 35 | scene = open3d.geometry.TriangleMesh() 36 | scene.vertices = open3d.utility.Vector3dVector(pts_xyz) 37 | scene.vertex_colors = open3d.utility.Vector3dVector(pts_rgb) 38 | scene.triangles = 
open3d.utility.Vector3iVector(faces) 39 | 40 | else: 41 | scene = open3d.geometry.PointCloud() 42 | scene.points = open3d.utility.Vector3dVector(pts_xyz) 43 | scene.colors = open3d.utility.Vector3dVector(pts_rgb) 44 | if save_path: 45 | open3d.io.write_triangle_mesh(save_path, scene, write_triangle_uvs=True) 46 | if show: 47 | open3d.visualization.draw_geometries([scene], mesh_show_back_face=mesh_show_back_face) 48 | 49 | 50 | if __name__ == '__main__': 51 | from dataset.mp3d_dataset import MP3DDataset 52 | from utils.boundary import depth2boundaries, layout2depth 53 | from visualization.boundary import draw_boundaries 54 | 55 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', for_test_index=10, patch_num=1024) 56 | gt = mp3d_dataset.__getitem__(3) 57 | 58 | boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None) 59 | pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) 60 | layout_depth = layout2depth(boundary_list, show=False) 61 | create_3d_obj(gt['image'].transpose(1, 2, 0), layout_depth, save_path=f"../src/output/{gt['id']}_3d.gltf", 62 | mesh=True) 63 | -------------------------------------------------------------------------------- /evaluation/f1_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: Zhigang Jiang 3 | @time: 2022/01/28 4 | @description: 5 | Holistic 3D Vision Challenge on General Room Layout Estimation Track Evaluation Package 6 | Reference: https://github.com/bertjiazheng/indoor-layout-evaluation 7 | """ 8 | 9 | from scipy.optimize import linear_sum_assignment 10 | import numpy as np 11 | import scipy 12 | 13 | HEIGHT, WIDTH = 512, 1024 14 | MAX_DISTANCE = np.sqrt(HEIGHT**2 + WIDTH**2) 15 | 16 | 17 | def f1_score_2d(gt_corners, dt_corners, thresholds): 18 | distances = scipy.spatial.distance.cdist(gt_corners, dt_corners) 19 | return eval_junctions(distances, thresholds=thresholds) 20 | 21 | 22 | def eval_junctions(distances, thresholds=5): 23 | thresholds = thresholds if isinstance(thresholds, tuple) or isinstance( 24 | thresholds, list) else list([thresholds]) 25 | 26 | num_gts, num_preds = distances.shape 27 | 28 | # filter the matches between ceiling-wall and floor-wall junctions 29 | mask = np.zeros_like(distances, dtype=np.bool) 30 | mask[:num_gts//2, :num_preds//2] = True 31 | mask[num_gts//2:, num_preds//2:] = True 32 | distances[~mask] = np.inf 33 | 34 | # F-measure under different thresholds 35 | Fs = [] 36 | Ps = [] 37 | Rs = [] 38 | for threshold in thresholds: 39 | distances_temp = distances.copy() 40 | 41 | # filter the mis-matched pairs 42 | distances_temp[distances_temp > threshold] = np.inf 43 | 44 | # remain the rows and columns that contain non-inf elements 45 | distances_temp = distances_temp[:, np.any(np.isfinite(distances_temp), axis=0)] 46 | 47 | if np.prod(distances_temp.shape) == 0: 48 | Fs.append(0) 49 | Ps.append(0) 50 | Rs.append(0) 51 | continue 52 | 53 | distances_temp = distances_temp[np.any(np.isfinite(distances_temp), axis=1), :] 54 | 55 | # solve the bipartite graph matching problem 56 | row_ind, col_ind = linear_sum_assignment_with_inf(distances_temp) 57 | true_positive = np.sum(np.isfinite(distances_temp[row_ind, col_ind])) 58 | 59 | # compute precision and recall 60 | precision = true_positive / num_preds 61 | recall = true_positive / num_gts 62 | 63 | # compute F measure 64 | Fs.append(2 * precision * recall / (precision + recall)) 65 | Ps.append(precision) 66 | Rs.append(recall) 67 | 68 | 
return Fs, Ps, Rs 69 | 70 | 71 | def linear_sum_assignment_with_inf(cost_matrix): 72 | """ 73 | Deal with linear_sum_assignment with inf according to 74 | https://github.com/scipy/scipy/issues/6900#issuecomment-451735634 75 | """ 76 | cost_matrix = np.copy(cost_matrix) 77 | cost_matrix[np.isinf(cost_matrix)] = MAX_DISTANCE 78 | return linear_sum_assignment(cost_matrix) -------------------------------------------------------------------------------- /evaluation/analyse_layout_type.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2022/01/31 3 | @description: 4 | ZInd: 5 | {'test': {'mw': 2789, 'aw': 381}, 'train': {'mw': 21228, 'aw': 3654}, 'val': {'mw': 2647, 'aw': 433}} 6 | 7 | """ 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import json 11 | 12 | from tqdm import tqdm 13 | from evaluation.iou import calc_IoU_2D 14 | from visualization.floorplan import draw_floorplan 15 | from visualization.boundary import draw_boundaries 16 | from utils.conversion import depth2xyz, uv2xyz 17 | 18 | 19 | def analyse_layout_type(dataset, show=False): 20 | bar = tqdm(dataset, total=len(dataset), ncols=100) 21 | manhattan = 0 22 | atlanta = 0 23 | corner_type = {} 24 | for data in bar: 25 | bar.set_description(f"Processing {data['id']}") 26 | corners = data['corners'] 27 | corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners 28 | corners_count = str(len(corners)) if len(corners) < 10 else "10" 29 | if corners_count not in corner_type: 30 | corner_type[corners_count] = 0 31 | corner_type[corners_count] += 1 32 | 33 | all_xz = uv2xyz(corners)[..., ::2] 34 | 35 | c = len(all_xz) 36 | flag = False 37 | for i in range(c - 1): 38 | l1 = all_xz[i + 1] - all_xz[i] 39 | l2 = all_xz[(i + 2) % c] - all_xz[i + 1] 40 | a = (np.linalg.norm(l1)*np.linalg.norm(l2)) 41 | if a == 0: 42 | continue 43 | dot = np.dot(l1, l2)/a 44 | if 0.9 > abs(dot) > 0.1: 45 | # cos-1(0.1)=84.26 > angle > cos-1(0.9)=25.84 or 46 | # cos-1(-0.9)=154.16 > angle > cos-1(-0.1)=95.74 47 | flag = True 48 | break 49 | if flag: 50 | atlanta += 1 51 | else: 52 | manhattan += 1 53 | 54 | if flag and show: 55 | draw_floorplan(all_xz, show=True) 56 | draw_boundaries(data['image'].transpose(1, 2, 0), [corners], ratio=data['ratio'], show=True) 57 | 58 | corner_type = dict(sorted(corner_type.items(), key=lambda item: int(item[0]))) 59 | return {'manhattan': manhattan, "atlanta": atlanta, "corner_type": corner_type} 60 | 61 | 62 | def execute_analyse_layout_type(root_dir, dataset, modes=None): 63 | if modes is None: 64 | modes = ["train", "val", "test"] 65 | 66 | iou2d_d = {} 67 | for mode in modes: 68 | print("mode: {}".format(mode)) 69 | types = analyse_layout_type(dataset(root_dir, mode), show=False) 70 | iou2d_d[mode] = types 71 | print(json.dumps(types, indent=4)) 72 | return iou2d_d 73 | 74 | 75 | if __name__ == '__main__': 76 | from dataset.zind_dataset import ZindDataset 77 | from dataset.mp3d_dataset import MP3DDataset 78 | 79 | iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/mp3d', 80 | dataset=MP3DDataset) 81 | # iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/zind', 82 | # dataset=ZindDataset) 83 | print(json.dumps(iou2d_d, indent=4)) 84 | -------------------------------------------------------------------------------- /preprocessing/filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/7/5 3 | @description: 4 | """ 5 | import json 6 | import math 7 | import 
shutil 8 | 9 | import numpy as np 10 | from utils.boundary import * 11 | import dataset 12 | import os 13 | from tqdm import tqdm 14 | from PIL import Image 15 | from visualization.boundary import * 16 | from visualization.floorplan import * 17 | from shapely.geometry import Polygon, Point 18 | 19 | 20 | def filter_center(ceil_corners): 21 | xyz = uv2xyz(ceil_corners, plan_y=1.6) 22 | xz = xyz[:, ::2] 23 | poly = Polygon(xz).buffer(-0.01) 24 | return poly.contains(Point(0, 0)) 25 | 26 | 27 | def filter_boundary(corners): 28 | if is_ceil_boundary(corners): 29 | return True 30 | elif is_floor_boundary(corners): 31 | return True 32 | else: 33 | # An intersection occurs and an exception is considered 34 | return False 35 | 36 | 37 | def filter_self_intersection(corners): 38 | xz = uv2xyz(corners)[:, ::2] 39 | poly = Polygon(xz) 40 | return poly.is_valid 41 | 42 | 43 | def filter_dataset(dataset, show=False, output_dir=None): 44 | if output_dir is None: 45 | output_dir = os.path.join(dataset.root_dir, dataset.mode) 46 | output_img_dir = os.path.join(output_dir, 'img_align') 47 | output_label_dir = os.path.join(output_dir, 'label_cor_align') 48 | else: 49 | output_dir = os.path.join(output_dir, dataset.mode) 50 | output_img_dir = os.path.join(output_dir, 'img') 51 | output_label_dir = os.path.join(output_dir, 'label_cor') 52 | 53 | if not os.path.exists(output_img_dir): 54 | os.makedirs(output_img_dir) 55 | 56 | if not os.path.exists(output_label_dir): 57 | os.makedirs(output_label_dir) 58 | 59 | bar = tqdm(dataset, total=len(dataset)) 60 | for data in bar: 61 | name = data['name'] 62 | bar.set_description(f"Processing {name}") 63 | img = data['img'] 64 | corners = data['corners'] 65 | 66 | if not filter_center(corners[1::2]): 67 | if show: 68 | draw_boundaries(img, corners_list=[corners[0::2], corners[1::2]], show=True) 69 | if not os.path.exists(data['img_path']): 70 | print("already remove") 71 | else: 72 | print(f"move {name}") 73 | shutil.move(data['img_path'], os.path.join(output_img_dir, os.path.basename(data['img_path']))) 74 | shutil.move(data['label_path'], os.path.join(output_label_dir, os.path.basename(data['label_path']))) 75 | 76 | 77 | def execute_filter_dataset(root_dir, dataset_name="PanoS2D3DDataset", modes=None, output_dir=None): 78 | if modes is None: 79 | modes = ["train", "test", "valid"] 80 | 81 | for mode in modes: 82 | print("mode: {}".format(mode)) 83 | 84 | filter_dataset(getattr(dataset, dataset_name)(root_dir, mode), show=False, output_dir=output_dir) 85 | 86 | 87 | if __name__ == '__main__': 88 | execute_filter_dataset(root_dir='/root/data/hd/hnet_dataset', 89 | dataset_name="PanoS2D3DDataset", modes=['train', "test", "valid"], 90 | output_dir='/root/data/hd/hnet_dataset_close') 91 | -------------------------------------------------------------------------------- /models/build.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/18 3 | @description: 4 | """ 5 | import os 6 | import models 7 | import torch.distributed as dist 8 | import torch 9 | 10 | from torch.nn import init 11 | from torch.optim import lr_scheduler 12 | from utils.time_watch import TimeWatch 13 | from models.other.optimizer import build_optimizer 14 | from models.other.criterion import build_criterion 15 | 16 | 17 | def build_model(config, logger): 18 | name = config.MODEL.NAME 19 | w = TimeWatch(f"Build model: {name}", logger) 20 | 21 | ddp = config.WORLD_SIZE > 1 22 | if ddp: 23 | logger.info(f"use ddp") 24 | 
dist.init_process_group("nccl", init_method='tcp://127.0.0.1:23456', rank=config.LOCAL_RANK, 25 | world_size=config.WORLD_SIZE) 26 | 27 | device = config.TRAIN.DEVICE 28 | logger.info(f"Creating model: {name} to device:{device}, args:{config.MODEL.ARGS[0]}") 29 | 30 | net = getattr(models, name) 31 | ckpt_dir = os.path.abspath(os.path.join(config.CKPT.DIR, os.pardir)) if config.DEBUG else config.CKPT.DIR 32 | if len(config.MODEL.ARGS) != 0: 33 | model = net(ckpt_dir=ckpt_dir, **config.MODEL.ARGS[0]) 34 | else: 35 | model = net(ckpt_dir=ckpt_dir) 36 | logger.info(f'model dropout: {model.dropout_d}') 37 | model = model.to(device) 38 | optimizer = None 39 | scheduler = None 40 | 41 | if config.MODE == 'train': 42 | optimizer = build_optimizer(config, model, logger) 43 | 44 | config.defrost() 45 | config.TRAIN.START_EPOCH = model.load(device, logger, optimizer, best=config.MODE != 'train' or not config.TRAIN.RESUME_LAST) 46 | config.freeze() 47 | 48 | if config.MODE == 'train' and len(config.MODEL.FINE_TUNE) > 0: 49 | for param in model.parameters(): 50 | param.requires_grad = False 51 | for layer in config.MODEL.FINE_TUNE: 52 | logger.info(f'Fine-tune: {layer}') 53 | getattr(model, layer).requires_grad_(requires_grad=True) 54 | getattr(model, layer).reset_parameters() 55 | 56 | model.show_parameter_number(logger) 57 | 58 | if config.MODE == 'train': 59 | if len(config.TRAIN.LR_SCHEDULER.NAME) > 0: 60 | if 'last_epoch' not in config.TRAIN.LR_SCHEDULER.ARGS[0].keys(): 61 | config.TRAIN.LR_SCHEDULER.ARGS[0]['last_epoch'] = config.TRAIN.START_EPOCH - 1 62 | 63 | scheduler = getattr(lr_scheduler, config.TRAIN.LR_SCHEDULER.NAME)(optimizer=optimizer, 64 | **config.TRAIN.LR_SCHEDULER.ARGS[0]) 65 | logger.info(f"Use scheduler: name:{config.TRAIN.LR_SCHEDULER.NAME} args: {config.TRAIN.LR_SCHEDULER.ARGS[0]}") 66 | logger.info(f"Current scheduler last lr: {scheduler.get_last_lr()}") 67 | else: 68 | scheduler = None 69 | 70 | if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: 71 | import apex 72 | logger.info(f"use amp:{config.AMP_OPT_LEVEL}") 73 | model, optimizer = apex.amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL, verbosity=0) 74 | if ddp: 75 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.TRAIN.DEVICE], 76 | broadcast_buffers=True) # use rank:0 bn 77 | 78 | criterion = build_criterion(config, logger) 79 | if optimizer is not None: 80 | logger.info(f"Finally lr: {optimizer.param_groups[0]['lr']}") 81 | return model, optimizer, criterion, scheduler 82 | -------------------------------------------------------------------------------- /visualization/grad.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/11/06 3 | @description: 4 | """ 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import matplotlib.pyplot as plt 9 | 10 | from utils.conversion import depth2xyz 11 | 12 | 13 | def convert_img(value, h, need_nor=True, cmap=None): 14 | value = value.clone().detach().cpu().numpy()[None] 15 | if need_nor: 16 | value -= value.min() 17 | value /= value.max() - value.min() 18 | grad_img = value.repeat(int(h), axis=0) 19 | 20 | if cmap is None: 21 | grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1) 22 | elif cmap == cv2.COLORMAP_PLASMA: 23 | grad_img = cv2.applyColorMap((grad_img * 255).astype(np.uint8), colormap=cmap) 24 | grad_img = grad_img[..., ::-1] 25 | grad_img = grad_img.astype(np.float) / 255.0 26 | elif cmap == 'HSV': 27 | grad_img = np.round(grad_img * 1000) / 1000.0 28 | 
grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1) 29 | grad_img[..., 0] = grad_img[..., 0] * 180 30 | grad_img[..., 1] = 255 31 | grad_img[..., 2] = 255 32 | grad_img = grad_img.astype(np.uint8) 33 | grad_img = cv2.cvtColor(grad_img, cv2.COLOR_HSV2RGB) 34 | grad_img = grad_img.astype(np.float) / 255.0 35 | return grad_img 36 | 37 | 38 | def show_grad(depth, grad_conv, h=5, show=False): 39 | """ 40 | :param h: 41 | :param depth: [patch_num] 42 | :param grad_conv: 43 | :param show: 44 | :return: 45 | """ 46 | 47 | direction, angle, grad = get_all(depth[None], grad_conv) 48 | 49 | # depth_img = convert_img(depth, h) 50 | # angle_img = convert_img(angle[0], h) 51 | # grad_img = convert_img(grad[0], depth.shape[-1] // 4 - h * 2) 52 | depth_img = convert_img(depth, h, cmap=cv2.COLORMAP_PLASMA) 53 | angle_img = convert_img(angle[0], h, cmap='HSV') 54 | 55 | # vis_grad = grad[0] / grad[0].max() / 2 + 0.5 56 | grad_img = convert_img(grad[0], h) 57 | img = np.concatenate([depth_img, angle_img, grad_img], axis=0) 58 | if show: 59 | plt.imshow(img) 60 | plt.show() 61 | return img 62 | 63 | 64 | def get_grad(direction): 65 | """ 66 | :param direction: [b patch_num] 67 | :return:[b patch_num] 68 | """ 69 | a = torch.roll(direction, -1, dims=1) # xz[i+1] 70 | b = torch.roll(direction, 1, dims=1) # xz[i-1] 71 | grad = torch.acos(torch.clip(a[..., 0] * b[..., 0] + a[..., 1] * b[..., 1], -1+1e-6, 1-1e-6)) 72 | return grad 73 | 74 | 75 | def get_grad2(angle, grad_conv): 76 | """ 77 | :param angle: [b patch_num] 78 | :param grad_conv: 79 | :return:[b patch_num] 80 | """ 81 | angle = torch.sin(angle) 82 | angle = angle + 1 83 | 84 | angle = torch.cat([angle[..., -1:], angle, angle[..., :1]], dim=-1) 85 | grad = grad_conv(angle[:, None]) # [b, patch_num] -> [b, 1, patch_num] 86 | # grad = torch.abs(grad) 87 | return grad.reshape(angle.shape[0], -1) 88 | 89 | 90 | def get_edge_angle(direction): 91 | """ 92 | :param direction: [b patch_num 2] 93 | :return: 94 | """ 95 | angle = torch.atan2(direction[..., 1], direction[..., 0]) 96 | return angle 97 | 98 | 99 | def get_edge_direction(depth): 100 | xz = depth2xyz(depth)[..., ::2] 101 | direction = torch.roll(xz, -1, dims=1) - xz # direct[i] = xz[i+1] - xz[i] 102 | direction = direction / direction.norm(p=2, dim=-1)[..., None] 103 | return direction 104 | 105 | 106 | def get_all(depth, grad_conv): 107 | """ 108 | 109 | :param grad_conv: 110 | :param depth: [b patch_num] 111 | :return: 112 | """ 113 | direction = get_edge_direction(depth) 114 | angle = get_edge_angle(direction) 115 | # angle_grad = get_grad(direction) 116 | angle_grad = get_grad2(angle, grad_conv) # signed gradient 117 | return direction, angle, angle_grad 118 | -------------------------------------------------------------------------------- /models/modules/conv_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from torch import nn, einsum 5 | from einops import rearrange 6 | 7 | 8 | class PreNorm(nn.Module): 9 | def __init__(self, dim, fn): 10 | super().__init__() 11 | self.norm = nn.LayerNorm(dim) 12 | self.fn = fn 13 | 14 | def forward(self, x, **kwargs): 15 | return self.fn(self.norm(x), **kwargs) 16 | 17 | 18 | class GELU(nn.Module): 19 | def forward(self, input): 20 | return F.gelu(input) 21 | 22 | 23 | class Attend(nn.Module): 24 | 25 | def __init__(self, dim=None): 26 | super().__init__() 27 | self.dim = dim 28 | 29 | def forward(self, input): 30 | return F.softmax(input, 
dim=self.dim, dtype=input.dtype) 31 | 32 | 33 | class FeedForward(nn.Module): 34 | def __init__(self, dim, hidden_dim, dropout=0.): 35 | super().__init__() 36 | self.net = nn.Sequential( 37 | nn.Linear(dim, hidden_dim), 38 | GELU(), 39 | nn.Dropout(dropout), 40 | nn.Linear(hidden_dim, dim), 41 | nn.Dropout(dropout) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.net(x) 46 | 47 | 48 | class Attention(nn.Module): 49 | def __init__(self, dim, heads=8, dim_head=64, dropout=0.): 50 | super().__init__() 51 | inner_dim = dim_head * heads 52 | project_out = not (heads == 1 and dim_head == dim) 53 | 54 | self.heads = heads 55 | self.scale = dim_head ** -0.5 56 | 57 | self.attend = Attend(dim=-1) 58 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 59 | 60 | self.to_out = nn.Sequential( 61 | nn.Linear(inner_dim, dim), 62 | nn.Dropout(dropout) 63 | ) if project_out else nn.Identity() 64 | 65 | def forward(self, x): 66 | b, n, _, h = *x.shape, self.heads 67 | qkv = self.to_qkv(x).chunk(3, dim=-1) 68 | q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv) 69 | dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale 70 | attn = self.attend(dots) 71 | out = einsum('b h i j, b h j d -> b h i d', attn, v) 72 | out = rearrange(out, 'b h n d -> b n (h d)') 73 | return self.to_out(out) 74 | 75 | 76 | class Conv(nn.Module): 77 | def __init__(self, dim, dropout=0.): 78 | super().__init__() 79 | self.dim = dim 80 | self.net = nn.Sequential( 81 | nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0), 82 | nn.Dropout(dropout) 83 | ) 84 | 85 | def forward(self, x): 86 | x = x.transpose(1, 2) 87 | x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1) 88 | x = self.net(x) 89 | return x.transpose(1, 2) 90 | 91 | 92 | class ConvTransformer(nn.Module): 93 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): 94 | super().__init__() 95 | self.layers = nn.ModuleList([]) 96 | for _ in range(depth): 97 | self.layers.append(nn.ModuleList([ 98 | PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)), 99 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), 100 | PreNorm(dim, Conv(dim, dropout=dropout)) 101 | ])) 102 | 103 | def forward(self, x): 104 | for attn, ff, cov in self.layers: 105 | x = attn(x) + x 106 | x = ff(x) + x 107 | x = cov(x) + x 108 | return x 109 | 110 | 111 | if __name__ == '__main__': 112 | token_dim = 1024 113 | toke_len = 256 114 | 115 | transformer = ConvTransformer(dim=token_dim, 116 | depth=6, 117 | heads=16, 118 | dim_head=64, 119 | mlp_dim=2048, 120 | dropout=0.1) 121 | 122 | total = sum(p.numel() for p in transformer.parameters()) 123 | trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad) 124 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) 125 | 126 | input = torch.randn(1, toke_len, token_dim) 127 | output = transformer(input) 128 | print(output.shape) 129 | -------------------------------------------------------------------------------- /dataset/mp3d_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/25 3 | @description: 4 | """ 5 | import os 6 | import json 7 | 8 | from dataset.communal.read import read_image, read_label 9 | from dataset.communal.base_dataset import BaseDataset 10 | from utils.logger import get_logger 11 | 12 | 13 | class MP3DDataset(BaseDataset): 14 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, 15 | 
split_list=None, patch_num=256, keys=None, for_test_index=None): 16 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) 17 | 18 | if logger is None: 19 | logger = get_logger() 20 | self.root_dir = root_dir 21 | 22 | split_dir = os.path.join(root_dir, 'split') 23 | label_dir = os.path.join(root_dir, 'label') 24 | img_dir = os.path.join(root_dir, 'image') 25 | 26 | if split_list is None: 27 | with open(os.path.join(split_dir, f"{mode}.txt"), 'r') as f: 28 | split_list = [x.rstrip().split() for x in f] 29 | 30 | split_list.sort() 31 | if for_test_index is not None: 32 | split_list = split_list[:for_test_index] 33 | 34 | self.data = [] 35 | invalid_num = 0 36 | for name in split_list: 37 | name = "_".join(name) 38 | img_path = os.path.join(img_dir, f"{name}.png") 39 | label_path = os.path.join(label_dir, f"{name}.json") 40 | 41 | if not os.path.exists(img_path): 42 | logger.warning(f"{img_path} not exists") 43 | invalid_num += 1 44 | continue 45 | if not os.path.exists(label_path): 46 | logger.warning(f"{label_path} not exists") 47 | invalid_num += 1 48 | continue 49 | 50 | with open(label_path, 'r') as f: 51 | label = json.load(f) 52 | 53 | if self.max_wall_num >= 10: 54 | if label['layoutWalls']['num'] < self.max_wall_num: 55 | invalid_num += 1 56 | continue 57 | elif self.max_wall_num != 0 and label['layoutWalls']['num'] != self.max_wall_num: 58 | invalid_num += 1 59 | continue 60 | 61 | # print(label['layoutWalls']['num']) 62 | self.data.append([img_path, label_path]) 63 | 64 | logger.info( 65 | f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}") 66 | 67 | def __getitem__(self, idx): 68 | rgb_path, label_path = self.data[idx] 69 | label = read_label(label_path, data_type='MP3D') 70 | image = read_image(rgb_path, self.shape) 71 | output = self.process_data(label, image, self.patch_num) 72 | return output 73 | 74 | 75 | if __name__ == "__main__": 76 | import numpy as np 77 | from PIL import Image 78 | 79 | from tqdm import tqdm 80 | from visualization.boundary import draw_boundaries 81 | from visualization.floorplan import draw_floorplan 82 | from utils.boundary import depth2boundaries 83 | from utils.conversion import uv2xyz 84 | 85 | modes = ['test', 'val'] 86 | for i in range(1): 87 | for mode in modes: 88 | print(mode) 89 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode=mode, aug={ 90 | 'STRETCH': True, 91 | 'ROTATE': True, 92 | 'FLIP': True, 93 | 'GAMMA': True 94 | }) 95 | save_dir = f'../src/dataset/mp3d/visualization/{mode}' 96 | if not os.path.isdir(save_dir): 97 | os.makedirs(save_dir) 98 | 99 | bar = tqdm(mp3d_dataset, ncols=100) 100 | for data in bar: 101 | bar.set_description(f"Processing {data['id']}") 102 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) 103 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) 104 | Image.fromarray((pano_img * 255).astype(np.uint8)).save( 105 | os.path.join(save_dir, f"{data['id']}_boundary.png")) 106 | 107 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, 108 | marker_color=None, center_color=0.8, show_radius=None) 109 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( 110 | os.path.join(save_dir, f"{data['id']}_floorplan.png")) 111 | -------------------------------------------------------------------------------- /dataset/build.py: -------------------------------------------------------------------------------- 1 
| """ 2 | @Date: 2021/07/18 3 | @description: 4 | """ 5 | import numpy as np 6 | import torch.utils.data 7 | from dataset.mp3d_dataset import MP3DDataset 8 | from dataset.pano_s2d3d_dataset import PanoS2D3DDataset 9 | from dataset.pano_s2d3d_mix_dataset import PanoS2D3DMixDataset 10 | from dataset.zind_dataset import ZindDataset 11 | 12 | 13 | def build_loader(config, logger): 14 | name = config.DATA.DATASET 15 | ddp = config.WORLD_SIZE > 1 16 | train_dataset = None 17 | train_data_loader = None 18 | if config.MODE == 'train': 19 | train_dataset = build_dataset(mode='train', config=config, logger=logger) 20 | 21 | val_dataset = build_dataset(mode=config.VAL_NAME if config.MODE != 'test' else 'test', config=config, logger=logger) 22 | 23 | train_sampler = None 24 | val_sampler = None 25 | if ddp: 26 | if train_dataset: 27 | train_sampler = torch.utils.data.DistributedSampler(train_dataset, shuffle=True) 28 | val_sampler = torch.utils.data.DistributedSampler(val_dataset, shuffle=False) 29 | 30 | batch_size = config.DATA.BATCH_SIZE 31 | num_workers = 0 if config.DEBUG else config.DATA.NUM_WORKERS 32 | pin_memory = config.DATA.PIN_MEMORY 33 | if train_dataset: 34 | logger.info(f'Train data loader batch size: {batch_size}') 35 | train_data_loader = torch.utils.data.DataLoader( 36 | train_dataset, sampler=train_sampler, 37 | batch_size=batch_size, 38 | shuffle=True, 39 | num_workers=num_workers, 40 | pin_memory=pin_memory, 41 | drop_last=True, 42 | ) 43 | batch_size = batch_size - (len(val_dataset) % np.arange(batch_size, 0, -1)).tolist().index(0) 44 | logger.info(f'Val data loader batch size: {batch_size}') 45 | val_data_loader = torch.utils.data.DataLoader( 46 | val_dataset, sampler=val_sampler, 47 | batch_size=batch_size, 48 | shuffle=False, 49 | num_workers=num_workers, 50 | pin_memory=pin_memory, 51 | drop_last=False 52 | ) 53 | logger.info(f'Build data loader: num_workers:{num_workers} pin_memory:{pin_memory}') 54 | return train_data_loader, val_data_loader 55 | 56 | 57 | def build_dataset(mode, config, logger): 58 | name = config.DATA.DATASET 59 | if name == 'mp3d': 60 | dataset = MP3DDataset( 61 | root_dir=config.DATA.DIR, 62 | mode=mode, 63 | shape=config.DATA.SHAPE, 64 | max_wall_num=config.DATA.WALL_NUM, 65 | aug=config.DATA.AUG if mode == 'train' else None, 66 | camera_height=config.DATA.CAMERA_HEIGHT, 67 | logger=logger, 68 | for_test_index=config.DATA.FOR_TEST_INDEX, 69 | keys=config.DATA.KEYS 70 | ) 71 | elif name == 'pano_s2d3d': 72 | dataset = PanoS2D3DDataset( 73 | root_dir=config.DATA.DIR, 74 | mode=mode, 75 | shape=config.DATA.SHAPE, 76 | max_wall_num=config.DATA.WALL_NUM, 77 | aug=config.DATA.AUG if mode == 'train' else None, 78 | camera_height=config.DATA.CAMERA_HEIGHT, 79 | logger=logger, 80 | for_test_index=config.DATA.FOR_TEST_INDEX, 81 | subset=config.DATA.SUBSET, 82 | keys=config.DATA.KEYS 83 | ) 84 | elif name == 'pano_s2d3d_mix': 85 | dataset = PanoS2D3DMixDataset( 86 | root_dir=config.DATA.DIR, 87 | mode=mode, 88 | shape=config.DATA.SHAPE, 89 | max_wall_num=config.DATA.WALL_NUM, 90 | aug=config.DATA.AUG if mode == 'train' else None, 91 | camera_height=config.DATA.CAMERA_HEIGHT, 92 | logger=logger, 93 | for_test_index=config.DATA.FOR_TEST_INDEX, 94 | subset=config.DATA.SUBSET, 95 | keys=config.DATA.KEYS 96 | ) 97 | elif name == 'zind': 98 | dataset = ZindDataset( 99 | root_dir=config.DATA.DIR, 100 | mode=mode, 101 | shape=config.DATA.SHAPE, 102 | max_wall_num=config.DATA.WALL_NUM, 103 | aug=config.DATA.AUG if mode == 'train' else None, 104 | 
camera_height=config.DATA.CAMERA_HEIGHT, 105 | logger=logger, 106 | for_test_index=config.DATA.FOR_TEST_INDEX, 107 | is_simple=True, 108 | is_ceiling_flat=False, 109 | keys=config.DATA.KEYS, 110 | vp_align=config.EVAL.POST_PROCESSING is not None and 'manhattan' in config.EVAL.POST_PROCESSING 111 | ) 112 | else: 113 | raise NotImplementedError(f"Unknown dataset: {name}") 114 | 115 | return dataset 116 | -------------------------------------------------------------------------------- /dataset/pano_s2d3d_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/16 3 | @description: 4 | """ 5 | import math 6 | import os 7 | import numpy as np 8 | 9 | from dataset.communal.read import read_image, read_label 10 | from dataset.communal.base_dataset import BaseDataset 11 | from utils.logger import get_logger 12 | 13 | 14 | class PanoS2D3DDataset(BaseDataset): 15 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, 16 | split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None): 17 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) 18 | 19 | if logger is None: 20 | logger = get_logger() 21 | self.root_dir = root_dir 22 | 23 | if mode is None: 24 | return 25 | label_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'label_cor') 26 | img_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'img') 27 | 28 | if split_list is None: 29 | split_list = [name.split('.')[0] for name in os.listdir(label_dir) if 30 | not name.startswith('.') and name.endswith('txt')] 31 | 32 | split_list.sort() 33 | 34 | assert subset == 'pano' or subset == 's2d3d' or subset is None, 'error subset' 35 | if subset == 'pano': 36 | split_list = [name for name in split_list if 'pano_' in name] 37 | logger.info(f"Use PanoContext Dataset") 38 | elif subset == 's2d3d': 39 | split_list = [name for name in split_list if 'camera_' in name] 40 | logger.info(f"Use Stanford2D3D Dataset") 41 | 42 | if for_test_index is not None: 43 | split_list = split_list[:for_test_index] 44 | 45 | self.data = [] 46 | invalid_num = 0 47 | for name in split_list: 48 | img_path = os.path.join(img_dir, f"{name}.png") 49 | label_path = os.path.join(label_dir, f"{name}.txt") 50 | 51 | if not os.path.exists(img_path): 52 | logger.warning(f"{img_path} not exists") 53 | invalid_num += 1 54 | continue 55 | if not os.path.exists(label_path): 56 | logger.warning(f"{label_path} not exists") 57 | invalid_num += 1 58 | continue 59 | 60 | with open(label_path, 'r') as f: 61 | lines = [line for line in f.readlines() if 62 | len([c for c in line.split(' ') if c[0].isnumeric()]) > 1] 63 | if len(lines) % 2 != 0: 64 | invalid_num += 1 65 | continue 66 | self.data.append([img_path, label_path]) 67 | 68 | logger.info( 69 | f"Build dataset mode: {self.mode} valid: {len(self.data)} invalid: {invalid_num}") 70 | 71 | def __getitem__(self, idx): 72 | rgb_path, label_path = self.data[idx] 73 | label = read_label(label_path, data_type='Pano_S2D3D') 74 | image = read_image(rgb_path, self.shape) 75 | output = self.process_data(label, image, self.patch_num) 76 | return output 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | modes = ['test', 'val', 'train'] 82 | for i in range(1): 83 | for mode in modes: 84 | print(mode) 85 | mp3d_dataset = PanoS2D3DDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={ 86 | # 'STRETCH': True, 87 | # 'ROTATE': True, 88 | # 'FLIP': True, 89 | # 'GAMMA': 
True 90 | }) 91 | continue 92 | save_dir = f'../src/dataset/pano_s2d3d/visualization/{mode}' 93 | if not os.path.isdir(save_dir): 94 | os.makedirs(save_dir) 95 | 96 | bar = tqdm(mp3d_dataset, ncols=100) 97 | for data in bar: 98 | bar.set_description(f"Processing {data['id']}") 99 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) 100 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False) 101 | Image.fromarray((pano_img * 255).astype(np.uint8)).save( 102 | os.path.join(save_dir, f"{data['id']}_boundary.png")) 103 | 104 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False, 105 | marker_color=None, center_color=0.8, show_radius=None) 106 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( 107 | os.path.join(save_dir, f"{data['id']}_floorplan.png")) 108 | -------------------------------------------------------------------------------- /utils/height.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/30 3 | @description: 4 | """ 5 | import numpy as np 6 | from typing import List 7 | 8 | from utils.boundary import * 9 | from scipy.optimize import least_squares 10 | from functools import partial 11 | 12 | 13 | def lsq_fit(ceil_norm, floor_norm): 14 | """ 15 | Least Squares 16 | :param ceil_norm: 17 | :param floor_norm: 18 | :return: 19 | """ 20 | 21 | def error_fun(ratio, ceil_norm, floor_norm): 22 | error = np.abs(ratio * ceil_norm - floor_norm) 23 | return error 24 | 25 | init_ratio = np.mean(floor_norm / ceil_norm, axis=-1) 26 | error_func = partial(error_fun, ceil_norm=ceil_norm, floor_norm=floor_norm) 27 | ret = least_squares(error_func, init_ratio, verbose=0) 28 | ratio = ret.x[0] 29 | return ratio 30 | 31 | 32 | def mean_percentile_fit(ceil_norm, floor_norm, p1=25, p2=75): 33 | """ 34 | :param ceil_norm: 35 | :param floor_norm: 36 | :param p1: 37 | :param p2: 38 | :return: 39 | """ 40 | ratio = floor_norm / ceil_norm 41 | r_min = np.percentile(ratio, p1) 42 | r_max = np.percentile(ratio, p2) 43 | return ratio[(r_min <= ratio) & (ratio <= r_max)].mean() 44 | 45 | 46 | def calc_ceil_ratio(boundaries: List[np.array], mode='lsq'): 47 | """ 48 | :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] 
] 49 | :param mode: 'lsq' or 'mean' 50 | :return: 51 | """ 52 | assert len(boundaries[0].shape) < 4 and len(boundaries[1].shape) < 4, 'error shape' 53 | if not is_normal_layout(boundaries): 54 | return 0 55 | 56 | ceil_boundary = boundaries[0] 57 | floor_boundary = boundaries[1] 58 | assert ceil_boundary.shape[-2] == floor_boundary.shape[-2], "boundary need same length" 59 | 60 | ceil_xyz = uv2xyz(ceil_boundary, -1) 61 | floor_xyz = uv2xyz(floor_boundary, 1) 62 | 63 | ceil_xz = ceil_xyz[..., ::2] 64 | floor_xz = floor_xyz[..., ::2] 65 | 66 | ceil_norm = np.linalg.norm(ceil_xz, axis=-1) 67 | floor_norm = np.linalg.norm(floor_xz, axis=-1) 68 | 69 | if mode == "lsq": 70 | if len(ceil_norm.shape) == 2: 71 | ratio = np.array([lsq_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])]) 72 | else: 73 | ratio = lsq_fit(ceil_norm, floor_norm) 74 | else: 75 | if len(ceil_norm.shape) == 2: 76 | ratio = np.array([mean_percentile_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])]) 77 | else: 78 | ratio = mean_percentile_fit(ceil_norm, floor_norm) 79 | 80 | return ratio 81 | 82 | 83 | def calc_ceil_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float: 84 | """ 85 | :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ] 86 | :param camera_height: 87 | :param mode: 88 | :return: 89 | """ 90 | ratio = calc_ceil_ratio(boundaries, mode) 91 | ceil_height = camera_height * ratio 92 | return ceil_height 93 | 94 | 95 | def calc_room_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float: 96 | """ 97 | :param boundaries: also can corners,format: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ], 98 | 0 denotes ceil, 1 denotes floor 99 | :param camera_height: actual camera height determines the scale 100 | :param mode: fitting method lsq or mean 101 | :return: 102 | """ 103 | ceil_height = calc_ceil_height(boundaries, camera_height, mode) 104 | room_height = camera_height + ceil_height 105 | return room_height 106 | 107 | 108 | def height2ratio(height, camera_height=1.6): 109 | ceil_height = height - camera_height 110 | ratio = ceil_height / camera_height 111 | return ratio 112 | 113 | 114 | def ratio2height(ratio, camera_height=1.6): 115 | ceil_height = camera_height * ratio 116 | room_height = camera_height + ceil_height 117 | return room_height 118 | 119 | 120 | if __name__ == '__main__': 121 | from dataset.mp3d_dataset import MP3DDataset 122 | 123 | dataset = MP3DDataset(root_dir="../src/dataset/mp3d", mode="train") 124 | for data in dataset: 125 | ceil_corners = data['corners'][::2] 126 | floor_corners = data['corners'][1::2] 127 | # ceil_boundary = corners2boundary(ceil_corners, length=1024) 128 | # floor_boundary = corners2boundary(floor_corners, length=1024) 129 | room_height1 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='mean') 130 | room_height2 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='lsq') 131 | print(room_height1, room_height2, data['cameraCeilingHeight'] + 1.6) 132 | -------------------------------------------------------------------------------- /postprocessing/dula/layout_old.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/10/06 3 | @description: Use the approach proposed by DuLa-Net 4 | """ 5 | import cv2 6 | import numpy as np 7 | import math 8 | import matplotlib.pyplot as plt 9 | 10 | from visualization.floorplan import draw_floorplan 11 | 12 | 13 | def 
merge_near(lst, diag): 14 | group = [[0, ]] 15 | for i in range(1, len(lst)): 16 | if lst[i] - np.mean(group[-1]) < diag * 0.02: 17 | group[-1].append(lst[i]) 18 | else: 19 | group.append([lst[i], ]) 20 | if len(group) == 1: 21 | group = [lst[0], lst[-1]] 22 | else: 23 | group = [int(np.mean(x)) for x in group] 24 | return group 25 | 26 | 27 | def fit_layout_old(floor_xz, need_cube=False, show=False, block_eps=0.05): 28 | show_radius = np.linalg.norm(floor_xz, axis=-1).max() 29 | side_l = 512 30 | floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8) 31 | center = np.array([side_l / 2, side_l / 2]) 32 | polys = cv2.findContours(floorplan, 1, 2) 33 | if isinstance(polys, tuple): 34 | if len(polys) == 3: 35 | # opencv 3 36 | polys = list(polys[1]) 37 | else: 38 | polys = list(polys[0]) 39 | polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) 40 | poly = polys[0] 41 | sub_x, sub_y, w, h = cv2.boundingRect(poly) 42 | floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w] 43 | sub_center = center - np.array([sub_x, sub_y]) 44 | polys = cv2.findContours(floorplan_sub, 1, 2) 45 | if isinstance(polys, tuple): 46 | if len(polys) == 3: 47 | polys = polys[1] 48 | else: 49 | polys = polys[0] 50 | poly = polys[0] 51 | epsilon = 0.005 * cv2.arcLength(poly, True) 52 | poly = cv2.approxPolyDP(poly, epsilon, True) 53 | 54 | x_lst = [0, ] 55 | y_lst = [0, ] 56 | for i in range(len(poly)): 57 | p1 = poly[i][0] 58 | p2 = poly[(i + 1) % len(poly)][0] 59 | 60 | if (p2[0] - p1[0]) == 0: 61 | slope = 10 62 | else: 63 | slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0])) 64 | 65 | if slope <= 1: 66 | s = int((p1[1] + p2[1]) / 2) 67 | y_lst.append(s) 68 | elif slope > 1: 69 | s = int((p1[0] + p2[0]) / 2) 70 | x_lst.append(s) 71 | 72 | x_lst.append(floorplan_sub.shape[1]) 73 | y_lst.append(floorplan_sub.shape[0]) 74 | x_lst.sort() 75 | y_lst.sort() 76 | 77 | diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2)) 78 | x_lst = merge_near(x_lst, diag) 79 | y_lst = merge_near(y_lst, diag) 80 | if need_cube and len(x_lst) > 2: 81 | x_lst = [x_lst[0], x_lst[-1]] 82 | if need_cube and len(y_lst) > 2: 83 | y_lst = [y_lst[0], y_lst[-1]] 84 | 85 | ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1])) 86 | for i in range(len(x_lst) - 1): 87 | for j in range(len(y_lst) - 1): 88 | sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] 89 | score = 0 if sample.size == 0 else sample.mean() 90 | if score >= 0.3: 91 | ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1 92 | 93 | pred = np.uint8(ans) 94 | pred_polys = cv2.findContours(pred, 1, 3) 95 | if isinstance(pred_polys, tuple): 96 | if len(pred_polys) == 3: 97 | pred_polys = pred_polys[1] 98 | else: 99 | pred_polys = pred_polys[0] 100 | 101 | polygon = [(p[0][1], p[0][0]) for p in pred_polys[0][::-1]] 102 | 103 | v = np.array([p[0] + sub_y for p in polygon]) 104 | u = np.array([p[1] + sub_x for p in polygon]) 105 | # side_l 106 | # v<-----------|o 107 | # | | | 108 | # | ----|----z | side_l 109 | # | | | 110 | # | x \|/ 111 | # |------------u 112 | side_l = floorplan.shape[0] 113 | pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1) 114 | 115 | pred_xz = pred_xz * show_radius / (side_l // 2) 116 | if show: 117 | draw_floorplan(pred_xz, show_radius=show_radius, show=show) 118 | return pred_xz 119 | 120 | 121 | if __name__ == '__main__': 122 | from utils.conversion import uv2xyz 123 | 124 | pano_img = 
np.zeros([512, 1024, 3]) 125 | corners = np.array([[0.1, 0.7], 126 | [0.4, 0.7], 127 | [0.3, 0.6], 128 | [0.6, 0.6], 129 | [0.8, 0.7]]) 130 | xz = uv2xyz(corners)[..., ::2] 131 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) 132 | 133 | xz = fit_layout_old(xz) 134 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) 135 | -------------------------------------------------------------------------------- /dataset/pano_s2d3d_mix_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/16 3 | @description: 4 | """ 5 | 6 | import os 7 | 8 | from dataset.pano_s2d3d_dataset import PanoS2D3DDataset 9 | from utils.logger import get_logger 10 | 11 | 12 | class PanoS2D3DMixDataset(PanoS2D3DDataset): 13 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, 14 | split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None): 15 | assert subset == 's2d3d' or subset == 'pano', 'error subset' 16 | super().__init__(root_dir, None, shape, max_wall_num, aug, camera_height, logger, 17 | split_list, patch_num, keys, None, subset) 18 | if logger is None: 19 | logger = get_logger() 20 | self.mode = mode 21 | if mode == 'train': 22 | if subset == 'pano': 23 | s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, 24 | split_list, patch_num, keys, None, 's2d3d').data 25 | s2d3d_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger, 26 | split_list, patch_num, keys, None, 's2d3d').data 27 | s2d3d_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger, 28 | split_list, patch_num, keys, None, 's2d3d').data 29 | s2d3d_all_data = s2d3d_train_data + s2d3d_val_data + s2d3d_test_data 30 | 31 | pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, 32 | split_list, patch_num, keys, None, 'pano').data 33 | self.data = s2d3d_all_data + pano_train_data 34 | elif subset == 's2d3d': 35 | pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, 36 | split_list, patch_num, keys, None, 'pano').data 37 | pano_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger, 38 | split_list, patch_num, keys, None, 'pano').data 39 | pano_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger, 40 | split_list, patch_num, keys, None, 'pano').data 41 | pano_all_data = pano_train_data + pano_val_data + pano_test_data 42 | 43 | s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, 44 | split_list, patch_num, keys, None, 's2d3d').data 45 | self.data = pano_all_data + s2d3d_train_data 46 | else: 47 | self.data = PanoS2D3DDataset(root_dir, mode, shape, max_wall_num, aug, camera_height, logger, 48 | split_list, patch_num, keys, None, subset).data 49 | 50 | if for_test_index is not None: 51 | self.data = self.data[:for_test_index] 52 | logger.info(f"Build dataset mode: {self.mode} valid: {len(self.data)}") 53 | 54 | 55 | if __name__ == '__main__': 56 | import numpy as np 57 | from PIL import Image 58 | 59 | from tqdm import tqdm 60 | from visualization.boundary import draw_boundaries 61 | from visualization.floorplan import draw_floorplan 62 | from utils.boundary import depth2boundaries 63 | from utils.conversion import uv2xyz 64 | 65 | modes = 
['test', 'val', 'train'] 66 | for i in range(1): 67 | for mode in modes: 68 | print(mode) 69 | mp3d_dataset = PanoS2D3DMixDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={ 70 | # 'STRETCH': True, 71 | # 'ROTATE': True, 72 | # 'FLIP': True, 73 | # 'GAMMA': True 74 | }, subset='pano') 75 | continue 76 | save_dir = f'../src/dataset/pano_s2d3d/visualization1/{mode}' 77 | if not os.path.isdir(save_dir): 78 | os.makedirs(save_dir) 79 | 80 | bar = tqdm(mp3d_dataset, ncols=100) 81 | for data in bar: 82 | bar.set_description(f"Processing {data['id']}") 83 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) 84 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False) 85 | Image.fromarray((pano_img * 255).astype(np.uint8)).save( 86 | os.path.join(save_dir, f"{data['id']}_boundary.png")) 87 | 88 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False, 89 | marker_color=None, center_color=0.8, show_radius=None) 90 | Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( 91 | os.path.join(save_dir, f"{data['id']}_floorplan.png")) 92 | -------------------------------------------------------------------------------- /dataset/communal/base_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/26 3 | @description: 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from utils.boundary import corners2boundary, visibility_corners, get_heat_map 9 | from utils.conversion import xyz2depth, uv2xyz, uv2pixel 10 | from dataset.communal.data_augmentation import PanoDataAugmentation 11 | 12 | 13 | class BaseDataset(torch.utils.data.Dataset): 14 | def __init__(self, mode, shape=None, max_wall_num=999, aug=None, camera_height=1.6, patch_num=256, keys=None): 15 | if keys is None or len(keys) == 0: 16 | keys = ['image', 'depth', 'ratio', 'id', 'corners'] 17 | if shape is None: 18 | shape = [512, 1024] 19 | 20 | assert mode == 'train' or mode == 'val' or mode == 'test' or mode is None, 'unknown mode!' 
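# keys selects which entries process_data() returns for each sample; the default is ['image', 'depth', 'ratio', 'id', 'corners'], and adding 'corner_heat_map' or 'object' enables the extra training targets built in process_data() below.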
21 | self.mode = mode 22 | self.keys = keys 23 | self.shape = shape 24 | self.pano_aug = None if aug is None or mode == 'val' else PanoDataAugmentation(aug) 25 | self.camera_height = camera_height 26 | self.max_wall_num = max_wall_num 27 | self.patch_num = patch_num 28 | self.data = None 29 | 30 | def __len__(self): 31 | return len(self.data) 32 | 33 | @staticmethod 34 | def get_depth(corners, plan_y=1, length=256, visible=True): 35 | visible_floor_boundary = corners2boundary(corners, length=length, visible=visible) 36 | # The horizon-depth relative to plan_y 37 | visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, plan_y), plan_y) 38 | return visible_depth 39 | 40 | def process_data(self, label, image, patch_num): 41 | """ 42 | :param label: 43 | :param image: 44 | :param patch_num: 45 | :return: 46 | """ 47 | corners = label['corners'] 48 | if self.pano_aug is not None: 49 | corners, image = self.pano_aug.execute_aug(corners, image if 'image' in self.keys else None) 50 | eps = 1e-3 51 | corners[:, 1] = np.clip(corners[:, 1], 0.5+eps, 1-eps) 52 | 53 | output = {} 54 | if 'image' in self.keys: 55 | image = image.transpose(2, 0, 1) 56 | output['image'] = image 57 | 58 | visible_corners = None 59 | if 'corner_class' in self.keys or 'depth' in self.keys: 60 | visible_corners = visibility_corners(corners) 61 | 62 | if 'depth' in self.keys: 63 | depth = self.get_depth(visible_corners, length=patch_num, visible=False) 64 | assert len(depth) == patch_num, f"{label['id']}, {len(depth)}, {self.pano_aug.parameters}, {corners}" 65 | output['depth'] = depth 66 | 67 | if 'ratio' in self.keys: 68 | # Why use ratio? Because when floor_height =y_plan=1, we only need to predict ceil_height(ratio). 69 | output['ratio'] = label['ratio'] 70 | 71 | if 'id' in self.keys: 72 | output['id'] = label['id'] 73 | 74 | if 'corners' in self.keys: 75 | # all corners for evaluating Full_IoU 76 | assert len(label['corners']) <= 32, "len(label['corners']):"+len(label['corners']) 77 | output['corners'] = np.zeros((32, 2), dtype=np.float32) 78 | output['corners'][:len(label['corners'])] = label['corners'] 79 | 80 | if 'corner_heat_map' in self.keys: 81 | output['corner_heat_map'] = get_heat_map(visible_corners[..., 0]) 82 | 83 | if 'object' in self.keys and 'objects' in label: 84 | output[f'object_heat_map'] = np.zeros((3, patch_num), dtype=np.float32) 85 | output['object_size'] = np.zeros((3, patch_num), dtype=np.float32) # width, height, bottom_height 86 | for i, type in enumerate(label['objects']): 87 | if len(label['objects'][type]) == 0: 88 | continue 89 | 90 | u_s = [] 91 | for obj in label['objects'][type]: 92 | center_u = obj['center_u'] 93 | u_s.append(center_u) 94 | center_pixel_u = uv2pixel(np.array([center_u]), w=patch_num, axis=0)[0] 95 | output['object_size'][0, center_pixel_u] = obj['width_u'] 96 | output['object_size'][1, center_pixel_u] = obj['height_v'] 97 | output['object_size'][2, center_pixel_u] = obj['boundary_v'] 98 | output[f'object_heat_map'][i] = get_heat_map(np.array(u_s)) 99 | 100 | return output 101 | 102 | 103 | if __name__ == '__main__': 104 | from dataset.communal.read import read_image, read_label 105 | from visualization.boundary import draw_boundaries 106 | from utils.boundary import depth2boundaries 107 | from tqdm import trange 108 | 109 | # np.random.seed(0) 110 | dataset = BaseDataset() 111 | dataset.pano_aug = PanoDataAugmentation(aug={ 112 | 'STRETCH': True, 113 | 'ROTATE': True, 114 | 'FLIP': True, 115 | }) 116 | # pano_img = read_image("../src/demo.png") 117 | # label = 
read_label("../src/demo.json") 118 | pano_img_path = "../../src/dataset/mp3d/image/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.png" 119 | label_path = "../../src/dataset/mp3d/label/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.json" 120 | pano_img = read_image(pano_img_path) 121 | label = read_label(label_path) 122 | 123 | # batch test 124 | for i in trange(1): 125 | output = dataset.process_data(label, pano_img, 256) 126 | boundary_list = depth2boundaries(output['ratio'], output['depth'], step=None) 127 | draw_boundaries(output['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) 128 | -------------------------------------------------------------------------------- /src/demo/demo.json: -------------------------------------------------------------------------------- 1 | { 2 | "cameraHeight": 1.6, 3 | "layoutHeight": 2.9809624004364013, 4 | "layoutObj2ds": { 5 | "num": 0, 6 | "obj2ds": [] 7 | }, 8 | "layoutPoints": { 9 | "num": 6, 10 | "points": [ 11 | { 12 | "coords": [ 13 | 0.7081447345651483, 14 | 0.5 15 | ], 16 | "id": 0, 17 | "xyz": [ 18 | 3.0078125, 19 | 0.0, 20 | -0.8097623087756155 21 | ] 22 | }, 23 | { 24 | "coords": [ 25 | 0.8447738331945455, 26 | 0.5 27 | ], 28 | "id": 0, 29 | "xyz": [ 30 | 3.0078125, 31 | 0.0, 32 | 2.03786496 33 | ] 34 | }, 35 | { 36 | "coords": [ 37 | 0.009142142599636915, 38 | 0.5 39 | ], 40 | "id": 0, 41 | "xyz": [ 42 | -0.1171875, 43 | 0.0, 44 | 2.03786496 45 | ] 46 | }, 47 | { 48 | "coords": [ 49 | 0.02702105153167117, 50 | 0.5 51 | ], 52 | "id": 0, 53 | "xyz": [ 54 | -0.1171875, 55 | 0.0, 56 | 0.68359375 57 | ] 58 | }, 59 | { 60 | "coords": [ 61 | 0.20330907731820486, 62 | 0.5 63 | ], 64 | "id": 0, 65 | "xyz": [ 66 | -2.26292525056, 67 | 0.0, 68 | 0.68359375 69 | ] 70 | }, 71 | { 72 | "coords": [ 73 | 0.304692157890135, 74 | 0.5 75 | ], 76 | "id": 0, 77 | "xyz": [ 78 | -2.26292525056, 79 | 0.0, 80 | -0.8097623087756155 81 | ] 82 | } 83 | ] 84 | }, 85 | "layoutWalls": { 86 | "num": 6, 87 | "walls": [ 88 | { 89 | "id": 0, 90 | "normal": [ 91 | 1.0, 92 | 0.0, 93 | -0.0 94 | ], 95 | "planeEquation": [ 96 | 1.0, 97 | 0.0, 98 | -0.0, 99 | -3.0078125 100 | ], 101 | "pointsIdx": [ 102 | 0, 103 | 1 104 | ], 105 | "width": 2.8476272687756152 106 | }, 107 | { 108 | "id": 0, 109 | "normal": [ 110 | 0.0, 111 | 0.0, 112 | 1.0 113 | ], 114 | "planeEquation": [ 115 | 0.0, 116 | 0.0, 117 | 1.0, 118 | -2.03786496 119 | ], 120 | "pointsIdx": [ 121 | 1, 122 | 2 123 | ], 124 | "width": 3.125 125 | }, 126 | { 127 | "id": 0, 128 | "normal": [ 129 | -1.0, 130 | -0.0, 131 | -0.0 132 | ], 133 | "planeEquation": [ 134 | -1.0, 135 | -0.0, 136 | -0.0, 137 | -0.1171875 138 | ], 139 | "pointsIdx": [ 140 | 2, 141 | 3 142 | ], 143 | "width": 1.3542712099999998 144 | }, 145 | { 146 | "id": 0, 147 | "normal": [ 148 | 0.0, 149 | 0.0, 150 | 1.0 151 | ], 152 | "planeEquation": [ 153 | 0.0, 154 | 0.0, 155 | 1.0, 156 | -0.68359375 157 | ], 158 | "pointsIdx": [ 159 | 3, 160 | 4 161 | ], 162 | "width": 2.14573775056 163 | }, 164 | { 165 | "id": 0, 166 | "normal": [ 167 | -1.0, 168 | -0.0, 169 | -0.0 170 | ], 171 | "planeEquation": [ 172 | -1.0, 173 | -0.0, 174 | -0.0, 175 | -2.26292525056 176 | ], 177 | "pointsIdx": [ 178 | 4, 179 | 5 180 | ], 181 | "width": 1.4933560587756154 182 | }, 183 | { 184 | "id": 0, 185 | "normal": [ 186 | 0.0, 187 | 0.0, 188 | -1.0 189 | ], 190 | "planeEquation": [ 191 | 0.0, 192 | 0.0, 193 | -1.0, 194 | -0.8097623087756155 195 | ], 196 | "pointsIdx": [ 197 | 5, 198 | 0 199 | ], 200 | "width": 5.27073775056 201 | } 202 | ] 203 | }, 204 | "panoId": 
"nothing" 205 | } -------------------------------------------------------------------------------- /dataset/zind_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/09/22 3 | @description: 4 | """ 5 | import os 6 | import json 7 | import math 8 | import numpy as np 9 | 10 | from dataset.communal.read import read_image, read_label, read_zind 11 | from dataset.communal.base_dataset import BaseDataset 12 | from utils.logger import get_logger 13 | from preprocessing.filter import filter_center, filter_boundary, filter_self_intersection 14 | from utils.boundary import calc_rotation 15 | 16 | 17 | class ZindDataset(BaseDataset): 18 | def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, 19 | split_list=None, patch_num=256, keys=None, for_test_index=None, 20 | is_simple=True, is_ceiling_flat=False, vp_align=False): 21 | # if keys is None: 22 | # keys = ['image', 'depth', 'ratio', 'id', 'corners', 'corner_heat_map', 'object'] 23 | super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) 24 | if logger is None: 25 | logger = get_logger() 26 | self.root_dir = root_dir 27 | self.vp_align = vp_align 28 | 29 | data_dir = os.path.join(root_dir) 30 | img_dir = os.path.join(root_dir, 'image') 31 | 32 | pano_list = read_zind(partition_path=os.path.join(data_dir, f"zind_partition.json"), 33 | simplicity_path=os.path.join(data_dir, f"room_shape_simplicity_labels.json"), 34 | data_dir=data_dir, mode=mode, is_simple=is_simple, is_ceiling_flat=is_ceiling_flat) 35 | 36 | if for_test_index is not None: 37 | pano_list = pano_list[:for_test_index] 38 | if split_list: 39 | pano_list = [pano for pano in pano_list if pano['id'] in split_list] 40 | self.data = [] 41 | invalid_num = 0 42 | for pano in pano_list: 43 | if not os.path.exists(pano['img_path']): 44 | logger.warning(f"{pano['img_path']} not exists") 45 | invalid_num += 1 46 | continue 47 | 48 | if not filter_center(pano['corners']): 49 | # logger.warning(f"{pano['id']} camera center not in layout") 50 | # invalid_num += 1 51 | continue 52 | 53 | if self.max_wall_num >= 10: 54 | if len(pano['corners']) < self.max_wall_num: 55 | invalid_num += 1 56 | continue 57 | elif self.max_wall_num != 0 and len(pano['corners']) != self.max_wall_num: 58 | invalid_num += 1 59 | continue 60 | 61 | if not filter_boundary(pano['corners']): 62 | logger.warning(f"{pano['id']} boundary cross") 63 | invalid_num += 1 64 | continue 65 | 66 | if not filter_self_intersection(pano['corners']): 67 | logger.warning(f"{pano['id']} self_intersection") 68 | invalid_num += 1 69 | continue 70 | 71 | self.data.append(pano) 72 | 73 | logger.info( 74 | f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}") 75 | 76 | def __getitem__(self, idx): 77 | pano = self.data[idx] 78 | rgb_path = pano['img_path'] 79 | label = pano 80 | image = read_image(rgb_path, self.shape) 81 | 82 | if self.vp_align: 83 | # Equivalent to vanishing point alignment step 84 | rotation = calc_rotation(corners=label['corners']) 85 | shift = math.modf(rotation / (2 * np.pi) + 1)[0] 86 | image = np.roll(image, round(shift * self.shape[1]), axis=1) 87 | label['corners'][:, 0] = np.modf(label['corners'][:, 0] + shift)[0] 88 | 89 | output = self.process_data(label, image, self.patch_num) 90 | return output 91 | 92 | 93 | if __name__ == "__main__": 94 | import numpy as np 95 | from PIL import Image 96 | 97 | from tqdm import tqdm 98 
| from visualization.boundary import draw_boundaries, draw_object 99 | from visualization.floorplan import draw_floorplan 100 | from utils.boundary import depth2boundaries, calc_rotation 101 | from utils.conversion import uv2xyz 102 | from models.other.init_env import init_env 103 | 104 | init_env(123) 105 | 106 | modes = ['val'] 107 | for i in range(1): 108 | for mode in modes: 109 | print(mode) 110 | mp3d_dataset = ZindDataset(root_dir='../src/dataset/zind', mode=mode, aug={ 111 | 'STRETCH': False, 112 | 'ROTATE': False, 113 | 'FLIP': False, 114 | 'GAMMA': False 115 | }) 116 | # continue 117 | # save_dir = f'../src/dataset/zind/visualization/{mode}' 118 | # if not os.path.isdir(save_dir): 119 | # os.makedirs(save_dir) 120 | 121 | bar = tqdm(mp3d_dataset, ncols=100) 122 | for data in bar: 123 | # if data['id'] != '1079_pano_18': 124 | # continue 125 | bar.set_description(f"Processing {data['id']}") 126 | boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) 127 | 128 | pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) 129 | # Image.fromarray((pano_img * 255).astype(np.uint8)).save( 130 | # os.path.join(save_dir, f"{data['id']}_boundary.png")) 131 | # draw_object(pano_img, heat_maps=data['object_heat_map'], depth=data['depth'], 132 | # size=data['object_size'], show=True) 133 | # pass 134 | # 135 | floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, 136 | marker_color=None, center_color=0.2) 137 | # Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( 138 | # os.path.join(save_dir, f"{data['id']}_floorplan.png")) 139 | -------------------------------------------------------------------------------- /evaluation/iou.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/29 3 | @description: 4 | The method with "_floorplan" suffix is only for comparison, which is used for calculation in LED2-net. 5 | However, the floorplan is affected by show_radius. Setting too large will result in the decrease of accuracy, 6 | and setting too small will result in the failure of calculation beyond the range. 7 | """ 8 | import numpy as np 9 | from shapely.geometry import Polygon 10 | 11 | 12 | def calc_inter_area(dt_xz, gt_xz): 13 | """ 14 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 15 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 16 | :return: 17 | """ 18 | dt_polygon = Polygon(dt_xz) 19 | gt_polygon = Polygon(gt_xz) 20 | 21 | dt_area = dt_polygon.area 22 | gt_area = gt_polygon.area 23 | inter_area = dt_polygon.intersection(gt_polygon).area 24 | return dt_area, gt_area, inter_area 25 | 26 | 27 | def calc_IoU_2D(dt_xz, gt_xz): 28 | """ 29 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 30 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 31 | :return: 32 | """ 33 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) 34 | iou_2d = inter_area / (gt_area + dt_area - inter_area) 35 | return iou_2d 36 | 37 | 38 | def calc_IoU_3D(dt_xz, gt_xz, dt_height, gt_height): 39 | """ 40 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 41 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 
42 | :param dt_height: 43 | :param gt_height: 44 | :return: 45 | """ 46 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) 47 | dt_volume = dt_area * dt_height 48 | gt_volume = gt_area * gt_height 49 | inter_volume = inter_area * min(dt_height, gt_height) 50 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) 51 | return iou_3d 52 | 53 | 54 | def calc_IoU(dt_xz, gt_xz, dt_height, gt_height): 55 | """ 56 | :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 57 | :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 58 | :param dt_height: 59 | :param gt_height: 60 | :return: 61 | """ 62 | dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) 63 | iou_2d = inter_area / (gt_area + dt_area - inter_area) 64 | 65 | dt_volume = dt_area * dt_height 66 | gt_volume = gt_area * gt_height 67 | inter_volume = inter_area * min(dt_height, gt_height) 68 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) 69 | 70 | return iou_2d, iou_3d 71 | 72 | 73 | def calc_Iou_height(dt_height, gt_height): 74 | return min(dt_height, gt_height) / max(dt_height, gt_height) 75 | 76 | 77 | # the following is for testing only 78 | def calc_inter_area_floorplan(dt_floorplan, gt_floorplan): 79 | intersect = np.sum(np.logical_and(dt_floorplan, gt_floorplan)) 80 | dt_area = np.sum(dt_floorplan) 81 | gt_area = np.sum(gt_floorplan) 82 | return dt_area, gt_area, intersect 83 | 84 | 85 | def calc_IoU_2D_floorplan(dt_floorplan, gt_floorplan): 86 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) 87 | iou_2d = inter_area / (gt_area + dt_area - inter_area) 88 | return iou_2d 89 | 90 | 91 | def calc_IoU_3D_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height): 92 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) 93 | dt_volume = dt_area * dt_height 94 | gt_volume = gt_area * gt_height 95 | inter_volume = inter_area * min(dt_height, gt_height) 96 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) 97 | return iou_3d 98 | 99 | 100 | def calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height): 101 | dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) 102 | iou_2d = inter_area / (gt_area + dt_area - inter_area) 103 | 104 | dt_volume = dt_area * dt_height 105 | gt_volume = gt_area * gt_height 106 | inter_volume = inter_area * min(dt_height, gt_height) 107 | iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) 108 | return iou_2d, iou_3d 109 | 110 | 111 | if __name__ == '__main__': 112 | from visualization.floorplan import draw_floorplan, draw_iou_floorplan 113 | from visualization.boundary import draw_boundaries, corners2boundaries 114 | from utils.conversion import uv2xyz 115 | from utils.height import height2ratio 116 | 117 | # dummy data 118 | dt_floor_corners = np.array([[0.2, 0.7], 119 | [0.4, 0.7], 120 | [0.6, 0.7], 121 | [0.8, 0.7]]) 122 | dt_height = 2.8 123 | 124 | gt_floor_corners = np.array([[0.3, 0.7], 125 | [0.5, 0.7], 126 | [0.7, 0.7], 127 | [0.9, 0.7]]) 128 | gt_height = 3.2 129 | 130 | dt_xz = uv2xyz(dt_floor_corners)[..., ::2] 131 | gt_xz = uv2xyz(gt_floor_corners)[..., ::2] 132 | 133 | dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=1) 134 | gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=1) 135 | # dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=2) 136 | # gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=2) 
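# As noted in the module docstring, the rasterized (floorplan) IoU depends on show_radius: switching to the commented show_radius=2 calls above would typically give a slightly different value from the polygon-based result printed below.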
137 | 138 | iou_2d, iou_3d = calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height) 139 | print('use floor plan image:', iou_2d, iou_3d) 140 | 141 | iou_2d, iou_3d = calc_IoU(dt_xz, gt_xz, dt_height, gt_height) 142 | print('use floor plan polygon:', iou_2d, iou_3d) 143 | 144 | draw_iou_floorplan(dt_xz, gt_xz, show=True, iou_2d=iou_2d, iou_3d=iou_3d) 145 | pano_bd = draw_boundaries(np.zeros([512, 1024, 3]), corners_list=[dt_floor_corners], 146 | boundary_color=[0, 0, 1], ratio=height2ratio(dt_height), draw_corners=False) 147 | pano_bd = draw_boundaries(pano_bd, corners_list=[gt_floor_corners], 148 | boundary_color=[0, 1, 0], ratio=height2ratio(gt_height), show=True, draw_corners=False) 149 | -------------------------------------------------------------------------------- /visualization/floorplan.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/6/29 3 | @description: 4 | """ 5 | import cv2 6 | 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | from PIL import Image 11 | from utils.boundary import * 12 | 13 | 14 | def draw_floorplan(xz, fill_color=None, border_color=None, side_l=512, show_radius=None, show=False, marker_color=None, 15 | center_color=None, scale=1.5): 16 | """ 17 | :param scale: 18 | :param center_color: 19 | :param marker_color: for corners marking 20 | :param fill_color: 21 | :param border_color: boundary color 22 | :param xz: [[x1, z1], [x2, z2], ....] 23 | :param side_l: side length (pixel) of the output result 24 | :param show_radius: The displayed maximum radius m (proportional to the projection plane plan_y of xz), 25 | such as set to 1, means that the pixel value of side_l/2 is expressed as 1m, if not set this value to display all 26 | :param show: 27 | :return: 28 | """ 29 | if fill_color is None: 30 | fill_color = [1] 31 | 32 | board = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float) 33 | 34 | if show_radius is None: 35 | show_radius = np.linalg.norm(xz, axis=-1).max() 36 | 37 | xz = xz * side_l / (2*scale) / show_radius 38 | # v<-----------|o 39 | # | | | 40 | # | ----|----z | 41 | # | | | 42 | # | x \|/ 43 | # |------------u 44 | xz[:, 1] = -xz[:, 1] 45 | xz += side_l // 2 # moving to center 46 | xz = xz.astype(np.int) 47 | cv2.fillPoly(board, [xz], fill_color) 48 | if border_color: 49 | cv2.drawContours(board, [xz], 0, border_color, 2) 50 | 51 | if marker_color is not None: 52 | for p in xz: 53 | cv2.drawMarker(board, tuple(p), marker_color, markerType=0, markerSize=10, thickness=2) 54 | if center_color is not None: 55 | cv2.drawMarker(board, tuple([side_l // 2, side_l // 2]), center_color, markerType=0, markerSize=10, thickness=2) 56 | 57 | if show: 58 | # plt.rcParams['figure.dpi'] = 300 59 | plt.axis('off') 60 | plt.imshow(board[..., 0] if board.shape[-1] == 1 else board) 61 | plt.show() 62 | 63 | return board 64 | 65 | 66 | def draw_iou_floorplan(dt_xz, gt_xz, show_radius=None, show=False, side_l=512, 67 | iou_2d=None, iou_3d=None, dt_board_color=None, gt_board_color=None): 68 | """ 69 | :param gt_board_color: 70 | :param dt_board_color: 71 | :param dt_xz: [[x1, z1], [x2, z2], ....] 72 | :param gt_xz: [[x1, z1], [x2, z2], ....] 
73 | :param show: 74 | :param side_l: side length (pixel) of the output result 75 | :param show_radius: The displayed maximum radius m (proportional to the projection plane plan_y of xz), 76 | such as set to 1, means that the pixel value of side_l/2 is expressed as 1m, if not set this value to display all 77 | :param iou_2d: 78 | :param iou_3d: 79 | :return: 80 | """ 81 | if dt_board_color is None: 82 | dt_board_color = [0, 1, 0, 1] 83 | if gt_board_color is None: 84 | gt_board_color = [0, 0, 1, 1] 85 | center_color = [1, 0, 0, 1] 86 | fill_color = [0.2, 0.2, 0.2, 0.2] 87 | 88 | if show_radius is None: 89 | # niform scale 90 | gt_radius = np.linalg.norm(gt_xz, axis=-1).max() 91 | dt_radius = np.linalg.norm(dt_xz, axis=-1).max() 92 | show_radius = gt_radius if gt_radius > dt_radius else dt_radius 93 | 94 | dt_floorplan = draw_floorplan(dt_xz, show_radius=show_radius, fill_color=fill_color, 95 | border_color=dt_board_color, side_l=side_l, show=False) 96 | gt_floorplan = draw_floorplan(gt_xz, show_radius=show_radius, fill_color=fill_color, 97 | border_color=gt_board_color, side_l=side_l, show=False, 98 | center_color=[1, 0, 0, 1]) 99 | 100 | dt_floorplan = Image.fromarray((dt_floorplan * 255).astype(np.uint8), mode='RGBA') 101 | gt_floorplan = Image.fromarray((gt_floorplan * 255).astype(np.uint8), mode='RGBA') 102 | iou_floorplan = Image.alpha_composite(gt_floorplan, dt_floorplan) 103 | 104 | back = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float) 105 | back[..., :] = [0.8, 0.8, 0.8, 1] 106 | back = Image.fromarray((back * 255).astype(np.uint8), mode='RGBA') 107 | 108 | iou_floorplan = Image.alpha_composite(back, iou_floorplan).convert("RGB") 109 | iou_floorplan = np.array(iou_floorplan) / 255.0 110 | 111 | if iou_2d is not None: 112 | cv2.putText(iou_floorplan, f'2d:{iou_2d * 100:.2f}', (10, 30), 2, 1, (0, 0, 0), 1) 113 | if iou_3d is not None: 114 | cv2.putText(iou_floorplan, f'3d:{iou_3d * 100:.2f}', (10, 60), 2, 1, (0, 0, 0), 1) 115 | 116 | if show: 117 | plt.axis('off') 118 | plt.imshow(iou_floorplan) 119 | plt.show() 120 | return iou_floorplan 121 | 122 | 123 | if __name__ == '__main__': 124 | import numpy as np 125 | from dataset.mp3d_dataset import MP3DDataset 126 | from utils.boundary import depth2boundaries 127 | from utils.conversion import uv2xyz 128 | from visualization.boundary import draw_boundaries 129 | 130 | mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train') 131 | gt = mp3d_dataset.__getitem__(0) 132 | 133 | # boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None) 134 | # pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) 135 | # draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, marker_color=None, center_color=0.8) 136 | # draw_floorplan(depth2xyz(gt['depth'])[..., ::2], show=True, marker_color=None, center_color=0.8) 137 | 138 | corners = gt['corners'][gt['corners'][..., 0] + gt['corners'][..., 1] != 0] 139 | dt_corners = corners + 0.1 140 | # img = draw_floorplan(uv2xyz(corners)[..., ::2], show=True, fill_color=[0.8, 0.8, 0.8, 0.2], 141 | # marker_color=None, center_color=[1, 0, 0, 1], border_color=[0, 0, 1, 1]) 142 | # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8)) 143 | 144 | img = draw_iou_floorplan(uv2xyz(dt_corners)[..., ::2], uv2xyz(corners)[..., ::2], side_l=512, show=True) 145 | img[..., 0:3] = img[..., 0:3][..., ::-1] 146 | # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8)) 147 | 148 | 
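# A minimal usage sketch for draw_floorplan, assuming the repository root is on PYTHONPATH and with
# values chosen only for illustration: with scale=1 and show_radius=2, one metre maps to
# side_l / (2 * scale * show_radius) = 512 / 4 = 128 pixels, so the corner at x = +1 m, z = -1 m of a
# 2 m x 2 m room should land near pixel (u, v) = (256 + 128, 256 + 128) = (384, 384).
import numpy as np
from visualization.floorplan import draw_floorplan

square_xz = np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]], dtype=np.float32)  # 2 m x 2 m room centred on the camera
board = draw_floorplan(square_xz, side_l=512, show_radius=2, scale=1, show=False)
print(board.shape)  # (512, 512, 1) with the default fill_color=[1]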
-------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/17 3 | @description: 4 | """ 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | import datetime 9 | 10 | 11 | class BaseModule(nn.Module): 12 | def __init__(self, ckpt_dir=None): 13 | super().__init__() 14 | 15 | self.ckpt_dir = ckpt_dir 16 | 17 | if ckpt_dir: 18 | if not os.path.exists(ckpt_dir): 19 | os.makedirs(ckpt_dir) 20 | else: 21 | self.model_lst = [x for x in sorted(os.listdir(self.ckpt_dir)) if x.endswith('.pkl')] 22 | 23 | self.last_model_path = None 24 | self.best_model_path = None 25 | self.best_accuracy = -float('inf') 26 | self.acc_d = {} 27 | 28 | def show_parameter_number(self, logger): 29 | total = sum(p.numel() for p in self.parameters()) 30 | trainable = sum(p.numel() for p in self.parameters() if p.requires_grad) 31 | logger.info('{} parameter total:{:,}, trainable:{:,}'.format(self._get_name(), total, trainable)) 32 | 33 | def load(self, device, logger, optimizer=None, best=False): 34 | if len(self.model_lst) == 0: 35 | logger.info('*'*50) 36 | logger.info("Empty model folder! Using initial weights") 37 | logger.info('*'*50) 38 | return 0 39 | 40 | last_model_lst = list(filter(lambda n: '_last_' in n, self.model_lst)) 41 | best_model_lst = list(filter(lambda n: '_best_' in n, self.model_lst)) 42 | 43 | if len(last_model_lst) == 0 and len(best_model_lst) == 0: 44 | logger.info('*'*50) 45 | ckpt_path = os.path.join(self.ckpt_dir, self.model_lst[0]) 46 | logger.info(f"Load: {ckpt_path}") 47 | checkpoint = torch.load(ckpt_path, map_location=torch.device(device)) 48 | self.load_state_dict(checkpoint, strict=False) 49 | logger.info('*'*50) 50 | return 0 51 | 52 | checkpoint = None 53 | if len(last_model_lst) > 0: 54 | self.last_model_path = os.path.join(self.ckpt_dir, last_model_lst[-1]) 55 | checkpoint = torch.load(self.last_model_path, map_location=torch.device(device)) 56 | self.best_accuracy = checkpoint['accuracy'] 57 | self.acc_d = checkpoint['acc_d'] 58 | 59 | if len(best_model_lst) > 0: 60 | self.best_model_path = os.path.join(self.ckpt_dir, best_model_lst[-1]) 61 | best_checkpoint = torch.load(self.best_model_path, map_location=torch.device(device)) 62 | self.best_accuracy = best_checkpoint['accuracy'] 63 | self.acc_d = best_checkpoint['acc_d'] 64 | if best: 65 | checkpoint = best_checkpoint 66 | 67 | for k in self.acc_d: 68 | if isinstance(self.acc_d[k], float): 69 | self.acc_d[k] = { 70 | 'acc': self.acc_d[k], 71 | 'epoch': checkpoint['epoch'] 72 | } 73 | 74 | if checkpoint is None: 75 | logger.error("Invalid checkpoint") 76 | return 77 | 78 | self.load_state_dict(checkpoint['net'], strict=False) 79 | if optimizer and not best:  # when loading the best checkpoint, a new optimizer may be used (e.g. switching from adam to sgd) 80 | logger.info('Load optimizer') 81 | optimizer.load_state_dict(checkpoint['optimizer']) 82 | for state in optimizer.state.values(): 83 | for k, v in state.items(): 84 | if torch.is_tensor(v): 85 | state[k] = v.to(device) 86 | 87 | logger.info('*'*50) 88 | if best: 89 | logger.info(f"Load best: {self.best_model_path}") 90 | else: 91 | logger.info(f"Load last: {self.last_model_path}") 92 | 93 | logger.info(f"Best accuracy: {self.best_accuracy}") 94 | logger.info(f"Last epoch: {checkpoint['epoch'] + 1}") 95 | logger.info('*'*50) 96 | return checkpoint['epoch'] + 1 97 | 98 | def update_acc(self, acc_d, epoch, logger): 99 | logger.info("-" * 100) 100 | for k in acc_d: 101 | if k not in 
self.acc_d.keys() or acc_d[k] > self.acc_d[k]['acc']: 102 | self.acc_d[k] = { 103 | 'acc': acc_d[k], 104 | 'epoch': epoch 105 | } 106 | logger.info(f"Update ACC: {k} {self.acc_d[k]['acc']:.4f}({self.acc_d[k]['epoch']}-{epoch})") 107 | logger.info("-" * 100) 108 | 109 | def save(self, optim, epoch, accuracy, logger, replace=True, acc_d=None, config=None): 110 | """ 111 | 112 | :param config: 113 | :param optim: 114 | :param epoch: 115 | :param accuracy: 116 | :param logger: 117 | :param replace: 118 | :param acc_d: other evaluation metrics, e.g. visible_2/3d, full_2/3d, rmse... 119 | :return: 120 | """ 121 | if acc_d: 122 | self.update_acc(acc_d, epoch, logger) 123 | name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S_last_{:.4f}_{}'.format(accuracy, epoch)) 124 | name = f"model_{name}.pkl" 125 | checkpoint = { 126 | 'net': self.state_dict(), 127 | 'optimizer': optim.state_dict(), 128 | 'epoch': epoch, 129 | 'accuracy': accuracy, 130 | 'acc_d': acc_d 131 | } 132 | # FIXME:: delete always true 133 | if (True or config.MODEL.SAVE_LAST) and epoch % config.TRAIN.SAVE_FREQ == 0: 134 | if replace and self.last_model_path and os.path.exists(self.last_model_path): 135 | os.remove(self.last_model_path) 136 | self.last_model_path = os.path.join(self.ckpt_dir, name) 137 | torch.save(checkpoint, self.last_model_path) 138 | logger.info(f"Saved last model: {self.last_model_path}") 139 | 140 | if accuracy > self.best_accuracy: 141 | self.best_accuracy = accuracy 142 | # FIXME:: delete always true 143 | if True or config.MODEL.SAVE_BEST: 144 | if replace and self.best_model_path and os.path.exists(self.best_model_path): 145 | os.remove(self.best_model_path) 146 | self.best_model_path = os.path.join(self.ckpt_dir, name.replace('last', 'best')) 147 | torch.save(checkpoint, self.best_model_path) 148 | logger.info("#" * 100) 149 | logger.info(f"Saved best model: {self.best_model_path}") 150 | logger.info("#" * 100) -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Zhigang Jiang 3 | @time: 2022/05/23 4 | @description: 5 | ''' 6 | 7 | import gradio as gr 8 | import numpy as np 9 | import os 10 | import torch 11 | 12 | from PIL import Image 13 | 14 | from utils.logger import get_logger 15 | from config.defaults import get_config 16 | from inference import preprocess, run_one_inference 17 | from models.build import build_model 18 | from argparse import Namespace 19 | import gdown 20 | 21 | 22 | def down_ckpt(model_cfg, ckpt_dir): 23 | model_ids = [ 24 | ['src/config/mp3d.yaml', '1o97oAmd-yEP5bQrM0eAWFPLq27FjUDbh'], 25 | ['src/config/zind.yaml', '1PzBj-dfDfH_vevgSkRe5kczW0GVl_43I'], 26 | ['src/config/pano.yaml', '1JoeqcPbm_XBPOi6O9GjjWi3_rtyPZS8m'], 27 | ['src/config/s2d3d.yaml', '1PfJzcxzUsbwwMal7yTkBClIFgn8IdEzI'], 28 | ['src/config/ablation_study/full.yaml', '1U16TxUkvZlRwJNaJnq9nAUap-BhCVIha'] 29 | ] 30 | 31 | for model_id in model_ids: 32 | if model_id[0] != model_cfg: 33 | continue 34 | path = os.path.join(ckpt_dir, 'best.pkl') 35 | if not os.path.exists(path): 36 | logger.info(f"Downloading {model_id}") 37 | os.makedirs(ckpt_dir, exist_ok=True) 38 | gdown.download(f"https://drive.google.com/uc?id={model_id[1]}", path, False) 39 | 40 | 41 | def greet(img_path, pre_processing, weight_name, post_processing, visualization, mesh_format, mesh_resolution): 42 | args.pre_processing = pre_processing 43 | args.post_processing = post_processing 44 | if weight_name == 'mp3d': 45 | model = 
mp3d_model 46 | elif weight_name == 'zind': 47 | model = zind_model 48 | else: 49 | logger.error("unknown pre-trained weight name") 50 | raise NotImplementedError 51 | 52 | img_name = os.path.basename(img_path).split('.')[0] 53 | img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3] 54 | 55 | vp_cache_path = 'src/demo/default_vp.txt' 56 | if args.pre_processing: 57 | vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt') 58 | logger.info("pre-processing ...") 59 | img, vp = preprocess(img, vp_cache_path=vp_cache_path) 60 | 61 | img = (img / 255.0).astype(np.float32) 62 | run_one_inference(img, model, args, img_name, 63 | logger=logger, show=False, 64 | show_depth='depth-normal-gradient' in visualization, 65 | show_floorplan='2d-floorplan' in visualization, 66 | mesh_format=mesh_format, mesh_resolution=int(mesh_resolution)) 67 | 68 | return [os.path.join(args.output_dir, f"{img_name}_pred.png"), 69 | os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"), 70 | os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"), 71 | vp_cache_path, 72 | os.path.join(args.output_dir, f"{img_name}_pred.json")] 73 | 74 | 75 | def get_model(args): 76 | config = get_config(args) 77 | down_ckpt(args.cfg, config.CKPT.DIR) 78 | if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available(): 79 | logger.info(f'The {args.device} is not available, will use cpu ...') 80 | config.defrost() 81 | args.device = "cpu" 82 | config.TRAIN.DEVICE = "cpu" 83 | config.freeze() 84 | model, _, _, _ = build_model(config, logger) 85 | return model 86 | 87 | 88 | if __name__ == '__main__': 89 | logger = get_logger() 90 | args = Namespace(device='cuda', output_dir='src/output', visualize_3d=False, output_3d=True) 91 | os.makedirs(args.output_dir, exist_ok=True) 92 | 93 | args.cfg = 'src/config/mp3d.yaml' 94 | mp3d_model = get_model(args) 95 | 96 | args.cfg = 'src/config/zind.yaml' 97 | zind_model = get_model(args) 98 | 99 | description = "This is a demo of the project " \ 100 | "LGT-Net. " \ 101 | "It uses the Geometry-Aware Transformer Network to predict the 3D room layout of an RGB panorama." 
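# greet() returns five file paths in the same order as the outputs list below: the 2D visualization image, the 3D mesh (used twice: once for the Model3D viewer and once as a downloadable file), the vanishing-point cache, and the layout JSON.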
102 | 103 | demo = gr.Interface(fn=greet, 104 | inputs=[gr.Image(type='filepath', label='input rgb panorama', value='src/demo/pano_demo1.png'), 105 | gr.Checkbox(label='pre-processing', value=True), 106 | gr.Radio(['mp3d', 'zind'], 107 | label='pre-trained weight', 108 | value='mp3d'), 109 | gr.Radio(['manhattan', 'atalanta', 'original'], 110 | label='post-processing method', 111 | value='manhattan'), 112 | gr.CheckboxGroup(['depth-normal-gradient', '2d-floorplan'], 113 | label='2d-visualization', 114 | value=['depth-normal-gradient', '2d-floorplan']), 115 | gr.Radio(['.gltf', '.obj', '.glb'], 116 | label='output format of 3d mesh', 117 | value='.gltf'), 118 | gr.Radio(['128', '256', '512', '1024'], 119 | label='output resolution of 3d mesh', 120 | value='256'), 121 | ], 122 | outputs=[gr.Image(label='predicted result 2d-visualization', type='filepath'), 123 | gr.Model3D(label='3d mesh reconstruction', clear_color=[1.0, 1.0, 1.0, 1.0]), 124 | gr.File(label='3d mesh file'), 125 | gr.File(label='vanishing point information'), 126 | gr.File(label='layout json')], 127 | examples=[ 128 | ['src/demo/pano_demo1.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 129 | ['src/demo/mp3d_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 130 | ['src/demo/mp3d_demo2.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 131 | ['src/demo/mp3d_demo3.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 132 | ['src/demo/zind_demo1.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 133 | ['src/demo/zind_demo2.png', False, 'zind', 'atalanta', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 134 | ['src/demo/zind_demo3.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 135 | ['src/demo/other_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 136 | ['src/demo/other_demo2.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'], 137 | ], title='LGT-Net', allow_flagging="never", cache_examples=False, description=description) 138 | 139 | demo.launch(debug=True, enable_queue=False) 140 | -------------------------------------------------------------------------------- /visualization/boundary.py: -------------------------------------------------------------------------------- 1 | """ 2 | @date: 2021/06/19 3 | @description: 4 | """ 5 | 6 | import matplotlib.pyplot as plt 7 | import cv2 8 | import numpy as np 9 | from utils.conversion import uv2pixel 10 | from utils.boundary import corners2boundary, corners2boundaries, find_peaks, connect_corners_uv, get_object_cor, \ 11 | visibility_corners 12 | 13 | 14 | def draw_boundary(pano_img, corners: np.ndarray = None, boundary: np.ndarray = None, draw_corners=True, show=False, 15 | step=0.01, length=None, boundary_color=None, marker_color=None, title=None, visible=True): 16 | if marker_color is None: 17 | marker_color = [0, 0, 1] 18 | if boundary_color is None: 19 | boundary_color = [0, 1, 0] 20 | 21 | assert corners is not None or boundary is not None, "corners or boundary error" 22 | 23 | shape = sorted(pano_img.shape) 24 | assert len(shape) > 1, "pano_img shape error" 25 | w = shape[-1] 26 | h = shape[-2] 27 | 28 | pano_img = pano_img.copy() 29 | if (corners is not None and len(corners) > 2) or \ 30 | (boundary is not None and 
len(boundary) > 2): 31 | if isinstance(boundary_color, list) or isinstance(boundary_color, np.array): 32 | if boundary is None: 33 | boundary = corners2boundary(corners, step, length, visible) 34 | 35 | boundary = uv2pixel(boundary, w, h) 36 | pano_img[boundary[:, 1], boundary[:, 0]] = boundary_color 37 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), boundary[:, 0]] = boundary_color 38 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), boundary[:, 0]] = boundary_color 39 | 40 | if pano_img.shape[1] > 512: 41 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color 42 | pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color 43 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color 44 | pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color 45 | 46 | pano_img[boundary[:, 1], np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color 47 | pano_img[boundary[:, 1], np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color 48 | 49 | if corners is not None and draw_corners: 50 | if visible: 51 | corners = visibility_corners(corners) 52 | corners = uv2pixel(corners, w, h) 53 | for corner in corners: 54 | cv2.drawMarker(pano_img, tuple(corner), marker_color, markerType=0, markerSize=10, thickness=2) 55 | 56 | if show: 57 | plt.figure(figsize=(10, 5)) 58 | if title is not None: 59 | plt.title(title) 60 | 61 | plt.axis('off') 62 | plt.imshow(pano_img) 63 | plt.show() 64 | 65 | return pano_img 66 | 67 | 68 | def draw_boundaries(pano_img, corners_list: list = None, boundary_list: list = None, draw_corners=True, show=False, 69 | step=0.01, length=None, boundary_color=None, marker_color=None, title=None, ratio=None, visible=True): 70 | """ 71 | 72 | :param visible: 73 | :param pano_img: 74 | :param corners_list: 75 | :param boundary_list: 76 | :param draw_corners: 77 | :param show: 78 | :param step: 79 | :param length: 80 | :param boundary_color: RGB color 81 | :param marker_color: RGB color 82 | :param title: 83 | :param ratio: ceil_height/camera_height 84 | :return: 85 | """ 86 | assert corners_list is not None or boundary_list is not None, "corners_list or boundary_list error" 87 | 88 | if corners_list is not None: 89 | if ratio is not None and len(corners_list) == 1: 90 | corners_list = corners2boundaries(ratio, corners_uv=corners_list[0], step=None, visible=visible) 91 | 92 | for i, corners in enumerate(corners_list): 93 | pano_img = draw_boundary(pano_img, corners=corners, draw_corners=draw_corners, 94 | show=show if i == len(corners_list) - 1 else False, 95 | step=step, length=length, boundary_color=boundary_color, marker_color=marker_color, 96 | title=title, visible=visible) 97 | elif boundary_list is not None: 98 | if ratio is not None and len(boundary_list) == 1: 99 | boundary_list = corners2boundaries(ratio, corners_uv=boundary_list[0], step=None, visible=visible) 100 | 101 | for i, boundary in enumerate(boundary_list): 102 | pano_img = draw_boundary(pano_img, boundary=boundary, draw_corners=draw_corners, 103 | show=show if i == len(boundary_list) - 1 else False, 104 | step=step, length=length, boundary_color=boundary_color, marker_color=marker_color, 105 | title=title, visible=visible) 106 | 107 | return pano_img 108 | 109 | 110 | def draw_object(pano_img, heat_maps, size, depth, window_width=15, show=False): 111 | # window, door, opening 112 | colors = [[1, 0, 0], [1, 1, 0], [0, 0, 1]] 113 | for i, 
heat_map in enumerate(heat_maps): 114 | pk_u_s, _ = find_peaks(heat_map, size=window_width*2+1) 115 | for pk_u in pk_u_s: 116 | uv, xyz = get_object_cor(depth, size, center_u=pk_u, patch_num=len(heat_map)) 117 | 118 | bottom_poly = connect_corners_uv(uv[0], uv[1], length=pano_img.shape[1]) 119 | top_poly = connect_corners_uv(uv[2], uv[3], length=pano_img.shape[1])[::-1] 120 | 121 | bottom_max_index = bottom_poly[..., 0].argmax() 122 | if bottom_max_index != len(bottom_poly)-1: 123 | top_max_index = top_poly[..., 0].argmax() 124 | poly1 = np.concatenate([bottom_poly[:bottom_max_index+1], top_poly[top_max_index:]]) 125 | poly1 = uv2pixel(poly1, w=pano_img.shape[1], h=pano_img.shape[0]) 126 | poly1 = poly1[:, None, :] 127 | 128 | poly2 = np.concatenate([bottom_poly[bottom_max_index+1:], top_poly[:top_max_index]]) 129 | poly2 = uv2pixel(poly2, w=pano_img.shape[1], h=pano_img.shape[0]) 130 | poly2 = poly2[:, None, :] 131 | 132 | poly = [poly1, poly2] 133 | else: 134 | poly = np.concatenate([bottom_poly, top_poly]) 135 | poly = uv2pixel(poly, w=pano_img.shape[1], h=pano_img.shape[0]) 136 | poly = poly[:, None, :] 137 | poly = [poly] 138 | 139 | cv2.drawContours(pano_img, poly, -1, colors[i], 1) 140 | # 141 | # boundary_center_xyz = uv2xyz(np.array([pk_u, pk_v])) 142 | # 143 | # l_b_xyz = 144 | if show: 145 | plt.imshow(pano_img) 146 | plt.show() 147 | 148 | 149 | if __name__ == '__main__': 150 | from visualization.floorplan import draw_floorplan 151 | from utils.conversion import uv2xyz 152 | 153 | pano_img = np.zeros([512, 1024, 3]) 154 | corners = np.array([[0.2, 0.7], 155 | [0.4, 0.7], 156 | [0.3, 0.6], 157 | [0.6, 0.6], 158 | [0.8, 0.7]]) 159 | # draw_boundary(pano_img, corners, show=True) 160 | draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=1.2) 161 | draw_floorplan(uv2xyz(corners)[..., ::2], show=True, marker_color=None, center_color=0.8) -------------------------------------------------------------------------------- /visualization/visualizer/Viewer/Utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from visualization.visualizer.earcut import earcut 4 | 5 | def xz2lines(wall_xz, h): 6 | cch = h - 1.6 7 | ch = 1.6 8 | strips_ceiling = [] 9 | strips_wall = [] 10 | strips_floor = [] 11 | for i in range(wall_xz.shape[0] // 2): 12 | pts1 = wall_xz[i*2, :] 13 | pts2 = wall_xz[i*2+1, :] 14 | 15 | a = [[pts1[0], -cch, pts1[1]]] 16 | b = [[pts2[0], -cch, pts2[1]]] 17 | c = [[pts2[0], ch, pts2[1]]] 18 | d = [[pts1[0], ch, pts1[1]]] 19 | #strip = np.concatenate([a, b, b, c, c, d, d, a], axis=0) 20 | ceiling = np.concatenate([a, b], axis=0) 21 | wall = np.concatenate([b, c, d, a], axis=0) 22 | floor = np.concatenate([c, d], axis=0) 23 | 24 | strips_ceiling.append(ceiling) 25 | strips_wall.append(wall) 26 | strips_floor.append(floor) 27 | 28 | strips_ceiling = np.concatenate(strips_ceiling, axis=0).astype(np.float32) 29 | strips_wall = np.concatenate(strips_wall, axis=0).astype(np.float32) 30 | strips_floor = np.concatenate(strips_floor, axis=0).astype(np.float32) 31 | 32 | return strips_ceiling, strips_wall, strips_floor 33 | 34 | 35 | def Label2Mesh(label, reverse=False): 36 | scale = 1.6 / label['cameraHeight'] 37 | layout_height = scale * label['layoutHeight'] 38 | if 'cameraCeilingHeight' not in label: 39 | label['cameraCeilingHeight'] = label['layoutHeight'] - label['cameraHeight'] 40 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight'] 41 | xyz = 
np.asarray(label['points'], np.float32) 42 | xyz *= scale 43 | point_idxs = label['order'] 44 | 45 | wall_xz = [np.concatenate((xyz[:, ::2][i[0], :][None, ...], xyz[:, ::2][i[1], :][None, ...]), axis=0) for i in point_idxs] 46 | wall_xz = np.concatenate(wall_xz, axis=0).astype(np.float32) 47 | wall_num = wall_xz.shape[0] // 2 48 | lines_ceiling, lines_wall, lines_floor = xz2lines(wall_xz, layout_height) 49 | 50 | def ProcessOneWall(coord, idx, h): 51 | cch = h - 1.6 52 | ch = 1.6 53 | 54 | A = coord[idx[0], :] 55 | B = coord[idx[1], :] 56 | 57 | a = A.copy() 58 | b = B.copy() 59 | c = B.copy() 60 | a[1] = -cch 61 | b[1] = -cch 62 | c[1] = ch 63 | tmp1 = np.concatenate([a[None, ...], c[None, ...], b[None, ...]], axis=0) 64 | 65 | a = A.copy() 66 | b = A.copy() 67 | c = B.copy() 68 | a[1] = -cch 69 | b[1] = ch 70 | c[1] = ch 71 | tmp2 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0) 72 | 73 | return np.concatenate([tmp1[None, ...], tmp2[None, ...]], axis=0) 74 | mesh = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] for x in range(len(point_idxs))] 75 | mesh = np.concatenate(mesh, axis=0).reshape([-1, 3]) 76 | top_xz = [] 77 | for i, j in point_idxs: 78 | if not reverse: 79 | tmp = np.concatenate([xyz[i, ::2], xyz[j, ::2]]) 80 | else: 81 | tmp = np.concatenate([xyz[j, ::2], xyz[i, ::2]]) 82 | top_xz += tmp.tolist() 83 | try: 84 | indices = np.asarray(earcut(top_xz)).reshape([-1, 3]) 85 | top_xz = np.asarray(top_xz).reshape([-1, 2]) 86 | tmp = [] 87 | for i in range(indices.shape[0]): 88 | a = indices[i, 0] 89 | b = indices[i, 1] 90 | c = indices[i, 2] 91 | tmp.append(np.concatenate([top_xz[a:a+1, :], top_xz[b:b+1, :], top_xz[c:c+1, :]], axis=0)) 92 | tmp = np.concatenate(tmp, axis=0) 93 | ceiling_mesh = np.zeros([tmp.shape[0], 3], np.float32) 94 | ceiling_mesh[:, ::2] = tmp.copy() 95 | ceiling_mesh[:, 1] = -(layout_height - 1.6) 96 | floor_mesh = ceiling_mesh.copy() 97 | floor_mesh[:, 1] = 1.6 98 | #mesh = np.concatenate([mesh, ceiling_mesh, floor_mesh], axis=0) 99 | mesh = np.concatenate([mesh, floor_mesh], axis=0) 100 | except: 101 | pass 102 | ''' 103 | print (top_xz) 104 | top_xz = top_xz[:6] 105 | a = np.zeros([256, 256], np.uint8) 106 | b = ((top_xz - top_xz.min()) * 20).astype(int) + 5 107 | for i in range(0, b.shape[0]-1, 2): 108 | cv2.line(a, (b[i, 0], b[i, 1]), ((b[i+1, 0], b[i+1, 1])), color=255) 109 | import matplotlib.pyplot as plt 110 | plt.imshow(a) 111 | plt.show() 112 | exit() 113 | ''' 114 | return wall_num, wall_xz, [lines_ceiling, lines_wall, lines_floor], mesh 115 | 116 | def Label2Points(label): 117 | scale = 1.6 / label['cameraHeight'] 118 | layout_height = scale * label['layoutHeight'] 119 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight'] 120 | xyz = np.asarray(label['points'], np.float32) 121 | point_idxs = label['order'] 122 | def ProcessOneWall(coord, idx, h): 123 | cch = h - 1.6 124 | ch = 1.6 125 | 126 | a = coord[idx[0], ...].copy() 127 | b = coord[idx[1], ...].copy() 128 | a[1] = -cch 129 | b[1] = ch 130 | return np.concatenate([a[None, ...], b[None, ...]], axis=0) 131 | pts = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] 
for x in range(len(point_idxs))] 132 | pts = np.concatenate(pts, axis=0) 133 | return pts 134 | 135 | 136 | def OldFormat2Mine(label): 137 | scale = 1.6 / label['cameraHeight'] 138 | layout_height = scale * label['layoutHeight'] 139 | if 'cameraCeilingHeight' not in label: 140 | label['cameraCeilingHeight'] = label['layoutHeight'] - label['cameraHeight'] 141 | 142 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight'] 143 | xyz = [] 144 | planes = [] 145 | point_idxs = [] 146 | R_180 = cv2.Rodrigues(np.array([0, -1*np.pi, 0], np.float32))[0] 147 | for one in label['layoutPoints']['points']: 148 | xyz.append(one['xyz']) 149 | for one in label['layoutWalls']['walls']: 150 | planes.append(one['planeEquation']) 151 | point_idxs.append(one['pointsIdx']) 152 | xyz = np.asarray(xyz) 153 | xyz[:, 0] *= -1 154 | xyz = xyz.dot(R_180.T) 155 | xyz[:, 1] = 0 156 | xyz *= scale 157 | 158 | data = { 159 | 'cameraHeight': scale*label['cameraHeight'], 160 | 'cameraCeilingHeight': scale*label['cameraCeilingHeight'], 161 | 'layoutHeight': scale*label['layoutHeight'], 162 | 'points': xyz.tolist(), 163 | 'order': point_idxs 164 | } 165 | return data 166 | 167 | def Label2Mesh_oldformat(label): 168 | scale = 1.6 / label['cameraHeight'] 169 | layout_height = scale * label['layoutHeight'] 170 | up_down_ratio = label['cameraCeilingHeight'] / label['cameraHeight'] 171 | xyz = [] 172 | planes = [] 173 | point_idxs = [] 174 | R_180 = cv2.Rodrigues(np.array([0, -1*np.pi, 0], np.float32))[0] 175 | for one in label['layoutPoints']['points']: 176 | xyz.append(one['xyz']) 177 | for one in label['layoutWalls']['walls']: 178 | planes.append(one['planeEquation']) 179 | point_idxs.append(one['pointsIdx']) 180 | xyz = np.asarray(xyz) 181 | xyz[:, 0] *= -1 182 | xyz = xyz.dot(R_180.T) 183 | def ProcessOneWall(coord, idx, h): 184 | cch = h - 1.6 185 | ch = 1.6 186 | 187 | A = coord[idx[0], :] 188 | B = coord[idx[1], :] 189 | 190 | a = A.copy() 191 | b = B.copy() 192 | c = B.copy() 193 | a[1] = -cch 194 | b[1] = -cch 195 | c[1] = ch 196 | tmp1 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0) 197 | 198 | a = A.copy() 199 | b = A.copy() 200 | c = B.copy() 201 | a[1] = -cch 202 | b[1] = ch 203 | c[1] = ch 204 | tmp2 = np.concatenate([a[None, ...], b[None, ...], c[None, ...]], axis=0) 205 | 206 | return np.concatenate([tmp1[None, ...], tmp2[None, ...]], axis=0) 207 | mesh = [ProcessOneWall(xyz, point_idxs[x], layout_height)[None, ...] 
for x in range(label['layoutPoints']['num'])] 208 | mesh = np.concatenate(mesh, axis=0).reshape([-1, 3]) 209 | top_xz = [] 210 | for i, j in point_idxs: 211 | tmp = np.concatenate([xyz[i, ::2], xyz[j, ::2]]) 212 | top_xz += tmp.tolist() 213 | indices = np.asarray(earcut(top_xz)).reshape([-1, 3]) 214 | top_xz = np.asarray(top_xz).reshape([-1, 2]) 215 | tmp = [] 216 | for i in range(indices.shape[0]): 217 | a = indices[i, 0] 218 | b = indices[i, 1] 219 | c = indices[i, 2] 220 | tmp.append(np.concatenate([top_xz[a:a+1, :], top_xz[b:b+1, :], top_xz[c:c+1, :]], axis=0)) 221 | tmp = np.concatenate(tmp, axis=0) 222 | ceiling_mesh = np.zeros([tmp.shape[0], 3], np.float32) 223 | ceiling_mesh[:, ::2] = tmp.copy() 224 | ceiling_mesh[:, 1] = -(layout_height - 1.6) 225 | floor_mesh = ceiling_mesh.copy() 226 | floor_mesh[:, 1] = 1.6 227 | #mesh = np.concatenate([mesh, ceiling_mesh, floor_mesh], axis=0) 228 | mesh = np.concatenate([mesh, floor_mesh], axis=0) 229 | return mesh 230 | -------------------------------------------------------------------------------- /dataset/communal/read.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/07/28 3 | @description: 4 | """ 5 | import os 6 | import numpy as np 7 | import cv2 8 | import json 9 | from PIL import Image 10 | from utils.conversion import xyz2uv, pixel2uv 11 | from utils.height import calc_ceil_ratio 12 | 13 | 14 | def read_image(image_path, shape=None): 15 | if shape is None: 16 | shape = [512, 1024] 17 | img = np.array(Image.open(image_path)).astype(np.float32) / 255 18 | if img.shape[0] != shape[0] or img.shape[1] != shape[1]: 19 | img = cv2.resize(img, dsize=tuple(shape[::-1]), interpolation=cv2.INTER_AREA) 20 | 21 | return np.array(img) 22 | 23 | 24 | def read_label(label_path, data_type='MP3D'): 25 | 26 | if data_type == 'MP3D': 27 | with open(label_path, 'r') as f: 28 | label = json.load(f) 29 | point_idx = [one['pointsIdx'][0] for one in label['layoutWalls']['walls']] 30 | camera_height = label['cameraHeight'] 31 | room_height = label['layoutHeight'] 32 | camera_ceiling_height = room_height - camera_height 33 | ratio = camera_ceiling_height / camera_height 34 | 35 | xyz = [one['xyz'] for one in label['layoutPoints']['points']] 36 | assert len(xyz) == len(point_idx), "len(xyz) != len(point_idx)" 37 | xyz = [xyz[i] for i in point_idx] 38 | xyz = np.asarray(xyz, dtype=np.float32) 39 | xyz[:, 2] *= -1 40 | xyz[:, 1] = camera_height 41 | corners = xyz2uv(xyz) 42 | elif data_type == 'Pano_S2D3D': 43 | with open(label_path, 'r') as f: 44 | lines = [line for line in f.readlines() if 45 | len([c for c in line.split(' ') if c[0].isnumeric()]) > 1] 46 | 47 | corners_list = np.array([line.strip().split() for line in lines], np.float32) 48 | uv_list = pixel2uv(corners_list) 49 | ceil_uv = uv_list[::2] 50 | floor_uv = uv_list[1::2] 51 | ratio = calc_ceil_ratio([ceil_uv, floor_uv], mode='mean') 52 | corners = floor_uv 53 | else: 54 | return None 55 | 56 | output = { 57 | 'ratio': np.array([ratio], dtype=np.float32), 58 | 'corners': corners, 59 | 'id': os.path.basename(label_path).split('.')[0] 60 | } 61 | return output 62 | 63 | 64 | def move_not_simple_image(data_dir, simple_panos): 65 | import shutil 66 | for house_index in os.listdir(data_dir): 67 | house_path = os.path.join(data_dir, house_index) 68 | if not os.path.isdir(house_path) or house_index == 'visualization': 69 | continue 70 | 71 | floor_plan_path = os.path.join(house_path, 'floor_plans') 72 | if os.path.exists(floor_plan_path): 73 | 
print(f'move:{floor_plan_path}') 74 | dst_floor_plan_path = floor_plan_path.replace('zind', 'zind2') 75 | os.makedirs(dst_floor_plan_path, exist_ok=True) 76 | shutil.move(floor_plan_path, dst_floor_plan_path) 77 | 78 | panos_path = os.path.join(house_path, 'panos') 79 | for pano in os.listdir(panos_path): 80 | pano_path = os.path.join(panos_path, pano) 81 | pano_index = '_'.join(pano.split('.')[0].split('_')[-2:]) 82 | if f'{house_index}_{pano_index}' not in simple_panos and os.path.exists(pano_path): 83 | print(f'move:{pano_path}') 84 | dst_pano_path = pano_path.replace('zind', 'zind2') 85 | os.makedirs(os.path.dirname(dst_pano_path), exist_ok=True) 86 | shutil.move(pano_path, dst_pano_path) 87 | 88 | 89 | def read_zind(partition_path, simplicity_path, data_dir, mode, is_simple=True, 90 | layout_type='layout_raw', is_ceiling_flat=False, plan_y=1): 91 | with open(simplicity_path, 'r') as f: 92 | simple_tag = json.load(f) 93 | simple_panos = {} 94 | for k in simple_tag.keys(): 95 | if not simple_tag[k]: 96 | continue 97 | split = k.split('_') 98 | house_index = split[0] 99 | pano_index = '_'.join(split[-2:]) 100 | simple_panos[f'{house_index}_{pano_index}'] = True 101 | 102 | # move_not_simple_image(data_dir, simple_panos) 103 | 104 | pano_list = [] 105 | with open(partition_path, 'r') as f1: 106 | house_list = json.load(f1)[mode] 107 | 108 | for house_index in house_list: 109 | with open(os.path.join(data_dir, house_index, f"zind_data.json"), 'r') as f2: 110 | data = json.load(f2) 111 | 112 | panos = [] 113 | merger = data['merger'] 114 | for floor in merger.values(): 115 | for complete_room in floor.values(): 116 | for partial_room in complete_room.values(): 117 | for pano_index in partial_room: 118 | pano = partial_room[pano_index] 119 | pano['index'] = pano_index 120 | panos.append(pano) 121 | 122 | for pano in panos: 123 | if layout_type not in pano: 124 | continue 125 | pano_index = pano['index'] 126 | 127 | if is_simple and f'{house_index}_{pano_index}' not in simple_panos.keys(): 128 | continue 129 | 130 | if is_ceiling_flat and not pano['is_ceiling_flat']: 131 | continue 132 | 133 | layout = pano[layout_type] 134 | # corners 135 | corner_xz = np.array(layout['vertices']) 136 | corner_xz[..., 0] = -corner_xz[..., 0] 137 | corner_xyz = np.insert(corner_xz, 1, pano['camera_height'], axis=1) 138 | corners = xyz2uv(corner_xyz).astype(np.float32) 139 | 140 | # ratio 141 | ratio = np.array([(pano['ceiling_height'] - pano['camera_height']) / pano['camera_height']], dtype=np.float32) 142 | 143 | # Ours future work: detection window, door, opening 144 | objects = { 145 | 'windows': [], 146 | 'doors': [], 147 | 'openings': [], 148 | } 149 | for label_index, wdo_type in enumerate(["windows", "doors", "openings"]): 150 | if wdo_type not in layout: 151 | continue 152 | 153 | wdo_vertices = np.array(layout[wdo_type]) 154 | if len(wdo_vertices) == 0: 155 | continue 156 | 157 | assert len(wdo_vertices) % 3 == 0 158 | 159 | for i in range(0, len(wdo_vertices), 3): 160 | # In the Zind dataset, the camera height is 1, and the default camera height in our code is also 1, 161 | # so the xyz coordinate here can be used directly 162 | # Since we're taking the opposite z-axis, we're changing the order of left and right 163 | 164 | left_bottom_xyz = np.array( 165 | [-wdo_vertices[i + 1][0], -wdo_vertices[i + 2][0], wdo_vertices[i + 1][1]]) 166 | right_bottom_xyz = np.array( 167 | [-wdo_vertices[i][0], -wdo_vertices[i + 2][0], wdo_vertices[i][1]]) 168 | center_bottom_xyz = (left_bottom_xyz + 
right_bottom_xyz) / 2 169 | 170 | center_top_xyz = center_bottom_xyz.copy() 171 | center_top_xyz[1] = -wdo_vertices[i + 2][1] 172 | 173 | center_boundary_xyz = center_bottom_xyz.copy() 174 | center_boundary_xyz[1] = plan_y 175 | 176 | uv = xyz2uv(np.array([left_bottom_xyz, right_bottom_xyz, 177 | center_bottom_xyz, center_top_xyz, 178 | center_boundary_xyz])) 179 | 180 | left_bottom_uv = uv[0] 181 | right_bottom_uv = uv[1] 182 | width_u = abs(right_bottom_uv[0] - left_bottom_uv[0]) 183 | width_u = 1 - width_u if width_u > 0.5 else width_u 184 | assert width_u > 0, width_u 185 | 186 | center_bottom_uv = uv[2] 187 | center_top_uv = uv[3] 188 | height_v = center_bottom_uv[1] - center_top_uv[1] 189 | 190 | if height_v < 0: 191 | continue 192 | 193 | center_boundary_uv = uv[4] 194 | boundary_v = center_boundary_uv[1] - center_bottom_uv[1] if wdo_type == 'windows' else 0 195 | boundary_v = 0 if boundary_v < 0 else boundary_v 196 | 197 | center_u = center_bottom_uv[0] 198 | 199 | objects[wdo_type].append({ 200 | 'width_u': width_u, 201 | 'height_v': height_v, 202 | 'boundary_v': boundary_v, 203 | 'center_u': center_u 204 | }) 205 | 206 | pano_list.append({ 207 | 'img_path': os.path.join(data_dir, house_index, pano['image_path']), 208 | 'corners': corners, 209 | 'objects': objects, 210 | 'ratio': ratio, 211 | 'id': f'{house_index}_{pano_index}', 212 | 'is_inside': pano['is_inside'] 213 | }) 214 | return pano_list 215 | -------------------------------------------------------------------------------- /models/lgt_net.py: -------------------------------------------------------------------------------- 1 | import torch.nn 2 | import torch 3 | import torch.nn as nn 4 | import models.modules as modules 5 | import numpy as np 6 | 7 | from models.base_model import BaseModule 8 | from models.modules.horizon_net_feature_extractor import HorizonNetFeatureExtractor 9 | from models.modules.patch_feature_extractor import PatchFeatureExtractor 10 | from utils.conversion import uv2depth, get_u, lonlat2depth, get_lon, lonlat2uv 11 | from utils.height import calc_ceil_ratio 12 | from utils.misc import tensor2np 13 | 14 | 15 | class LGT_Net(BaseModule): 16 | def __init__(self, ckpt_dir=None, backbone='resnet50', dropout=0.0, output_name='LGT', 17 | decoder_name='Transformer', win_size=8, depth=6, 18 | ape=None, rpe=None, corner_heat_map=False, rpe_pos=1): 19 | super().__init__(ckpt_dir) 20 | 21 | self.patch_num = 256 22 | self.patch_dim = 1024 23 | self.decoder_name = decoder_name 24 | self.output_name = output_name 25 | self.corner_heat_map = corner_heat_map 26 | self.dropout_d = dropout 27 | 28 | if backbone == 'patch': 29 | self.feature_extractor = PatchFeatureExtractor(patch_num=self.patch_num, input_shape=[3, 512, 1024]) 30 | else: 31 | # feature extractor 32 | self.feature_extractor = HorizonNetFeatureExtractor(backbone) 33 | 34 | if 'Transformer' in self.decoder_name: 35 | # transformer encoder 36 | transformer_dim = self.patch_dim 37 | transformer_layers = depth 38 | transformer_heads = 8 39 | transformer_head_dim = transformer_dim // transformer_heads 40 | transformer_ff_dim = 2048 41 | rpe = None if rpe == 'None' else rpe 42 | self.transformer = getattr(modules, decoder_name)(dim=transformer_dim, depth=transformer_layers, 43 | heads=transformer_heads, dim_head=transformer_head_dim, 44 | mlp_dim=transformer_ff_dim, win_size=win_size, 45 | dropout=self.dropout_d, patch_num=self.patch_num, 46 | ape=ape, rpe=rpe, rpe_pos=rpe_pos) 47 | elif self.decoder_name == 'LSTM': 48 | self.bi_rnn = 
nn.LSTM(input_size=self.feature_extractor.c_last, 49 | hidden_size=self.patch_dim // 2, 50 | num_layers=2, 51 | dropout=self.dropout_d, 52 | batch_first=False, 53 | bidirectional=True) 54 | self.drop_out = nn.Dropout(self.dropout_d) 55 | else: 56 | raise NotImplementedError("Only support *Transformer and LSTM") 57 | 58 | if self.output_name == 'LGT': 59 | # omnidirectional-geometry aware output 60 | self.linear_depth_output = nn.Linear(in_features=self.patch_dim, out_features=1) 61 | self.linear_ratio = nn.Linear(in_features=self.patch_dim, out_features=1) 62 | self.linear_ratio_output = nn.Linear(in_features=self.patch_num, out_features=1) 63 | elif self.output_name == 'LED' or self.output_name == 'Horizon': 64 | # horizon-depth or latitude output 65 | self.linear = nn.Linear(in_features=self.patch_dim, out_features=2) 66 | else: 67 | raise NotImplementedError("Unknown output") 68 | 69 | if self.corner_heat_map: 70 | # corners heat map output 71 | self.linear_corner_heat_map_output = nn.Linear(in_features=self.patch_dim, out_features=1) 72 | 73 | self.name = f"{self.decoder_name}_{self.output_name}_Net" 74 | 75 | def lgt_output(self, x): 76 | """ 77 | :param x: [ b, 256(patch_num), 1024(d)] 78 | :return: { 79 | 'depth': [b, 256(patch_num & d)] 80 | 'ratio': [b, 1(d)] 81 | } 82 | """ 83 | depth = self.linear_depth_output(x) # [b, 256(patch_num), 1(d)] 84 | depth = depth.view(-1, self.patch_num) # [b, 256(patch_num & d)] 85 | 86 | # ratio represent room height 87 | ratio = self.linear_ratio(x) # [b, 256(patch_num), 1(d)] 88 | ratio = ratio.view(-1, self.patch_num) # [b, 256(patch_num & d)] 89 | ratio = self.linear_ratio_output(ratio) # [b, 1(d)] 90 | output = { 91 | 'depth': depth, 92 | 'ratio': ratio 93 | } 94 | return output 95 | 96 | def led_output(self, x): 97 | """ 98 | :param x: [ b, 256(patch_num), 1024(d)] 99 | :return: { 100 | 'depth': [b, 256(patch_num)] 101 | 'ceil_depth': [b, 256(patch_num)] 102 | 'ratio': [b, 1(d)] 103 | } 104 | """ 105 | bon = self.linear(x) # [b, 256(patch_num), 2(d)] 106 | bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)] 107 | bon = torch.sigmoid(bon) 108 | 109 | ceil_v = bon[:, 0, :] * -0.5 + 0.5 # [b, 256(patch_num)] 110 | floor_v = bon[:, 1, :] * 0.5 + 0.5 # [b, 256(patch_num)] 111 | u = get_u(w=self.patch_num, is_np=False, b=ceil_v.shape[0]).to(ceil_v.device) 112 | ceil_boundary = torch.stack((u, ceil_v), axis=-1) # [b, 256(patch_num), 2] 113 | floor_boundary = torch.stack((u, floor_v), axis=-1) # [b, 256(patch_num), 2] 114 | output = { 115 | 'depth': uv2depth(floor_boundary), # [b, 256(patch_num)] 116 | 'ceil_depth': uv2depth(ceil_boundary), # [b, 256(patch_num)] 117 | } 118 | # print(output['depth'].mean()) 119 | if not self.training: 120 | # [b, 1(d)] 121 | output['ratio'] = calc_ceil_ratio([tensor2np(ceil_boundary), tensor2np(floor_boundary)], mode='lsq').reshape(-1, 1) 122 | return output 123 | 124 | def horizon_output(self, x): 125 | """ 126 | :param x: [ b, 256(patch_num), 1024(d)] 127 | :return: { 128 | 'floor_boundary': [b, 256(patch_num)] 129 | 'ceil_boundary': [b, 256(patch_num)] 130 | } 131 | """ 132 | bon = self.linear(x) # [b, 256(patch_num), 2(d)] 133 | bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)] 134 | 135 | output = { 136 | 'boundary': bon 137 | } 138 | if not self.training: 139 | lon = get_lon(w=self.patch_num, is_np=False, b=bon.shape[0]).to(bon.device) 140 | floor_lat = torch.clip(bon[:, 0, :], 1e-4, np.pi / 2) 141 | ceil_lat = torch.clip(bon[:, 1, :], -np.pi / 2, -1e-4) 142 | floor_lonlat = torch.stack((lon, 
floor_lat), axis=-1) # [b, 256(patch_num), 2] 143 | ceil_lonlat = torch.stack((lon, ceil_lat), axis=-1) # [b, 256(patch_num), 2] 144 | output['depth'] = lonlat2depth(floor_lonlat) 145 | output['ratio'] = calc_ceil_ratio([tensor2np(lonlat2uv(ceil_lonlat)), 146 | tensor2np(lonlat2uv(floor_lonlat))], mode='mean').reshape(-1, 1) 147 | return output 148 | 149 | def forward(self, x): 150 | """ 151 | :param x: [b, 3(d), 512(h), 1024(w)] 152 | :return: { 153 | 'depth': [b, 256(patch_num & d)] 154 | 'ratio': [b, 1(d)] 155 | } 156 | """ 157 | 158 | # feature extractor 159 | x = self.feature_extractor(x) # [b 1024(d) 256(w)] 160 | 161 | if 'Transformer' in self.decoder_name: 162 | # transformer decoder 163 | x = x.permute(0, 2, 1) # [b 256(patch_num) 1024(d)] 164 | x = self.transformer(x) # [b 256(patch_num) 1024(d)] 165 | elif self.decoder_name == 'LSTM': 166 | # lstm decoder 167 | x = x.permute(2, 0, 1) # [256(patch_num), b, 1024(d)] 168 | self.bi_rnn.flatten_parameters() 169 | x, _ = self.bi_rnn(x) # [256(patch_num & seq_len), b, 1024(d)] 170 | x = x.permute(1, 0, 2) # [b, 256(patch_num), 1024(d)] 171 | x = self.drop_out(x) 172 | 173 | output = None 174 | if self.output_name == 'LGT': 175 | # plt output 176 | output = self.lgt_output(x) 177 | 178 | elif self.output_name == 'LED': 179 | # led output 180 | output = self.led_output(x) 181 | 182 | elif self.output_name == 'Horizon': 183 | # led output 184 | output = self.horizon_output(x) 185 | 186 | if self.corner_heat_map: 187 | corner_heat_map = self.linear_corner_heat_map_output(x) # [b, 256(patch_num), 1] 188 | corner_heat_map = corner_heat_map.view(-1, self.patch_num) 189 | corner_heat_map = torch.sigmoid(corner_heat_map) 190 | output['corner_heat_map'] = corner_heat_map 191 | 192 | return output 193 | 194 | 195 | if __name__ == '__main__': 196 | from PIL import Image 197 | import numpy as np 198 | from models.other.init_env import init_env 199 | 200 | init_env(0, deterministic=True) 201 | 202 | net = LGT_Net() 203 | 204 | total = sum(p.numel() for p in net.parameters()) 205 | trainable = sum(p.numel() for p in net.parameters() if p.requires_grad) 206 | print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) 207 | 208 | img = np.array(Image.open("../src/demo.png")).transpose((2, 0, 1)) 209 | input = torch.Tensor([img]) # 1 3 512 1024 210 | output = net(input) 211 | 212 | print(output['depth'].shape) # 1 256 213 | print(output['ratio'].shape) # 1 1 214 | -------------------------------------------------------------------------------- /postprocessing/dula/layout.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Date: 2021/10/06 3 | @description: Use the approach proposed by DuLa-Net 4 | """ 5 | import cv2 6 | import numpy as np 7 | import math 8 | import matplotlib.pyplot as plt 9 | import sys 10 | import os.path as osp 11 | sys.path.append(osp.abspath(osp.join(__file__, '../../../'))) 12 | 13 | from visualization.floorplan import draw_floorplan 14 | 15 | def calc_angle(v1: np.array, v2: np.array): 16 | norm = np.linalg.norm(v1) * np.linalg.norm(v2) 17 | theta = np.arccos(np.dot(v1, v2) / norm) 18 | return theta 19 | 20 | def merge_near(lst, diag, min): 21 | group = [[min, ]] 22 | for i in range(1, len(lst)): 23 | if lst[i][1] == 0 and lst[i][0] - np.mean(group[-1]) < diag * 0.02: 24 | group[-1].append(lst[i][0]) 25 | else: 26 | group.append([lst[i][0], ]) 27 | if len(group) == 1: 28 | group = [lst[0][0], lst[-1][0]] 29 | else: 30 | group = [int(np.mean(x)) for x in group] 31 | return 
group 32 | 33 | 34 | def fit_layout(floor_xz, need_cube=False, show=False, block_eps=5): 35 | show_radius = np.linalg.norm(floor_xz, axis=-1).max() 36 | side_l = 512 37 | floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8) 38 | center = np.array([side_l / 2, side_l / 2]) 39 | polys = cv2.findContours(floorplan, 1, 2) 40 | if isinstance(polys, tuple): 41 | if len(polys) == 3: 42 | # opencv 3 43 | polys = list(polys[1]) 44 | else: 45 | polys = list(polys[0]) 46 | polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) 47 | poly = polys[0] 48 | sub_x, sub_y, w, h = cv2.boundingRect(poly) 49 | floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w] 50 | sub_center = center - np.array([sub_x, sub_y]) 51 | polys = cv2.findContours(floorplan_sub, 1, 2) 52 | if isinstance(polys, tuple): 53 | if len(polys) == 3: 54 | polys = polys[1] 55 | else: 56 | polys = polys[0] 57 | poly = polys[0] 58 | epsilon = 0.005 * cv2.arcLength(poly, True) 59 | poly = cv2.approxPolyDP(poly, epsilon, True) 60 | 61 | x_lst = [[poly[:, 0, 0].min(), 0], ] 62 | y_lst = [[poly[:, 0, 1].min(), 0], ] 63 | 64 | ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1])) 65 | 66 | for i in range(len(poly)): 67 | p1 = poly[i][0] 68 | p2 = poly[(i + 1) % len(poly)][0] 69 | # We added occlusion detection 70 | cp1 = p1 - sub_center 71 | cp2 = p2 - sub_center 72 | p12 = p2 - p1 73 | l1 = np.linalg.norm(cp1) 74 | l2 = np.linalg.norm(cp2) 75 | l3 = np.linalg.norm(p12) 76 | # We added occlusion detection 77 | is_block1 = np.rad2deg(calc_angle(cp1, cp2)) < block_eps 78 | is_block2 = np.rad2deg(calc_angle(cp2, p12)) < block_eps*2 79 | is_block3 = np.rad2deg(calc_angle(cp2, -p12)) < block_eps*2 80 | is_block = is_block1 and (is_block2 or is_block3) 81 | 82 | if (p2[0] - p1[0]) == 0: 83 | slope = 10 84 | else: 85 | slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0])) 86 | 87 | if is_block: 88 | s = p1[1] if l1 < l2 else p2[1] 89 | y_lst.append([s, 1]) 90 | s = p1[0] if l1 < l2 else p2[0] 91 | x_lst.append([s, 1]) 92 | 93 | left = p1[0] if p1[0] < p2[0] else p2[0] 94 | right = p1[0] if p1[0] > p2[0] else p2[0] 95 | top = p1[1] if p1[1] < p2[1] else p2[1] 96 | bottom = p1[1] if p1[1] > p2[1] else p2[1] 97 | sample = floorplan_sub[top:bottom, left:right] 98 | score = 0 if sample.size == 0 else sample.mean() 99 | if score >= 0.3: 100 | ans[top:bottom, left:right] = 1 101 | 102 | else: 103 | if slope <= 1: 104 | s = int((p1[1] + p2[1]) / 2) 105 | y_lst.append([s, 0]) 106 | elif slope > 1: 107 | s = int((p1[0] + p2[0]) / 2) 108 | x_lst.append([s, 0]) 109 | 110 | debug_show = False 111 | if debug_show: 112 | plt.figure(dpi=300) 113 | plt.axis('off') 114 | a = cv2.drawMarker(floorplan_sub.copy()*0.5, tuple(sub_center.astype(int)), [1], markerType=0, markerSize=10, thickness=2) 115 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) 116 | plt.savefig('src/1.png', bbox_inches='tight', transparent=True, pad_inches=0) 117 | plt.show() 118 | 119 | plt.figure(dpi=300) 120 | plt.axis('off') 121 | a = cv2.drawMarker(ans.copy()*0.5, tuple(sub_center.astype(int)), [1], markerType=0, markerSize=10, thickness=2) 122 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) 123 | # plt.show() 124 | plt.savefig('src/2.png', bbox_inches='tight', transparent=True, pad_inches=0) 125 | plt.show() 126 | 127 | x_lst.append([poly[:, 0, 0].max(), 0]) 128 | y_lst.append([poly[:, 0, 1].max(), 0]) 129 | x_lst.sort(key=lambda x: x[0]) 130 | y_lst.sort(key=lambda x: x[0]) 131 | 132 | diag = 
math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2)) 133 | x_lst = merge_near(x_lst, diag, poly[:, 0, 0].min()) 134 | y_lst = merge_near(y_lst, diag, poly[:, 0, 1].min()) 135 | if need_cube and len(x_lst) > 2: 136 | x_lst = [x_lst[0], x_lst[-1]] 137 | if need_cube and len(y_lst) > 2: 138 | y_lst = [y_lst[0], y_lst[-1]] 139 | 140 | for i in range(len(x_lst) - 1): 141 | for j in range(len(y_lst) - 1): 142 | sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] 143 | score = 0 if sample.size == 0 else sample.mean() 144 | if score >= 0.3: 145 | ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1 146 | 147 | if debug_show: 148 | plt.figure(dpi=300) 149 | plt.axis('off') 150 | a = cv2.drawMarker(ans.copy() * 0.5, tuple(sub_center.astype(int)), [1], 151 | markerType=0, markerSize=10, thickness=2) 152 | plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) 153 | # plt.show() 154 | plt.savefig('src/3.png', bbox_inches='tight', transparent=True, pad_inches=0) 155 | plt.show() 156 | 157 | pred = np.uint8(ans) 158 | pred_polys = cv2.findContours(pred, 1, 3) 159 | if isinstance(pred_polys, tuple): 160 | if len(pred_polys) == 3: 161 | pred_polys = pred_polys[1] 162 | else: 163 | pred_polys = pred_polys[0] 164 | 165 | pred_polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) 166 | pred_poly = pred_polys[0] 167 | # findContours may produce errors, which are enforced here 168 | for i in range(len(pred_poly)): 169 | p1 = pred_poly[i][0] 170 | p2 = pred_poly[(i+1)%len(pred_poly)][0] 171 | if abs(p1[0] - p2[0]) < abs(p1[1] - p2[1]): 172 | p1[0] = p2[0] 173 | else: 174 | p1[1] = p2[1] 175 | 176 | if debug_show: 177 | plt.figure(dpi=300) 178 | plt.axis('off') 179 | a = cv2.drawMarker(ans.copy() * 0.5, tuple(sub_center.astype(int)), [1], 180 | markerType=0, markerSize=10, thickness=2) 181 | a = cv2.drawContours(a, [poly], 0, 0.8, 1) 182 | a = cv2.drawContours(a, [pred_poly], 0, 1, 1) 183 | plt.imshow(a) 184 | # plt.show() 185 | plt.savefig('src/4.png', bbox_inches='tight', transparent=True, pad_inches=0) 186 | plt.show() 187 | 188 | polygon = [(p[0][1], p[0][0]) for p in pred_poly[::-1]] 189 | 190 | v = np.array([p[0] + sub_y for p in polygon]) 191 | u = np.array([p[1] + sub_x for p in polygon]) 192 | # side_l 193 | # v<-----------|o 194 | # | | | 195 | # | ----|----z | side_l 196 | # | | | 197 | # | x \|/ 198 | # |------------u 199 | side_l = floorplan.shape[0] 200 | pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1) 201 | 202 | pred_xz = pred_xz * show_radius / (side_l // 2) 203 | if show: 204 | draw_floorplan(pred_xz, show_radius=show_radius, show=show) 205 | 206 | show_process = False 207 | if show_process: 208 | img = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1], 3)) 209 | for x in x_lst: 210 | cv2.line(img, (x, 0), (x, floorplan_sub.shape[0]), (0, 255, 0), 1) 211 | for y in y_lst: 212 | cv2.line(img, (0, y), (floorplan_sub.shape[1], y), (255, 0, 0), 1) 213 | 214 | fig = plt.figure() 215 | plt.axis('off') 216 | ax1 = fig.add_subplot(2, 2, 1) 217 | ax1.imshow(floorplan) 218 | ax3 = fig.add_subplot(2, 2, 2) 219 | ax3.imshow(floorplan_sub) 220 | ax4 = fig.add_subplot(2, 2, 3) 221 | ax4.imshow(img) 222 | ax5 = fig.add_subplot(2, 2, 4) 223 | ax5.imshow(ans) 224 | plt.show() 225 | 226 | return pred_xz 227 | 228 | 229 | if __name__ == '__main__': 230 | # processed_xz = fit_layout(floor_xz=np.load('/room_layout_estimation/lgt_net/floor_xz.npy'), need_cube=False, show=False) 231 | 232 | from utils.conversion 
import uv2xyz 233 | 234 | pano_img = np.zeros([512, 1024, 3]) 235 | corners = np.array([[0.1, 0.7], 236 | [0.4, 0.7], 237 | [0.3, 0.6], 238 | [0.6, 0.6], 239 | [0.8, 0.7]]) 240 | xz = uv2xyz(corners)[..., ::2] 241 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) 242 | 243 | xz = fit_layout(xz) 244 | draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) 245 | --------------------------------------------------------------------------------
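A minimal end-to-end usage sketch (not one of the repository files above): it chains the two __main__ demos shown in boundary.py and layout.py, projecting uv corners to the floor plane, regularizing the floor plan with fit_layout, and drawing both the panoramic boundary and the floor plan. It assumes the snippet is run from the repository root and that draw_boundaries is importable from visualization.boundary; all other calls use the signatures shown above.

import numpy as np

from utils.conversion import uv2xyz
from visualization.floorplan import draw_floorplan
from visualization.boundary import draw_boundaries  # assumed module path
from postprocessing.dula.layout import fit_layout

# stand-in equirectangular image and uv corners on the floor boundary
pano_img = np.zeros([512, 1024, 3])
corners = np.array([[0.1, 0.7],
                    [0.4, 0.7],
                    [0.3, 0.6],
                    [0.6, 0.6],
                    [0.8, 0.7]])

# draw floor/ceiling boundaries on the panorama (ratio = ceiling height / camera height)
draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=1.2)

# project corners to the x-z floor plane, then regularize with the DuLa-Net style post-processing
xz = uv2xyz(corners)[..., ::2]
xz_fit = fit_layout(xz, need_cube=False, show=False)
draw_floorplan(xz_fit, show=True, marker_color=None, center_color=0.8)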