├── SSR
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── replica
│   │   │   ├── __init__.py
│   │   │   └── replica_datasets.py
│   │   ├── scannet
│   │   │   ├── __init__.py
│   │   │   ├── scannet_reader.py
│   │   │   ├── scannet_utils.py
│   │   │   └── scannet_datasets.py
│   │   └── replica_nyu
│   │       ├── __init__.py
│   │       └── replica_nyu_cnn_datasets.py
│   ├── geometry
│   │   ├── __init__.py
│   │   └── occupancy.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── ndc_derivation.pdf
│   │   └── image_utils.py
│   ├── visualisation
│   │   ├── __init__.py
│   │   ├── tensorboard_vis.py
│   │   └── open3d_utils.py
│   ├── training
│   │   ├── __init__.py
│   │   └── training_utils.py
│   ├── __init__.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── model_utils.py
│   │   ├── semantic_nerf.py
│   │   └── rays.py
│   ├── data_generation
│   │   ├── replica_render_config_vMAP.yaml
│   │   ├── README.md
│   │   ├── extract_inst_obj.py
│   │   ├── transformation.py
│   │   ├── settings.py
│   │   └── habitat_renderer.py
│   ├── configs
│   │   ├── SSR_room0_config.yaml
│   │   └── SSR_ScanNet_config.yaml
│   └── extract_colour_mesh.py
├── imgs
│   ├── teaser.png
│   └── sem_mesh_room0.png
├── requirements.txt
├── .gitignore
├── README.md
├── LICENSE
└── train_SSR_main.py
/SSR/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/SSR/datasets/replica/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/SSR/datasets/scannet/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/SSR/datasets/replica_nyu/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/SSR/geometry/__init__.py:
--------------------------------------------------------------------------------
1 | from . import occupancy
--------------------------------------------------------------------------------
/SSR/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import image_utils
--------------------------------------------------------------------------------
/SSR/visualisation/__init__.py:
--------------------------------------------------------------------------------
1 | from . import open3d_utils
2 |
--------------------------------------------------------------------------------
/SSR/training/__init__.py:
--------------------------------------------------------------------------------
1 | from . import trainer
2 | from . import training_utils
--------------------------------------------------------------------------------
/imgs/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Harry-Zhi/semantic_nerf/HEAD/imgs/teaser.png
--------------------------------------------------------------------------------
/imgs/sem_mesh_room0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Harry-Zhi/semantic_nerf/HEAD/imgs/sem_mesh_room0.png
--------------------------------------------------------------------------------
/SSR/utils/ndc_derivation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Harry-Zhi/semantic_nerf/HEAD/SSR/utils/ndc_derivation.pdf
--------------------------------------------------------------------------------
/SSR/__init__.py:
--------------------------------------------------------------------------------
1 | from . import configs
2 | from . import datasets
3 | from . import geometry
4 | from . import models
5 | from . import training
6 | from . import utils
7 | from . import visualisation
8 | __version__ = "0.0.1"
--------------------------------------------------------------------------------
/SSR/models/__init__.py:
--------------------------------------------------------------------------------
1 | # from . import iMAP_model_utils
2 | # from . import iMAP_nerf
3 | # from . import semantic_nerf
4 | # from . import model_utils
5 | # from . import rays
6 |
7 | # we do not pre-load iMAP_model_utils and model_utils here since they contain functions with the same name and will cause conflicts
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.6.0
2 | torchvision==0.7.0
3 | tensorboard==2.4.1
4 | imageio==2.9.0
5 | imageio-ffmpeg==0.4.2
6 | matplotlib==3.3.2
7 | scikit-image==0.17.2
8 | scikit-learn==0.23.2
9 | tqdm==4.54.1
10 | pyyaml==5.3.1
11 | trimesh==3.9.9
12 | imgviz==1.2.2
13 | open3d==0.12.0
14 | opencv-python==4.4.0.44
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.ipynb_checkpoints
2 | **/__pycache__
3 | *.mp4
4 | *.npy
5 | *.npz
6 | *.dae
7 | data/*
8 | logs/*
9 |
10 | .idea/
11 | .anaconda3/
12 | SSR/data/
13 | SSR/results/
14 | # Compiled python modules.
15 | *.pyc
16 |
17 | # Setuptools distribution folder.
18 | /dist/
19 |
20 | # vim
21 | **/*.swp
22 |
23 | # vscode
24 | .vscode/
25 | ../.vscode/
26 |
27 | # Python egg metadata, regenerated from source files by setuptools.
28 | /*.egg-info
29 |
30 | *.json
31 |
32 | SSR/configs/SSR_room0_config_test.yaml
33 |
--------------------------------------------------------------------------------
/SSR/visualisation/tensorboard_vis.py:
--------------------------------------------------------------------------------
1 | from torch.utils.tensorboard import SummaryWriter
2 | import os
3 | import yaml
4 |
5 | class TFVisualizer(object):
6 | def __init__(self, log_dir, vis_interval, config):
7 | self.tb_writer = SummaryWriter(log_dir=os.path.join(log_dir))
8 | self.vis_interval = vis_interval
9 | self.config = config
10 |
11 | # dump args to tensorboard
12 | args_str = '{}'.format(yaml.dump(config, sort_keys=False, indent=4))
13 | self.tb_writer.add_text('Exp_args', args_str, 0)
14 |
15 | def vis_scalars(self, i_iter, losses, names):
16 | for i, loss in enumerate(losses):
17 | self.tb_writer.add_scalar(names[i], loss, i_iter)
18 |
19 |
20 | def vis_histogram(self, i_iter, value, names):
21 | self.tb_writer.add_histogram(tag=names, values=value, global_step=i_iter)
22 |
--------------------------------------------------------------------------------
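A minimal, hedged usage sketch for `TFVisualizer` above (the log directory, config dictionary and tag names are illustrative, not taken from the repository, and the repository root is assumed to be on `PYTHONPATH`):

```python
# Minimal usage sketch for TFVisualizer; log_dir, config and tag names are illustrative.
from SSR.visualisation.tensorboard_vis import TFVisualizer

config = {"train": {"lrate": 5e-4}, "render": {"N_samples": 64}}  # stand-in for a parsed YAML config
vis = TFVisualizer(log_dir="logs/demo_run", vis_interval=500, config=config)

# Log two scalar losses at iteration 100; they appear under the given tags in TensorBoard.
vis.vis_scalars(i_iter=100, losses=[0.25, 0.03], names=["Loss/rgb", "Loss/semantic"])
```
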
/SSR/data_generation/replica_render_config_vMAP.yaml:
--------------------------------------------------------------------------------
1 | # Agent settings
2 | default_agent: 0
3 | gpu_id: 0
4 | width: 1200 #1280
5 | height: 680 #960
6 | sensor_height: 0
7 |
8 | color_sensor: true # RGB sensor
9 | semantic_sensor: true # Semantic sensor
10 | depth_sensor: true # Depth sensor
11 | enable_semantics: true
12 |
13 | # room_0
14 | scene_file: "/home/xin/data/vmap/room_0/habitat/mesh_semantic.ply"
15 | instance2class_mapping: "/home/xin/data/vmap/room_0/habitat/info_semantic.json"
16 | save_path: "/home/xin/data/vmap/room_0/vmap/00/"
17 | pose_file: "/home/xin/data/vmap/room_0/vmap/00/traj_w_c.txt"
18 | ## HDR texture
19 | ## issue https://github.com/facebookresearch/Replica-Dataset/issues/41#issuecomment-566251467
20 | #scene_file: "/home/xin/data/vmap/room_0/mesh.ply"
21 | #instance2class_mapping: "/home/xin/data/vmap/room_0/habitat/info_semantic.json"
22 | #save_path: "/home/xin/data/vmap/room_0/vmap/00/"
23 | #pose_file: "/home/xin/data/vmap/room_0/vmap/00/traj_w_c.txt"
--------------------------------------------------------------------------------
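The YAML above is consumed by `habitat_renderer.py`, whose full source is not included in this dump. As a rough, hedged orientation only, the sketch below shows how such settings typically map onto the public Habitat-Sim 0.2.x API; it is not the repository's actual renderer, and the attribute names should be verified against `habitat_renderer.py`.

```python
# Hedged sketch: build Habitat-Sim sensor specs from the YAML above (not the repo's actual code).
import yaml
import habitat_sim

with open("SSR/data_generation/replica_render_config_vMAP.yaml") as f:
    cfg = yaml.safe_load(f)

sim_cfg = habitat_sim.SimulatorConfiguration()
sim_cfg.scene_id = cfg["scene_file"]
sim_cfg.gpu_device_id = cfg["gpu_id"]

def make_sensor(uuid, sensor_type):
    spec = habitat_sim.CameraSensorSpec()
    spec.uuid = uuid
    spec.sensor_type = sensor_type
    spec.resolution = [cfg["height"], cfg["width"]]
    spec.position = [0.0, cfg["sensor_height"], 0.0]
    return spec

sensors = [
    make_sensor("color_sensor", habitat_sim.SensorType.COLOR),
    make_sensor("depth_sensor", habitat_sim.SensorType.DEPTH),
    make_sensor("semantic_sensor", habitat_sim.SensorType.SEMANTIC),
]

agent_cfg = habitat_sim.agent.AgentConfiguration()
agent_cfg.sensor_specifications = sensors
sim = habitat_sim.Simulator(habitat_sim.Configuration(sim_cfg, [agent_cfg]))
obs = sim.get_sensor_observations()  # dict keyed by the sensor uuids above
```
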
/SSR/data_generation/README.md:
--------------------------------------------------------------------------------
1 | ## Replica Data Generation
2 |
3 | ### Download Replica Dataset
4 | Download 3D models and info files from [Replica](https://github.com/facebookresearch/Replica-Dataset)
5 |
6 | ### 3D Object Mesh Extraction
7 | Change the input path in `./data_generation/extract_inst_obj.py` and run
8 | ```bash
9 | python ./data_generation/extract_inst_obj.py
10 | ```
11 |
12 | ### Camera Trajectory Generation
13 | Please refer to [Semantic-NeRF](https://github.com/Harry-Zhi/semantic_nerf/issues/25#issuecomment-1340595427) for more details. The random trajectory generation only works for single-room scenes; multi-room scenes additionally require collision checking. Contributions are welcome.
14 |
15 | ### Rendering 2D Images
16 | Given a camera trajectory T_wc (set `pose_file` in the config), we use [Habitat-Sim](https://github.com/facebookresearch/habitat-sim) to render RGB, depth, semantic and instance images.
17 |
18 | #### Install Habitat-Sim 0.2.1
19 | We recommend using conda to install habitat-sim 0.2.1.
20 | ```bash
21 | conda create -n habitat python=3.8.12 cmake=3.14.0
22 | conda activate habitat
23 | conda install habitat-sim=0.2.1 withbullet -c conda-forge -c aihabitat
24 | conda install numba=0.54.1
25 | ```
26 |
27 | #### Run rendering with configs
28 | ```bash
29 | python ./data_generation/habitat_renderer.py --config ./data_generation/replica_render_config_vMAP.yaml
30 | ```
31 | Note that to obtain HDR images, use `mesh.ply` instead of `mesh_semantic.ply` (change the path in the config), then copy the resulting `rgb` folder over the previously rendered, over-exposed RGB images.
32 | ```bash
33 | python ./data_generation/habitat_renderer.py --config ./data_generation/replica_render_config_vMAP.yaml
34 | ```
--------------------------------------------------------------------------------
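For reference, the `pose_file` (`traj_w_c.txt`) used above stores one row-major flattened 4x4 camera-to-world matrix per line, which matches how the dataset loaders in this repository read it back with `np.loadtxt(...).reshape(-1, 4, 4)`. A minimal sketch with dummy poses:

```python
# Hedged sketch of the traj_w_c.txt format: one row-major flattened 4x4 T_wc per line.
import numpy as np

poses = np.stack([np.eye(4) for _ in range(3)])          # three dummy camera-to-world poses
np.savetxt("traj_w_c.txt", poses.reshape(-1, 16), delimiter=" ")

# Read back exactly the way the dataset loaders in this repo do.
Ts_full = np.loadtxt("traj_w_c.txt", delimiter=" ").reshape(-1, 4, 4)
assert np.allclose(Ts_full, poses)
```
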
/SSR/data_generation/extract_inst_obj.py:
--------------------------------------------------------------------------------
1 | # reference https://github.com/facebookresearch/Replica-Dataset/issues/17#issuecomment-538757418
2 |
3 | from plyfile import *
4 | import numpy as np
5 | import trimesh
6 |
7 |
8 | # path_in = 'path/to/mesh_semantic.ply'
9 | path_in = '/home/xin/data/vmap/room_0_debug/habitat/mesh_semantic.ply'
10 |
11 | print("Reading input...")
12 | mesh = trimesh.load(path_in)
13 | # mesh.show()
14 | file_in = PlyData.read(path_in)
15 | vertices_in = file_in.elements[0]
16 | faces_in = file_in.elements[1]
17 |
18 | print("Filtering data...")
19 | objects = {}
20 | sub_mesh_indices = {}
21 | for i, f in enumerate(faces_in):
22 | object_id = f[1]
23 | if not object_id in objects:
24 | objects[object_id] = []
25 | sub_mesh_indices[object_id] = []
26 | objects[object_id].append((f[0],))
27 | sub_mesh_indices[object_id].append(i)
28 | sub_mesh_indices[object_id].append(i+faces_in.data.shape[0])
29 |
30 |
31 | print("Writing data...")
32 | for object_id, faces in objects.items():
33 | path_out = path_in + f"_{object_id}.ply"
34 | # print("sub_mesh_indices[object_id] ", sub_mesh_indices[object_id])
35 | obj_mesh = mesh.submesh([sub_mesh_indices[object_id]], append=True)
36 | in_n = len(sub_mesh_indices[object_id])
37 | out_n = obj_mesh.faces.shape[0]
38 | # print("obj id ", object_id)
39 | # print("in_n ", in_n)
40 | # print("out_n ", out_n)
41 | # print("faces ", len(faces))
42 | # assert in_n == out_n
43 | obj_mesh.export(path_out)
44 | # faces_out = PlyElement.describe(np.array(faces, dtype=[('vertex_indices', 'O')]), 'face')
45 | # print("faces out ", len(PlyData([vertices_in, faces_out]).elements[1].data))
46 | # PlyData([vertices_in, faces_out]).write(path_out+"_cmp.ply")
47 |
48 |
--------------------------------------------------------------------------------
/SSR/data_generation/transformation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import quaternion
3 | import trimesh
4 |
5 | def habitat_world_transformations():
6 | import habitat_sim
7 | # Transforms between the habitat frame H (y-up) and the world frame W (z-up).
8 | T_wh = np.identity(4)
9 |
10 | # https://stackoverflow.com/questions/1171849/finding-quaternion-representing-the-rotation-from-one-vector-to-another
11 | T_wh[0:3, 0:3] = quaternion.as_rotation_matrix(habitat_sim.utils.common.quat_from_two_vectors(
12 | habitat_sim.geo.GRAVITY, np.array([0.0, 0.0, -1.0])))
13 |
14 | T_hw = np.linalg.inv(T_wh)
15 |
16 | return T_wh, T_hw
17 |
18 | def opencv_to_opengl_camera(transform=None):
19 | if transform is None:
20 | transform = np.eye(4)
21 | return transform @ trimesh.transformations.rotation_matrix(
22 | np.deg2rad(180), [1, 0, 0]
23 | )
24 |
25 | def opengl_to_opencv_camera(transform=None):
26 | if transform is None:
27 | transform = np.eye(4)
28 | return transform @ trimesh.transformations.rotation_matrix(
29 | np.deg2rad(-180), [1, 0, 0]
30 | )
31 |
32 | def Twc_to_Thc(T_wc): # OpenCV camera-to-world transformation ---> Habitat camera-to-Habitat-world transformation
33 | T_wh, T_hw = habitat_world_transformations()
34 | T_hc = T_hw @ T_wc @ opengl_to_opencv_camera()
35 | return T_hc
36 |
37 |
38 | def Thc_to_Twc(T_hc): # Habitat camera-to-Habitat-world transformation ---> OpenCV camera-to-world transformation
39 | T_wh, T_hw = habitat_world_transformations()
40 | T_wc = T_wh @ T_hc @ opencv_to_opengl_camera()
41 | return T_wc
42 |
43 |
44 | def combine_pose(t: np.ndarray, q: quaternion.quaternion) -> np.ndarray:
45 | T = np.identity(4)
46 | T[0:3, 3] = t
47 | T[0:3, 0:3] = quaternion.as_rotation_matrix(q)
48 | return T
--------------------------------------------------------------------------------
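A small, hedged usage sketch for the helpers above (it needs only numpy, numpy-quaternion and trimesh, assumes the module is importable from the repository root, and uses made-up pose values). It checks that the OpenCV/OpenGL camera-convention conversions are inverses of each other:

```python
# Hedged usage sketch for transformation.py; pose values are illustrative.
import numpy as np
import quaternion
from SSR.data_generation.transformation import (
    combine_pose, opencv_to_opengl_camera, opengl_to_opencv_camera)

# Identity orientation, camera 1.5 m above the origin (values are made up).
q = quaternion.quaternion(1.0, 0.0, 0.0, 0.0)
T_wc = combine_pose(np.array([0.0, 0.0, 1.5]), q)

# The two conventions differ by a 180-degree rotation about the camera x-axis,
# so converting there and back should recover the original pose.
roundtrip = opengl_to_opencv_camera(opencv_to_opengl_camera(T_wc))
assert np.allclose(roundtrip, T_wc)
```
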
/SSR/configs/SSR_room0_config.yaml:
--------------------------------------------------------------------------------
1 |
2 | experiment:
3 | scene_file: "PATHtoREPLICA/Replica/mesh/room_0/habitat/" # room_0,room_01, etc.
4 | save_dir: "PATHtoLOGS" # where to store ckpts and rendering
5 | dataset_dir: "PATHtoRENDERED_REPLICA_DATA"
6 | convention: "opencv"
7 | width: 320
8 | height: 240
9 | gpu: "0"
10 |
11 | enable_semantic: True
12 | enable_depth: True
13 | endpoint_feat: False
14 |
15 | model:
16 | netdepth: 8
17 | netwidth: 256
18 | netdepth_fine: 8
19 | netwidth_fine: 256
20 | chunk: 1024*128 # number of rays processed in parallel, decrease if running out of memory
21 | netchunk: 1024*128 # number of pts sent through network in parallel, decrease if running out of memory
22 |
23 | render:
24 | N_rays: 32*32*1 # average number of rays sampled from each image within a batch
25 | N_samples: 64 # Number of different times to sample along each ray.
26 | N_importance: 128 # Number of additional fine samples per ray
27 | perturb: 1
28 | use_viewdirs: true
29 | i_embed: 0 # set 0 for default positional encoding, -1 for none
30 | multires: 10 # log2 of max freq for positional encoding (3D location)
31 | multires_views: 4 # log2 of max freq for positional encoding (2D direction)
32 | raw_noise_std: 1 # std dev of noise added to regularize sigma_a output, 1e0 recommended
33 | test_viz_factor: 1 # down-scaling factor when rendering test and training images
34 | no_batching: True # True: sample random pixels from random images; False: sample from all pixels of all images
35 | depth_range: [0.1, 10.0]
36 | white_bkgd: false # set to render synthetic data on a white bkgd (always use for dvoxels)
37 |
38 | train:
39 | lrate: 5e-4
40 | lrate_decay: 250e3
41 | N_iters: 200000
42 | wgt_sem: 4e-2
43 |
44 |
45 |
46 | logging: # logging/saving options
47 | step_log_print: 1 # 'frequency of console print'
48 | step_log_tfb: 500
49 | step_save_ckpt: 20000
50 | step_val: 5000 # frequency of rendering on unseen data
51 | step_vis_train: 5000
--------------------------------------------------------------------------------
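Several numeric fields in the config above (`chunk`, `netchunk`, `N_rays`) are written as arithmetic strings such as `1024*128`. How the repository's trainer parses them is not shown in this dump, so the snippet below is only a hedged sketch of one way to load the file and expand those products:

```python
# Hedged sketch (not the repository's actual loader): read the YAML and expand "A*B" strings.
import yaml

with open("SSR/configs/SSR_room0_config.yaml") as f:
    config = yaml.safe_load(f)

def as_int(value):
    # Fields such as "1024*128" or "32*32*1" arrive as strings; multiply the factors out.
    if isinstance(value, str):
        result = 1
        for factor in value.split("*"):
            result *= int(factor)
        return result
    return int(value)

chunk = as_int(config["model"]["chunk"])      # 131072 rays per forward chunk
n_rays = as_int(config["render"]["N_rays"])   # 1024 rays per training batch
```
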
/SSR/datasets/scannet/scannet_reader.py:
--------------------------------------------------------------------------------
1 | # modified from https://github.com/ScanNet/ScanNet/blob/master/SensReader/python/reader.py
2 | # github: https://github.com/ScanNet/ScanNet/tree/master/SensReader/python
3 | # python 2.7 is recommended.
4 |
5 |
6 | import argparse
7 | import os
8 | import sys
9 |
10 | import numpy as np
11 | import random
12 |
13 | from tqdm import tqdm
14 |
15 | from SensorData import SensorData
16 |
17 | def parse_raw_data(output_path, data_filename):
18 | if not os.path.exists(output_path):
19 | os.makedirs(output_path)
20 | # load the data
21 | sys.stdout.write('loading %s...' % data_filename)
22 | sd = SensorData(data_filename)
23 | sys.stdout.write('loaded!\n')
24 | if opt.export_depth_images:
25 | sd.export_depth_images(os.path.join(output_path, 'depth'))
26 | if opt.export_color_images:
27 | sd.export_color_images(os.path.join(output_path, 'color'))
28 | if opt.export_poses:
29 | sd.export_poses(os.path.join(output_path, 'pose'))
30 | if opt.export_intrinsics:
31 | sd.export_intrinsics(os.path.join(output_path, 'intrinsic'))
32 |
33 |
34 | # params
35 | parser = argparse.ArgumentParser()
36 | # data paths
37 | parser.add_argument('--export_depth_images', dest='export_depth_images', action='store_true')
38 | parser.add_argument('--export_color_images', dest='export_color_images', action='store_true')
39 | parser.add_argument('--export_poses', dest='export_poses', action='store_true')
40 | parser.add_argument('--export_intrinsics', dest='export_intrinsics', action='store_true')
41 | parser.set_defaults(export_depth_images=True, export_color_images=True, export_poses=True, export_intrinsics=True)
42 |
43 |
44 | opt = parser.parse_args()
45 | print(opt)
46 |
47 |
48 | data_dir = "PATH_TO_SCANNET/ScanNet/scans_val/" # path to list of scannet scenes
49 | val_seqs = os.listdir(data_dir)
50 | with open("PATH_TO_SCANNET/ScanNet/tasks/scannetv2_val.txt") as f:
51 | val_seq_ids = f.readlines()
52 | val_seq_ids = [s.strip() for s in val_seq_ids]
53 |
54 | for i in tqdm(range(len(val_seqs))):
55 | val_id = val_seqs[i]
56 | val_seq_dir = os.path.join(data_dir, val_id, "renders")
57 | raw_data_filename = os.path.join(data_dir, val_id, val_id+".sens")
58 | parse_raw_data(val_seq_dir, raw_data_filename)
59 |
60 | # Note: the conversion loop above runs at module level when this script is executed;
61 | # there is no separate main() function, so no __main__ guard is needed.
--------------------------------------------------------------------------------
/SSR/configs/SSR_ScanNet_config.yaml:
--------------------------------------------------------------------------------
1 |
2 | experiment:
3 | save_dir: "PATHtoLOGS" # where to store ckpts and rendering
4 | dataset_dir: "PATHtoScanNet_Scene_Folder" # e.g., "xxx/ScanNet/scans/scene0010_00"
5 |
6 | # All parsed ScanNet images for each scene are arranged into a unified folder called "renders" using scannet_reader.py,
7 | # where the subfolders "pose", "color" and "depth" contain the corresponding data,
8 | # e.g., "xxx/ScanNet/scans/scene0010_00/renders/color/00001.jpg"
9 |
10 | sample_step: 100 # sampling interval over the full ScanNet sequence; it determines the overall number of training/testing images
11 | convention: "opencv"
12 | width: 320
13 | height: 240
14 | gpu: "0"
15 |
16 | enable_semantic: True
17 | enable_depth: True
18 | endpoint_feat: False
19 |
20 | model:
21 | netdepth: 8
22 | netwidth: 256
23 | netdepth_fine: 8
24 | netwidth_fine: 256
25 | chunk: 1024*128 # number of rays processed in parallel, decrease if running out of memory
26 | netchunk: 1024*128 # number of pts sent through network in parallel, decrease if running out of memory
27 |
28 | render:
29 | N_rays: 32*32*1 # average number of rays sampled from each image within a batch
30 | N_samples: 64 # Number of different times to sample along each ray.
31 | N_importance: 128 # Number of additional fine samples per ray
32 | perturb: 1
33 | use_viewdirs: true
34 | i_embed: 0 # set 0 for default positional encoding, -1 for none
35 | multires: 10 # log2 of max freq for positional encoding (3D location)
36 | multires_views: 4 # log2 of max freq for positional encoding (2D direction)
37 | raw_noise_std: 1 # std dev of noise added to regularize sigma_a output, 1e0 recommended
38 | test_viz_factor: 1 # down-scaling factor when rendering test and training images
39 | no_batching: True # True: sample random pixels from random images; False: sample from all pixels of all images
40 | depth_range: [0.1, 10.0]
41 | white_bkgd: false # set to render synthetic data on a white bkgd (always use for dvoxels)
42 |
43 | train:
44 | lrate: 5e-4
45 | lrate_decay: 250e3
46 | N_iters: 200000
47 | wgt_sem: 4e-2
48 |
49 |
50 |
51 | logging: # logging/saving options
52 | step_log_print: 1 # 'frequency of console print'
53 | step_log_tfb: 500
54 | step_save_ckpt: 20000
55 | step_val: 5000 # frequency of rendering on unseen data
56 | step_vis_train: 5000
--------------------------------------------------------------------------------
/SSR/datasets/scannet/scannet_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 | import csv
5 |
6 | def load_scannet_label_mapping(path):
7 | """ Returns a dict mapping scannet category label strings to scannet Ids
8 |
9 | scene****_**.aggregation.json contains the category labels as strings
10 | so this maps the strings to the integer scannet Id
11 |
12 | Args:
13 | path: Path to the original scannet data.
14 | This is used to get scannetv2-labels.combined.tsv
15 |
16 | Returns:
17 | mapping: A dict from strings to ints
18 | example:
19 | {'wall': 1,
20 | 'chair': 2,
21 | 'books': 22}
22 |
23 | """
24 |
25 | mapping = {}
26 | with open(os.path.join(path, 'scannetv2-labels.combined.tsv')) as tsvfile:
27 | tsvreader = csv.reader(tsvfile, delimiter='\t')
28 | for i, line in enumerate(tsvreader):
29 | if i==0:
30 | continue
31 | scannet_id, name = int(line[0]), line[1]
32 | mapping[name] = scannet_id
33 |
34 | return mapping
35 |
36 |
37 | def load_scannet_nyu40_mapping(path):
38 | """ Returns a dict mapping scannet Ids to NYU40 Ids
39 |
40 | Args:
41 | path: Path to the original scannet data.
42 | This is used to get scannetv2-labels.combined.tsv
43 |
44 | Returns:
45 | mapping: A dict from ints to ints
46 | example:
47 | {1: 1,
48 | 2: 5,
49 | 22: 23}
50 |
51 | """
52 |
53 | mapping = {}
54 | with open(os.path.join(path, 'scannetv2-labels.combined.tsv')) as tsvfile:
55 | tsvreader = csv.reader(tsvfile, delimiter='\t')
56 | for i, line in enumerate(tsvreader):
57 | if i==0:
58 | continue
59 | scannet_id, nyu40id = int(line[0]), int(line[4])
60 | mapping[scannet_id] = nyu40id
61 | return mapping
62 |
63 |
64 | def load_scannet_nyu13_mapping(path):
65 | """ Returns a dict mapping scannet Ids to NYU40 Ids
66 |
67 | Args:
68 | path: Path to the original scannet data.
69 | This is used to get scannetv2-labels.combined.tsv
70 |
71 | Returns:
72 | mapping: A dict from ints to ints
73 | example:
74 | {1: 1,
75 | 2: 5,
76 | 22: 23}
77 |
78 | """
79 |
80 | mapping = {}
81 | with open(os.path.join(path, 'scannetv2-labels.combined.tsv')) as tsvfile:
82 | tsvreader = csv.reader(tsvfile, delimiter='\t')
83 | for i, line in enumerate(tsvreader):
84 | if i==0:
85 | continue
86 | scannet_id, nyu13id = int(line[0]), int(line[5])
87 | mapping[scannet_id] = nyu13id
88 | return mapping
--------------------------------------------------------------------------------
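A hedged usage sketch for the mapping helpers above: remap a ScanNet label image to NYU40 ids with a dense lookup table. The ScanNet root path and the label image are placeholders, not values from the repository:

```python
# Hedged sketch: remap a ScanNet label map to NYU40 ids using load_scannet_nyu40_mapping.
import numpy as np
from SSR.datasets.scannet.scannet_utils import load_scannet_nyu40_mapping

scannet_root = "PATH_TO_SCANNET/ScanNet"  # must contain scannetv2-labels.combined.tsv
mapping = load_scannet_nyu40_mapping(scannet_root)

# Build a dense lookup table so whole label images can be remapped with a single indexing op.
lut = np.zeros(max(mapping) + 1, dtype=np.int64)
for scannet_id, nyu40_id in mapping.items():
    lut[scannet_id] = nyu40_id

label_img = np.zeros((240, 320), dtype=np.int64)  # stand-in for a loaded ScanNet label image
nyu40_img = lut[label_img]                        # per-pixel NYU40 labels
```
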
/SSR/geometry/occupancy.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 |
5 | def grid_within_bound(occ_range, extents, transform, grid_dim):
6 | range_dist = occ_range[1] - occ_range[0]
7 | bounds_tranform_np = transform
8 |
9 | bounds_tranform = torch.from_numpy(bounds_tranform_np).float()
10 | scene_scale_np = extents / (range_dist * 0.9)
11 | scene_scale = torch.from_numpy(scene_scale_np).float()
12 |
13 | # todo: only make grid once, then only transform!
14 | grid_pc = make_3D_grid(
15 | occ_range,
16 | grid_dim,
17 | transform=bounds_tranform,
18 | scale=scene_scale,
19 | )
20 | grid_pc = grid_pc.view(-1, 1, 3)
21 |
22 | return grid_pc, scene_scale
23 |
24 | def make_3D_grid(occ_range, dim, transform=None, scale=None):
25 | t = torch.linspace(occ_range[0], occ_range[1], steps=dim)
26 | grid = torch.meshgrid(t, t, t)
27 | grid_3d_norm = torch.cat(
28 | (grid[0][..., None],
29 | grid[1][..., None],
30 | grid[2][..., None]), dim=3
31 | )
32 |
33 | if scale is not None:
34 | grid_3d = grid_3d_norm * scale
35 | if transform is not None:
36 | R1 = transform[None, None, None, 0, :3]
37 | R2 = transform[None, None, None, 1, :3]
38 | R3 = transform[None, None, None, 2, :3]
39 |
40 | grid1 = (R1 * grid_3d).sum(-1, keepdim=True)
41 | grid2 = (R2 * grid_3d).sum(-1, keepdim=True)
42 | grid3 = (R3 * grid_3d).sum(-1, keepdim=True)
43 | grid_3d = torch.cat([grid1, grid2, grid3], dim=-1)
44 |
45 | trans = transform[None, None, None, :3, 3]
46 | grid_3d = grid_3d + trans
47 |
48 | return grid_3d
49 |
50 | def make_3D_grid_np(occ_range, dim, device, transform=None, scale=None):
51 | # NumPy version of make_3D_grid above; transform should be a 4x4 np.ndarray.
52 | t = np.linspace(occ_range[0], occ_range[1], num=dim)
53 | grid = np.meshgrid(t, t, t) # list of 3 elements of shape [dim, dim, dim]
54 |
55 | grid_3d_norm = np.concatenate(
56 | (grid[0][..., None],
57 | grid[1][..., None],
58 | grid[2][..., None]), axis=3
59 | ) # shape of [dim, dim, dim, 3]
60 |
61 | if scale is not None:
62 | grid_3d = grid_3d_norm * scale
63 | if transform is not None:
64 | R1 = transform[None, None, None, 0, :3]
65 | R2 = transform[None, None, None, 1, :3]
66 | R3 = transform[None, None, None, 2, :3]
67 |
68 | grid1 = (R1 * grid_3d).sum(-1, keepdims=True)
69 | grid2 = (R2 * grid_3d).sum(-1, keepdims=True)
70 | grid3 = (R3 * grid_3d).sum(-1, keepdims=True)
71 | grid_3d = np.concatenate([grid1, grid2, grid3], axis=-1)
72 |
73 | trans = transform[None, None, None, :3, 3]
74 | grid_3d = grid_3d + trans
75 |
76 | return grid_3d
77 |
78 |
79 |
80 | def chunk_alphas(pc, chunk_size, fc_occ_map, n_embed_funcs, B_layer,):
81 | n_pts = pc.shape[0]
82 | n_chunks = int(np.ceil(n_pts / chunk_size))
83 | alphas = []
84 | for n in range(n_chunks):
85 | start = n * chunk_size
86 | end = start + chunk_size
87 | chunk = pc[start:end, :]
88 | points_embedding = embedding.positional_encoding(
89 | chunk, B_layer, num_encoding_functions=n_embed_funcs
90 | )
91 | alpha = fc_occ_map(points_embedding, full=True).squeeze(dim=-1)
92 | alphas.append(alpha)
93 | alphas = torch.cat(alphas, dim=-1)
94 |
95 | return alphas
96 |
--------------------------------------------------------------------------------
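A hedged sanity-check sketch for `grid_within_bound` above; the scene extents and grid resolution are illustrative values, not taken from the repository, and the repository root is assumed to be importable:

```python
# Hedged sketch: build a 64^3 query grid inside scene bounds with grid_within_bound.
import numpy as np
from SSR.geometry.occupancy import grid_within_bound

extents = np.array([4.0, 4.0, 2.5], dtype=np.float32)  # illustrative scene extents in metres
transform = np.eye(4, dtype=np.float32)                 # identity bounds transform
grid_pc, scene_scale = grid_within_bound([-1.0, 1.0], extents, transform, grid_dim=64)

print(grid_pc.shape)   # torch.Size([262144, 1, 3]) -- 64*64*64 query points
print(scene_scale)     # extents / (2.0 * 0.9)
```
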
/SSR/models/model_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from SSR.training.training_utils import batchify
4 |
5 |
6 | def run_network_compund(inputs, fn, embed_fn, netchunk=1024 * 64):
7 | """Prepares inputs and applies network 'fn'.
8 |
9 | Input: [N_rays, N_samples, 3]
10 | """
11 | inputs_flat = torch.reshape(inputs, [-1, inputs.shape[-1]])
12 | compund_fn = lambda x: fn(embed_fn(x))
13 |
14 | outputs_flat = batchify(compund_fn, netchunk)(inputs_flat)
15 | outputs = torch.reshape(outputs_flat, list(inputs.shape[:-1]) + [outputs_flat.shape[-1]])
16 | return outputs
17 |
18 |
19 | def run_network(inputs, viewdirs, fn, embed_fn, embeddirs_fn, netchunk=1024 * 64):
20 | """Prepares inputs and applies network 'fn'.
21 |
22 | Input: [N_rays, N_samples, 3]
23 | """
24 | inputs_flat = torch.reshape(inputs, [-1, inputs.shape[-1]])
25 | embedded = embed_fn(inputs_flat)
26 |
27 | if viewdirs is not None:
28 | input_dirs = viewdirs[:, None].expand(inputs.shape)
29 | input_dirs_flat = torch.reshape(input_dirs, [-1, input_dirs.shape[-1]])
30 | embedded_dirs = embeddirs_fn(input_dirs_flat)
31 | embedded = torch.cat([embedded, embedded_dirs], -1)
32 |
33 | outputs_flat = batchify(fn, netchunk)(embedded)
34 | outputs = torch.reshape(outputs_flat, list(inputs.shape[:-1]) + [outputs_flat.shape[-1]])
35 | return outputs
36 |
37 |
38 |
39 | def raw2outputs(raw, z_vals, rays_d, raw_noise_std=0, white_bkgd=False, enable_semantic=True,
40 | num_sem_class=0, endpoint_feat=False):
41 | """Transforms model's predictions to semantically meaningful values.
42 | Args:
43 | raw: [num_rays, num_samples along ray, 4]. Prediction from model.
44 | z_vals: [num_rays, num_samples along ray]. Integration time.
45 | rays_d: [num_rays, 3]. Direction of each ray.
46 | raw_noise_std: random perturbations added to ray samples
47 |
48 | Returns:
49 | rgb_map: [num_rays, 3]. Estimated RGB color of a ray.
50 | disp_map: [num_rays]. Disparity map. Inverse of depth map.
51 | acc_map: [num_rays]. Sum of weights along each ray.
52 | weights: [num_rays, num_samples]. Weights assigned to each sampled color.
53 | depth_map: [num_rays]. Estimated distance to object.
54 | """
55 | raw2alpha = lambda raw, dists, act_fn=F.relu: 1.-torch.exp(-act_fn(raw)*dists)
56 |
57 | dists = z_vals[..., 1:] - z_vals[..., :-1] # # (N_rays, N_samples_-1)
58 | dists = torch.cat([dists, torch.Tensor([1e10]).expand(dists[..., :1].shape).cuda()], -1) # [N_rays, N_samples]
59 |
60 | # Multiply each distance by the norm of its corresponding direction ray
61 | # to convert to real world distance (accounts for non-unit directions).
62 | dists = dists * torch.norm(rays_d[..., None, :], dim=-1)
63 |
64 | rgb = torch.sigmoid(raw[..., :3]) # [N_rays, N_samples, 3]
65 |
66 | if raw_noise_std > 0.:
67 | noise = torch.randn(raw[..., 3].shape) * raw_noise_std
68 | noise = noise.cuda()
69 | else:
70 | noise = 0.
71 |
72 | alpha = raw2alpha(raw[..., 3] + noise, dists) # [N_rays, N_samples]
73 |
74 |
75 | # weights = alpha * tf.math.cumprod(1.-alpha + 1e-10, -1, exclusive=True)
76 | weights = alpha * torch.cumprod(torch.cat([torch.ones((alpha.shape[0], 1)).cuda(), 1.-alpha + 1e-10], -1), -1)[:, :-1]
77 | # [1, 1-a1, 1-a2, ...]
78 | # [N_rays, N_samples+1] sliced by [:, :-1] to [N_rays, N_samples]
79 |
80 | rgb_map = torch.sum(weights[..., None] * rgb, -2) # [N_rays, 3]
81 | # rgb_map: [N_rays, 3], weighted sum of the sampled colours along each ray
82 |
83 | if enable_semantic:
84 | assert num_sem_class>0
85 | # https://discuss.pytorch.org/t/multi-class-cross-entropy-loss-and-softmax-in-pytorch/24920/2
86 | sem_logits = raw[..., 4:4+num_sem_class] # [N_rays, N_samples, num_class]
87 | sem_map = torch.sum(weights[..., None] * sem_logits, -2) # [N_rays, num_class]
88 | else:
89 | sem_map = torch.tensor(0)
90 |
91 |
92 | if endpoint_feat:
93 | feat = raw[..., -128:] # [N_rays, N_samples, feat_dim] take the last 128 dim from predictions
94 | feat_map = torch.sum(weights[..., None] * feat, -2) # [N_rays, feat_dim]
95 | else:
96 | feat_map = torch.tensor(0)
97 |
98 | depth_map = torch.sum(weights * z_vals, -1) # (N_rays,)
99 | disp_map = 1./torch.max(1e-10 * torch.ones_like(depth_map), depth_map / torch.sum(weights, -1))
100 | acc_map = torch.sum(weights, -1)
101 |
102 | if white_bkgd:
103 | rgb_map = rgb_map + (1.-acc_map[..., None])
104 | if enable_semantic:
105 | sem_map = sem_map + (1.-acc_map[..., None])
106 |
107 | return rgb_map, disp_map, acc_map, weights, depth_map, sem_map, feat_map
108 |
109 |
--------------------------------------------------------------------------------
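A hedged smoke-test sketch for `raw2outputs` above. It needs a CUDA device because the function allocates helper tensors with `.cuda()` internally; the ray count, sample count and class count are arbitrary, and the full repository is assumed to be importable:

```python
# Hedged smoke test for raw2outputs; shapes follow its docstring, values are random.
import torch
from SSR.models.model_utils import raw2outputs

N_rays, N_samples, num_classes = 1024, 64, 28
raw = torch.randn(N_rays, N_samples, 4 + num_classes, device="cuda")   # rgb + sigma + semantic logits
z_vals = torch.linspace(0.1, 10.0, N_samples, device="cuda").expand(N_rays, N_samples)
rays_d = torch.randn(N_rays, 3, device="cuda")

rgb, disp, acc, weights, depth, sem, feat = raw2outputs(
    raw, z_vals, rays_d, raw_noise_std=0, enable_semantic=True, num_sem_class=num_classes)
print(rgb.shape, depth.shape, sem.shape)   # (1024, 3) (1024,) (1024, 28)
```
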
/SSR/training/training_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import confusion_matrix
4 |
5 | def batchify_rays(render_fn, rays_flat, chunk=1024 * 32):
6 | """Render rays in smaller minibatches to avoid OOM.
7 | """
8 | all_ret = {}
9 | for i in range(0, rays_flat.shape[0], chunk):
10 | ret = render_fn(rays_flat[i:i + chunk])
11 | for k in ret:
12 | if k not in all_ret:
13 | all_ret[k] = []
14 | all_ret[k].append(ret[k])
15 |
16 | all_ret = {k: torch.cat(all_ret[k], 0) for k in all_ret}
17 | return all_ret
18 |
19 |
20 | def batchify(fn, chunk):
21 | """Constructs a version of 'fn' that applies to smaller batches.
22 | """
23 | if chunk is None:
24 | return fn
25 |
26 | def ret(inputs):
27 | return torch.cat([fn(inputs[i:i + chunk]) for i in range(0, inputs.shape[0], chunk)], 0)
28 |
29 | return ret
30 |
31 |
32 | def lr_poly_decay(base_lr, iter, max_iter, power):
33 | """ Polynomial learning rate decay
34 | Polynomial Decay provides a smoother decay using a polynomial function and reaches a learning rate of 0
35 | after max_update iterations.
36 | https://kiranscaria.github.io/general/2019/08/16/learning-rate-schedules.html
37 |
38 | max_iter: number of iterations after which the learning rate reaches 0.
39 | power: the degree of the polynomial. Smaller values of power produce slower decay and
40 | keep the learning rate large for longer periods.
41 | """
42 | return base_lr * ((1 - float(iter) / max_iter) ** (power))
43 |
44 |
45 | def lr_exp_decay(base_lr, exp_base_lr, current_step, decay_steps):
46 | """ lr = lr0 * decay_base^(−kt)
47 | """
48 | new_lrate = base_lr * (exp_base_lr ** (current_step / decay_steps))
49 | return new_lrate
50 |
51 |
52 | def nanmean(data, **args):
53 | # This makes it ignore the first 'background' class
54 | return np.ma.masked_array(data, np.isnan(data)).mean(**args)
55 | # In np.ma.masked_array(data, np.isnan(data)), NaN elements of data are marked invalid and are ignored when computing the mean
56 |
57 |
58 | def calculate_segmentation_metrics(true_labels, predicted_labels, number_classes, ignore_label):
59 | if (true_labels == ignore_label).all():
60 | return [0]*5
61 |
62 | true_labels = true_labels.flatten()
63 | predicted_labels = predicted_labels.flatten()
64 | valid_pix_ids = true_labels!=ignore_label
65 | predicted_labels = predicted_labels[valid_pix_ids]
66 | true_labels = true_labels[valid_pix_ids]
67 |
68 | conf_mat = confusion_matrix(true_labels, predicted_labels, labels=list(range(number_classes)))
69 | norm_conf_mat = np.transpose(
70 | np.transpose(conf_mat) / conf_mat.astype(float).sum(axis=1))
71 |
72 | missing_class_mask = np.isnan(norm_conf_mat.sum(1)) # missing class will have NaN at corresponding class
73 | existing_class_mask = ~ missing_class_mask
74 |
75 | class_average_accuracy = nanmean(np.diagonal(norm_conf_mat))
76 | total_accuracy = (np.sum(np.diagonal(conf_mat)) / np.sum(conf_mat))
77 | ious = np.zeros(number_classes)
78 | for class_id in range(number_classes):
79 | ious[class_id] = (conf_mat[class_id, class_id] / (
80 | np.sum(conf_mat[class_id, :]) + np.sum(conf_mat[:, class_id]) -
81 | conf_mat[class_id, class_id]))
82 | miou = nanmean(ious)
83 | miou_valid_class = np.mean(ious[existing_class_mask])
84 | return miou, miou_valid_class, total_accuracy, class_average_accuracy, ious
85 |
86 |
87 | def calculate_depth_metrics(depth_trgt, depth_pred):
88 | """ Computes 2d metrics between two depth maps
89 |
90 | Args:
91 | depth_pred: mxn np.array containing prediction
92 | depth_trgt: mxn np.array containing ground truth
93 | Returns:
94 | Dict of metrics
95 | """
96 | mask1 = depth_pred>0 # ignore values where prediction is 0 (% complete)
97 | mask = (depth_trgt<10) * (depth_trgt>0) * mask1
98 |
99 | depth_pred = depth_pred[mask]
100 | depth_trgt = depth_trgt[mask]
101 | abs_diff = np.abs(depth_pred-depth_trgt)
102 | abs_rel = abs_diff/depth_trgt
103 | sq_diff = abs_diff**2
104 | sq_rel = sq_diff/depth_trgt
105 | sq_log_diff = (np.log(depth_pred)-np.log(depth_trgt))**2
106 | thresh = np.maximum((depth_trgt / depth_pred), (depth_pred / depth_trgt))
107 | r1 = (thresh < 1.25).astype('float')
108 | r2 = (thresh < 1.25**2).astype('float')
109 | r3 = (thresh < 1.25**3).astype('float')
110 |
111 | metrics = {}
112 | metrics['AbsRel'] = np.mean(abs_rel)
113 | metrics['AbsDiff'] = np.mean(abs_diff)
114 | metrics['SqRel'] = np.mean(sq_rel)
115 | metrics['RMSE'] = np.sqrt(np.mean(sq_diff))
116 | metrics['LogRMSE'] = np.sqrt(np.mean(sq_log_diff))
117 | metrics['r1'] = np.mean(r1)
118 | metrics['r2'] = np.mean(r2)
119 | metrics['r3'] = np.mean(r3)
120 | metrics['complete'] = np.mean(mask1.astype('float'))
121 |
122 | return metrics
--------------------------------------------------------------------------------
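A hedged usage sketch for `calculate_segmentation_metrics` above; the label maps are random stand-ins, `ignore_label=-1` simply disables the ignore mask here, and the repository root is assumed to be importable:

```python
# Hedged sketch: compute segmentation metrics on synthetic label maps.
import numpy as np
from SSR.training.training_utils import calculate_segmentation_metrics

gt = np.random.randint(0, 5, size=(240, 320))
pred = np.random.randint(0, 5, size=(240, 320))

miou, miou_valid, total_acc, class_avg_acc, ious = calculate_segmentation_metrics(
    gt, pred, number_classes=5, ignore_label=-1)
print(f"mIoU={miou:.3f}  total accuracy={total_acc:.3f}")
```
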
/SSR/models/semantic_nerf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | torch.autograd.set_detect_anomaly(True)
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 |
7 | # Misc
8 | img2mse = lambda x, y: torch.mean((x - y) ** 2)
9 | mse2psnr = lambda x: -10. * torch.log(x) / torch.log(torch.Tensor([10.]))
10 | to8b = lambda x: (255 * np.clip(x, 0, 1)).astype(np.uint8)
11 |
12 |
13 | # Positional encoding (section 5.1)
14 | class Embedder:
15 | def __init__(self, **kwargs):
16 | self.kwargs = kwargs
17 | self.create_embedding_fn()
18 |
19 | def create_embedding_fn(self):
20 | """
21 | Embeds x to (x, sin(2^k x), cos(2^k x), ...)
22 | """
23 | embed_fns = []
24 | d = self.kwargs['input_dims']
25 | out_dim = 0
26 | if self.kwargs['include_input']: # original raw input "x" is also included in the output
27 | embed_fns.append(lambda x: x)
28 | out_dim += d
29 |
30 | max_freq = self.kwargs['max_freq_log2']
31 | N_freqs = self.kwargs['num_freqs']
32 |
33 | if self.kwargs['log_sampling']:
34 | freq_bands = 2. ** torch.linspace(0., max_freq, steps=N_freqs)
35 | else:
36 | freq_bands = torch.linspace(2. ** 0., 2. ** max_freq, steps=N_freqs)
37 |
38 | for freq in freq_bands:
39 | for p_fn in self.kwargs['periodic_fns']:
40 | embed_fns.append(lambda x, p_fn=p_fn, freq=freq: p_fn(x * freq))
41 | out_dim += d
42 |
43 | self.embed_fns = embed_fns
44 | self.out_dim = out_dim
45 |
46 | def embed(self, inputs):
47 | return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
48 |
49 |
50 | def get_embedder(multires, i=0, scalar_factor=1):
51 | if i == -1:
52 | return nn.Identity(), 3
53 |
54 | embed_kwargs = {
55 | 'include_input': True,
56 | 'input_dims': 3,
57 | 'max_freq_log2': multires - 1,
58 | 'num_freqs': multires,
59 | 'log_sampling': True,
60 | 'periodic_fns': [torch.sin, torch.cos],
61 | }
62 |
63 | embedder_obj = Embedder(**embed_kwargs)
64 | embed = lambda x, eo=embedder_obj: eo.embed(x/scalar_factor)
65 | return embed, embedder_obj.out_dim
66 |
67 |
68 | def fc_block(in_f, out_f):
69 | return torch.nn.Sequential(
70 | torch.nn.Linear(in_f, out_f),
71 | torch.nn.ReLU(inplace=True)
72 | )
73 |
74 | class Semantic_NeRF(nn.Module):
75 | """
76 | Compared to a NeRF variant which would predict semantic logits from the view-dependent branch, here we make the semantic label a function of the 3D position only,
77 | instead of both position and viewing direction.
78 | """
79 | def __init__(self, enable_semantic, num_semantic_classes, D=8, W=256, input_ch=3, input_ch_views=3, output_ch=4, skips=[4], use_viewdirs=False,
80 | ):
81 | super(Semantic_NeRF, self).__init__()
82 | """
83 | D: number of layers for density (sigma) encoder
84 | W: number of hidden units in each layer
85 | input_ch: number of input channels for xyz (3+3*10*2=63 by default)
86 | input_ch_views: number of input channels for direction (3+3*4*2=27 by default)
87 | skips: indices of the layers that receive a skip connection from the input
88 | """
89 | self.D = D
90 | self.W = W
91 | self.input_ch = input_ch
92 | self.input_ch_views = input_ch_views
93 | self.skips = skips
94 | self.use_viewdirs = use_viewdirs
95 | self.enable_semantic = enable_semantic
96 |
97 | # build the encoder
98 | self.pts_linears = nn.ModuleList(
99 | [nn.Linear(input_ch, W)] + [nn.Linear(W, W) if i not in self.skips else nn.Linear(W + input_ch, W) for i in
100 | range(D - 1)])
101 |
102 | ### Implementation according to the official code release (https://github.com/bmild/nerf/blob/master/run_nerf_helpers.py#L104-L105)
103 |
104 | # Another layer is used to
105 | self.views_linears = nn.ModuleList([nn.Linear(input_ch_views + W, W // 2)])
106 | if use_viewdirs:
107 | self.feature_linear = nn.Linear(W, W)
108 | self.alpha_linear = nn.Linear(W, 1)
109 | if enable_semantic:
110 | self.semantic_linear = nn.Sequential(fc_block(W, W // 2), nn.Linear(W // 2, num_semantic_classes))
111 | self.rgb_linear = nn.Linear(W // 2, 3)
112 | else:
113 | self.output_linear = nn.Linear(W, output_ch)
114 |
115 | def forward(self, x, show_endpoint=False):
116 | """
117 | Encodes input (xyz+dir) to rgb+sigma+semantics raw output
118 | Inputs:
119 | x: (B, self.in_channels_xyz(+self.in_channels_dir))
120 | the embedded vector of 3D xyz position and viewing direction
121 | """
122 | input_pts, input_views = torch.split(x, [self.input_ch, self.input_ch_views], dim=-1)
123 | h = input_pts
124 | for i, l in enumerate(self.pts_linears):
125 | h = self.pts_linears[i](h)
126 | h = F.relu(h)
127 | if i in self.skips:
128 | h = torch.cat([input_pts, h], -1)
129 |
130 | if self.use_viewdirs:
131 | # if using view-dirs, output occupancy alpha as well as features for concatenation
132 | alpha = self.alpha_linear(h)
133 | if self.enable_semantic:
134 | sem_logits = self.semantic_linear(h)
135 | feature = self.feature_linear(h)
136 |
137 | h = torch.cat([feature, input_views], -1)
138 |
139 | for i, l in enumerate(self.views_linears):
140 | h = self.views_linears[i](h)
141 | h = F.relu(h)
142 |
143 | if show_endpoint:
144 | endpoint_feat = h
145 | rgb = self.rgb_linear(h)
146 |
147 | if self.enable_semantic:
148 | outputs = torch.cat([rgb, alpha, sem_logits], -1)
149 | else:
150 | outputs = torch.cat([rgb, alpha], -1)
151 | else:
152 | outputs = self.output_linear(h)
153 |
154 | if show_endpoint is False:
155 | return outputs
156 | else:
157 | return torch.cat([outputs, endpoint_feat], -1)
158 |
--------------------------------------------------------------------------------
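A hedged sketch instantiating `Semantic_NeRF` above together with `get_embedder`, using the positional-encoding settings from the configs (`multires: 10`, `multires_views: 4`). The semantic class count and the batch of sample points are illustrative:

```python
# Hedged sketch: build the embedders and the Semantic_NeRF MLP, then run a forward pass.
import torch
from SSR.models.semantic_nerf import Semantic_NeRF, get_embedder

embed_fn, input_ch = get_embedder(multires=10)            # 3 + 3*10*2 = 63 channels
embeddirs_fn, input_ch_views = get_embedder(multires=4)   # 3 + 3*4*2  = 27 channels

model = Semantic_NeRF(enable_semantic=True, num_semantic_classes=28,
                      D=8, W=256, input_ch=input_ch,
                      input_ch_views=input_ch_views, use_viewdirs=True)

pts = torch.rand(1024, 3)
dirs = torch.rand(1024, 3)
x = torch.cat([embed_fn(pts), embeddirs_fn(dirs)], dim=-1)  # (1024, 63 + 27)
raw = model(x)
print(raw.shape)  # (1024, 3 + 1 + 28): rgb, sigma, semantic logits
```
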
/README.md:
--------------------------------------------------------------------------------
1 | # Semantic-NeRF: Semantic Neural Radiance Fields
2 |
3 | ### [Project Page](https://shuaifengzhi.com/Semantic-NeRF/) | [Video](https://youtu.be/FpShWO7LVbM) | [Paper](https://arxiv.org/abs/2103.15875) | [Data(DropBox)](https://www.dropbox.com/sh/9yu1elddll00sdl/AAC-rSJdLX0C6HhKXGKMOIija?dl=0)| [Data(BaiduYun)[code:nerf]](https://pan.baidu.com/s/1UmABiPQKm_S5Elq_ffXzPA)
4 |
5 |
6 | [In-Place Scene Labelling and Understanding with Implicit Scene Representation](https://shuaifengzhi.com/Semantic-NeRF/)
7 | [Shuaifeng Zhi](https://shuaifengzhi.com/),
8 | [Tristan Laidlow](https://wp.doc.ic.ac.uk/twl15/),
9 | [Stefan Leutenegger](https://wp.doc.ic.ac.uk/sleutene/),
10 | [Andrew J. Davison](https://www.doc.ic.ac.uk/~ajd/),
11 |
12 | Dyson Robotics Laboratory at Imperial College \
13 | Published in ICCV 2021 (Oral Presentation)
14 |
15 |
16 |
17 | We build upon neural radiance fields to create a scene-specific implicit 3D semantic representation, Semantic-NeRF.
18 |
19 |
20 | ## Latest Updates
21 | - **Release of Replica Data Generation Code.** We have provided data generation scripts for Replica sequences in the [SSR/data_generation](https://github.com/Harry-Zhi/semantic_nerf/tree/main/SSR/data_generation) folder. Thanks to Xin of [vMAP](https://github.com/kxhit/vMAP) for cleaning them up.
22 | - **Instance Label Maps Available.** We have also provided the corresponding instance label maps for the pre-rendered Replica sequences in [dropbox](https://www.dropbox.com/home/Public_Hosting/Semantic_NeRF(ICCV2021)/Replica_Dataset) as a zip file, *Replica_Instance_Segmentation.zip*.
23 |
24 | ## Getting Started
25 |
26 | For faithful reproduction of our results, Ubuntu 20.04 is recommended. The models have been tested with Python 3.7, PyTorch 1.6.0 and CUDA 10.1; higher versions should also work.
27 |
28 | ### Dependencies
29 | The main Python dependencies are listed below:
30 | - Python >=3.7
31 | - torch>=1.6.0 (includes the *searchsorted* API; older versions need the third-party implementation [SearchSorted](https://github.com/aliutkus/torchsearchsorted))
32 | - cudatoolkit>=10.1
33 |
34 | The following packages are used for 3D mesh reconstruction:
35 | - trimesh==3.9.9
36 | - open3d==0.12.0
37 |
38 | With Anaconda, you can create a virtual environment and install the dependencies by:
39 | - `conda create -n semantic_nerf python=3.7`
40 | - `conda activate semantic_nerf`
41 | - `pip install -r requirements.txt`
42 |
43 | ## Datasets
44 | We mainly use [Replica](https://github.com/facebookresearch/Replica-Dataset) and [ScanNet](http://www.scan-net.org/) datasets for experiments, where we train a new Semantic-NeRF model on each 3D scene. Other similar indoor datasets with colour images, semantic labels and poses can also be used.
45 |
46 | ### We also provide [pre-rendered Replica data](https://www.dropbox.com/sh/9yu1elddll00sdl/AAC-rSJdLX0C6HhKXGKMOIija?dl=0) that can be directly used by Semantic-NeRF.
47 |
48 |
49 | ## Running code
50 | After cloning the repository, run Semantic-NeRF from its root directory.
51 |
52 | #### Semantic-NeRF training
53 | For standard Semantic-NeRF training with full dense semantic supervision, run the following command with a chosen config file specifying the data directory and hyper-parameters:
54 | ```
55 | python3 train_SSR_main.py --config_file /SSR/configs/SSR_room0_config.yaml
56 | ```
57 |
58 | Different working modes and set-ups can be chosen via the following command-line options:
59 | #### Semantic View Synthesis with Sparse Labels:
60 | ```
61 | python3 train_SSR_main.py --sparse_views --sparse_ratio 0.6
62 | ```
63 | Sparse ratio here is the portion of **dropped** frames in the training sequence.
64 |
65 | #### Pixel-wise Denoising Task:
66 | ```
67 | python3 train_SSR_main.py --pixel_denoising --pixel_noise_ratio 0.5
68 | ```
69 |
70 | We could also use a sparse set of frames along with denoising task:
71 | ```
72 | python3 train_SSR_main.py --pixel_denoising --pixel_noise_ratio 0.5 --sparse_views --sparse_ratio 0.6
73 | ```
74 |
75 | #### Region-wise Denoising task (For Replica Room2):
76 | ```
77 | python3 train_SSR_main.py --region_denoising --region_noise_ratio 0.3
78 | ```
79 | The argument **uniform_flip** selects between the two modes ("Even"/"Sort") of the region-wise denoising task.
80 |
81 | #### Super-Resolution Task:
82 | For super-resolution with **dense** labels, please run
83 | ```
84 | python3 train_SSR_main.py --super_resolution --sr_factor 8 --dense_sr
85 | ```
86 |
87 | For super-resolution with **sparse** labels, please run
88 | ```
89 | python3 train_SSR_main.py --super_resolution --sr_factor 8
90 | ```
91 |
92 | #### Label Propagation Task:
93 | For label propagation task with single-click seed regions, please run
94 | ```
95 | python3 train_SSR_main.py --label_propagation --partial_perc 0
96 | ```
97 |
98 | To improve reproducibility of the denoising and label-propagation tasks, you can also include `--visualise_save` and `--load_saved` to save/load the randomly generated labels.
99 |
100 |
101 | #### 3D Reconstruction of Replica Scenes
102 | We also provide code for extracting a 3D semantic mesh from a trained Semantic-NeRF model.
103 |
104 | ```
105 | python3 SSR/extract_colour_mesh.py --sem --mesh_dir PATH_TO_MESH --training_data_dir PATH_TO_TRAINING_DATA --save_dir PATH_TO_SAVE_DIR
106 | ```
107 |
108 |
109 |
110 | ### For more demos and qualitative results, please check our [project page](https://shuaifengzhi.com/Semantic-NeRF/) and [video](https://youtu.be/FpShWO7LVbM).
111 |
112 |
113 | ## Acknowledgements
114 | Thanks to [nerf](https://github.com/bmild/nerf), [nerf-pytorch](https://github.com/yenchenlin/nerf-pytorch) and [nerf_pl](https://github.com/kwea123/nerf_pl) for providing nice and inspiring implementations of NeRF, and to [Atlas](https://github.com/magicleap/Atlas) for the ScanNet data-processing scripts.
115 |
116 | ## Citation
117 | If you found this code/work to be useful in your own research, please consider citing the following:
118 | ```
119 | @inproceedings{Zhi:etal:ICCV2021,
120 | title={In-Place Scene Labelling and Understanding with Implicit Scene Representation},
121 | author={Shuaifeng Zhi and Tristan Laidlow and Stefan Leutenegger and Andrew J. Davison},
122 | booktitle={ICCV},
123 | year={2021}
124 | }
125 | ```
126 |
127 | ## Contact
128 | If you have any questions, please contact s.zhi17@imperial.ac.uk or zhishuaifeng@outlook.com.
129 |
130 |
--------------------------------------------------------------------------------
/SSR/visualisation/open3d_utils.py:
--------------------------------------------------------------------------------
1 | import open3d as o3d
2 | import numpy as np
3 | import trimesh
4 |
5 | def draw_segment(t1, t2, color=(1., 1., 0.)):
6 | points = [t1, t2]
7 |
8 | lines = [[0, 1]]
9 |
10 | colors = [color for i in range(len(lines))]
11 | line_set = o3d.geometry.LineSet(
12 | points=o3d.utility.Vector3dVector(points),
13 | lines=o3d.utility.Vector2iVector(lines),
14 | )
15 | line_set.colors = o3d.utility.Vector3dVector(colors)
16 |
17 | return line_set # line-segment
18 |
19 |
20 | def draw_trajectory(scene, material, transform_wc, color=(1., 1., 0.), name="trajectory"):
21 | for i in range(transform_wc.shape[0] - 1):
22 | t1 = transform_wc[i, :3, 3]
23 | t2 = transform_wc[i+1, :3, 3]
24 | segment = draw_segment(t1, t2, color)
25 | scene.scene.add_geometry("{}_{}".format(name, i), segment, material)
26 | scene.force_redraw()
27 |
28 | def draw_camera_frustrums(scene, material, intrinsics, transform_wc, scale=1.0, color=(1, 0, 0), name="camera"):
29 | for i in range(len(transform_wc)):
30 | camera_frustum = gen_camera_frustrum(intrinsics, transform_wc[i], scale=scale, color=color)
31 | scene.scene.add_geometry("{}_{}".format(name, i), camera_frustum, material)
32 | scene.force_redraw()
33 |
34 |
35 | def gen_camera_frustrum(intrinsics, transform_wc, scale=1.0, color=(1, 0, 0)):
36 | """
37 | intrinsics: camera intrinsic matrix
38 | scale: the depth of the frustum front plane
39 | color: frustum line colours
40 | """
41 | print("Draw camera frustum using o3d.geometry.LineSet.")
42 | w = intrinsics['cx'] * 2
43 | h = intrinsics['cy'] * 2
44 | xl = scale * -intrinsics['cx'] / intrinsics['fx'] # 3D coordinate of minimum x
45 | xh = scale * (w - intrinsics['cx']) / intrinsics['fx'] # 3D coordinate of maximum x
46 | yl = scale * -intrinsics['cy'] / intrinsics['fy'] # 3D coordinate of minimum y
47 | yh = scale * (h - intrinsics['cy']) / intrinsics['fy'] # 3D coordinate of maximum y
48 | verts = [
49 | [0, 0, 0], # 0 - camera center
50 | [xl, yl, scale], # 1 - upper left
51 | [xh, yl, scale], # 2 - upper right
52 | [xh, yh, scale], # 3 - bottom right
53 | [xl, yh, scale], # 4 - bottom left
54 | ]
55 |
56 | lines = [
57 | [0, 1],
58 | [0, 2],
59 | [0, 3],
60 | [0, 4],
61 | [1, 2],
62 | [1, 4],
63 | [3, 2],
64 | [3, 4],
65 | ]
66 |
67 | colors = [color for i in range(len(lines))]
68 | line_set = o3d.geometry.LineSet(
69 | points=o3d.utility.Vector3dVector(verts),
70 | lines=o3d.utility.Vector2iVector(lines),
71 | )
72 | line_set.colors = o3d.utility.Vector3dVector(colors)
73 |
74 | line_set = line_set.transform(transform_wc)
75 | return line_set # camera frustum
76 |
77 |
78 |
79 | def integrate_rgbd_tsdf(tsdf_volume, rgb, dep, depth_trunc, T_wc, intrinsic):
80 | for i in range(0, len(T_wc)):
81 | print("Integrate {:d}-th image into the volume.".format(i))
82 | color = o3d.geometry.Image(rgb[i])
83 | depth = o3d.geometry.Image(dep[i])
84 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
85 | color,
86 | depth,
87 | depth_trunc=depth_trunc,
88 | depth_scale=1,
89 | convert_rgb_to_intensity=False,
90 | )
91 |
92 | T_cw = np.linalg.inv(T_wc[i])
93 |
94 | tsdf_volume.integrate(
95 | image=rgbd,
96 | intrinsic=intrinsic,
97 | extrinsic=T_cw,
98 | )
99 | return tsdf_volume
100 |
101 | def tsdf2mesh(tsdf):
102 | mesh = tsdf.extract_triangle_mesh()
103 | mesh.compute_vertex_normals()
104 | return mesh
105 |
106 |
107 |
108 | def integrate_dep_pcd(dep, T_wc, intrinsic):
109 | # http://www.open3d.org/docs/latest/tutorial/Advanced/multiway_registration.html#Make-a-combined-point-cloud
110 |
111 | pcd_list = []
112 | pcd_combined = o3d.geometry.PointCloud()
113 | for i in range(0, len(T_wc)):
114 | depth = o3d.geometry.Image(dep[i])
115 | pcd = o3d.geometry.PointCloud.create_from_depth_image(
116 | depth,
117 | intrinsic,
118 | depth_scale=1,
119 | stride=1,
120 | project_valid_depth_only=True)
121 | pcd.transform(T_wc[i])
122 | pcd_combined+= pcd
123 |
124 | # pcd_combined = pcd_combined.voxel_down_sample(voxel_size=0.02)
125 | print("Merge point clouds from multiple views.")
126 | # Flip it, otherwise the pointcloud will be upside down
127 | pcd_combined.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
128 | o3d.visualization.draw_geometries([pcd_combined],
129 | zoom=0.3412,
130 | front=[0.4257, -0.2125, -0.8795],
131 | lookat=[2.6172, 2.0475, 1.532],
132 | up=[-0.0694, -0.9768, 0.2024])
133 | return pcd_combined
134 |
135 |
136 | def draw_pc(batch_size,
137 | pcs_cam,
138 | T_WC_batch_np,
139 | im_batch=None,
140 | scene=None):
141 |
142 | pcs_w = []
143 | for batch_i in range(batch_size):
144 | T_WC = T_WC_batch_np[batch_i]
145 | pc_cam = pcs_cam[batch_i]
146 |
147 | col = None
148 | if im_batch is not None:
149 | img = im_batch[batch_i]
150 | col = img.reshape(-1, 3)
151 |
152 | pc_tri = trimesh.PointCloud(vertices=pc_cam, colors=col)
153 | pc_tri.apply_transform(T_WC)
154 | pcs_w.append(pc_tri.vertices)
155 |
156 | if scene is not None:
157 | scene.add_geometry(pc_tri)
158 |
159 | pcs_w = np.concatenate(pcs_w, axis=0)
160 | return pcs_w
161 |
162 |
163 |
164 | def trimesh_to_open3d(src):
165 | dst = o3d.geometry.TriangleMesh()
166 | dst.vertices = o3d.utility.Vector3dVector(src.vertices)
167 | dst.triangles = o3d.utility.Vector3iVector(src.faces)
168 | vertex_colors = src.visual.vertex_colors[:, :3].astype(float) / 255.0
169 | dst.vertex_colors = o3d.utility.Vector3dVector(vertex_colors)
170 | dst.compute_vertex_normals()
171 |
172 | return dst
173 |
174 |
175 | def clean_mesh(o3d_mesh, keep_single_cluster=False, min_num_cluster=200):
176 | import copy
177 |
178 | o3d_mesh_clean = copy.deepcopy(o3d_mesh)
179 | # http://www.open3d.org/docs/release/tutorial/geometry/mesh.html?highlight=cluster_connected_triangles
180 | triangle_clusters, cluster_n_triangles, cluster_area = o3d_mesh_clean.cluster_connected_triangles()
181 |
182 | triangle_clusters = np.asarray(triangle_clusters)
183 | cluster_n_triangles = np.asarray(cluster_n_triangles)
184 | cluster_area = np.asarray(cluster_area)
185 |
186 | if keep_single_cluster:
187 | # keep the largest cluster.!
188 | largest_cluster_idx = np.argmax(cluster_n_triangles)
189 | triangles_to_remove = triangle_clusters != largest_cluster_idx
190 | o3d_mesh_clean.remove_triangles_by_mask(triangles_to_remove)
191 | o3d_mesh_clean.remove_unreferenced_vertices()
192 | print("Show mesh with largest cluster kept")
193 | else:
194 | # remove small clusters
195 | triangles_to_remove = cluster_n_triangles[triangle_clusters] < min_num_cluster
196 | o3d_mesh_clean.remove_triangles_by_mask(triangles_to_remove)
197 | o3d_mesh_clean.remove_unreferenced_vertices()
198 | print("Show mesh with small clusters removed")
199 |
200 |
201 | return o3d_mesh_clean
--------------------------------------------------------------------------------
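A hedged usage sketch for `trimesh_to_open3d` and `clean_mesh` above; the box mesh and its vertex colours are stand-ins for an extracted semantic mesh, and the repository root is assumed to be importable:

```python
# Hedged sketch: convert a trimesh mesh to Open3D and keep only its largest cluster.
import trimesh
from SSR.visualisation.open3d_utils import trimesh_to_open3d, clean_mesh

tri = trimesh.creation.box(extents=(1.0, 1.0, 1.0))   # stand-in for an extracted colour mesh
tri.visual.vertex_colors = [200, 200, 200, 255]       # broadcast one RGBA colour to all vertices
o3d_mesh = trimesh_to_open3d(tri)
o3d_mesh_clean = clean_mesh(o3d_mesh, keep_single_cluster=True)
```
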
/SSR/datasets/replica_nyu/replica_nyu_cnn_datasets.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import glob
4 | import numpy as np
5 | from torch.utils.data import Dataset
6 | import cv2
7 |
8 | class Replica_CNN_NYU(Dataset):
9 | def __init__(self, data_dir, train_ids, test_ids, nyu_mode, img_h=None, img_w=None, load_softmax=False):
10 |
11 | assert nyu_mode == "nyu13" or nyu_mode == "nyu34" or nyu_mode == "gt_nyu13"
12 |
13 | traj_file = os.path.join(data_dir, "traj_w_c.txt")
14 | self.rgb_dir = os.path.join(data_dir, "rgb")
15 | self.depth_dir = os.path.join(data_dir, "depth") # depth is stored in millimetres as uint16
16 | # self.cnn_semantic_class_dir = os.path.join(data_dir, "CNN_semantic_class_{}".format(nyu_mode))
17 | if nyu_mode == "nyu13":
18 | self.cnn_semantic_class_dir = os.path.join(data_dir, "CNN_semantic_class_nyu13")
19 | self.gt_semantic_class_dir = os.path.join(data_dir, "semantic_class_nyu13_remap")
20 | elif nyu_mode=="nyu34":
21 | self.cnn_semantic_class_dir = os.path.join(data_dir, "CNN_semantic_class_nyu34")
22 | self.gt_semantic_class_dir = os.path.join(data_dir, "semantic_class_nyu40_remap_nyu34")
23 | elif nyu_mode == "gt_nyu13":
24 | self.cnn_semantic_class_dir = os.path.join(data_dir, "semantic_class_nyu13_remap")
25 | self.gt_semantic_class_dir = os.path.join(data_dir, "semantic_class_nyu13_remap")
26 |
27 | # self.cnn_softmax_dir = os.path.join(data_dir, "semantic_prob_CNN")
28 |
29 |
30 | self.nyu_mode = nyu_mode
31 | self.load_softmax = load_softmax
32 |
33 | self.train_ids = train_ids
34 | self.train_num = len(train_ids)
35 | self.test_ids = test_ids
36 | self.test_num = len(test_ids)
37 |
38 | self.img_h = img_h
39 | self.img_w = img_w
40 |
41 | self.Ts_full = np.loadtxt(traj_file, delimiter=" ").reshape(-1, 4, 4)
42 |
43 | self.rgb_list = sorted(glob.glob(self.rgb_dir + '/rgb*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
44 | self.depth_list = sorted(glob.glob(self.depth_dir + '/depth*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
45 | self.cnn_semantic_list = sorted(glob.glob(self.cnn_semantic_class_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
46 | self.gt_semantic_list = sorted(glob.glob(self.gt_semantic_class_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
47 |
48 | if load_softmax:
49 | self.cnn_softmax_list = sorted(glob.glob(self.cnn_softmax_dir + '/softmax_prob_*.npy'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
50 |
51 |
52 |
53 | self.train_samples = {'image': [], 'depth': [],
54 | 'cnn_semantic': [],
55 | 'gt_semantic': [],
56 | 'cnn_softmax': [],
57 | 'cnn_entropy':[],
58 | 'T_wc': []}
59 |
60 | self.test_samples = {'image': [], 'depth': [],
61 | 'cnn_semantic': [],
62 | 'gt_semantic': [],
63 | 'cnn_softmax': [],
64 | 'cnn_entropy':[],
65 | 'T_wc': []}
66 | # training samples
67 | for idx in train_ids:
68 |             image = cv2.imread(self.rgb_list[idx])[:,:,::-1] / 255.0  # convert from BGR uint8 to RGB float
69 | depth = cv2.imread(self.depth_list[idx], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
70 | cnn_semantic = cv2.imread(self.cnn_semantic_list[idx], cv2.IMREAD_UNCHANGED)
71 | gt_semantic = cv2.imread(self.gt_semantic_list[idx], cv2.IMREAD_UNCHANGED)
72 |
73 |
74 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
75 | (self.img_w is not None and self.img_w != image.shape[1]):
76 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
77 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
78 | cnn_semantic = cv2.resize(cnn_semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
79 | gt_semantic = cv2.resize(gt_semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
80 | T_wc = self.Ts_full[idx]
81 |
82 | self.train_samples["image"].append(image)
83 | self.train_samples["depth"].append(depth)
84 | self.train_samples["cnn_semantic"].append(cnn_semantic)
85 | self.train_samples["gt_semantic"].append(gt_semantic)
86 | self.train_samples["T_wc"].append(T_wc)
87 |
88 |
89 | # test samples
90 | for idx in test_ids:
91 |             image = cv2.imread(self.rgb_list[idx])[:,:,::-1] / 255.0  # convert from BGR uint8 to RGB float
92 | depth = cv2.imread(self.depth_list[idx], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
93 | cnn_semantic = cv2.imread(self.cnn_semantic_list[idx], cv2.IMREAD_UNCHANGED)
94 | gt_semantic = cv2.imread(self.gt_semantic_list[idx], cv2.IMREAD_UNCHANGED)
95 |
96 |
97 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
98 | (self.img_w is not None and self.img_w != image.shape[1]):
99 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
100 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
101 | cnn_semantic = cv2.resize(cnn_semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
102 | gt_semantic = cv2.resize(gt_semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
103 | T_wc = self.Ts_full[idx]
104 |
105 | self.test_samples["image"].append(image)
106 | self.test_samples["depth"].append(depth)
107 | self.test_samples["cnn_semantic"].append(cnn_semantic)
108 | self.test_samples["gt_semantic"].append(gt_semantic)
109 | self.test_samples["T_wc"].append(T_wc)
110 |
111 |
112 |
113 | if load_softmax is True:
114 | softmax_2_entropy_np = lambda x, axis: np.sum(-np.log2(x+1e-12)*x, axis=axis, keepdims=False) # H,W
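            # Per-pixel Shannon entropy (in bits) over the class axis: H(p) = -sum_c p_c * log2(p_c);
            # the 1e-12 offset guards against log2(0) for classes assigned zero probability.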
115 | # training samples
116 | cnt = 0
117 | for idx in train_ids:
118 | cnn_softmax = np.clip(np.load(self.cnn_softmax_list[idx]), a_min=0, a_max=1.0)
119 | if (self.img_h is not None and self.img_h != cnn_softmax.shape[0]) or \
120 | (self.img_w is not None and self.img_w != cnn_softmax.shape[1]):
121 | cnn_softmax = cv2.resize(cnn_softmax, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
122 |                     # note: cv2.resize supports at most 512 channels
123 |
124 | valid_mask = self.train_samples["gt_semantic"][cnt]>0
125 | entropy = softmax_2_entropy_np(cnn_softmax, -1)*valid_mask
126 | cnn_softmax = cnn_softmax*valid_mask[:,:,None]
127 | self.train_samples["cnn_softmax"].append(cnn_softmax)
128 | self.train_samples["cnn_entropy"].append(entropy)
129 | cnt += 1
130 | assert cnt==len(train_ids)
131 |
132 | # test samples
133 | cnt = 0
134 | for idx in test_ids:
135 | cnn_softmax = np.load(self.cnn_softmax_list[idx])
136 | assert cnn_softmax.shape[-1]==34
137 | if (self.img_h is not None and self.img_h != cnn_softmax.shape[0]) or \
138 | (self.img_w is not None and self.img_w != cnn_softmax.shape[1]):
139 | cnn_softmax = cv2.resize(cnn_softmax, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
140 | # we do not need softmax for testing, can also save memory
141 | valid_mask = self.test_samples["gt_semantic"][cnt]>0
142 | entropy = softmax_2_entropy_np(cnn_softmax, -1)*valid_mask
143 | self.test_samples["cnn_entropy"].append(entropy)
144 | cnt += 1
145 | assert cnt==len(test_ids)
146 |
147 |
148 | for key in self.test_samples.keys(): # transform list of np array to array with batch dimension
149 | self.train_samples[key] = np.asarray(self.train_samples[key])
150 | self.test_samples[key] = np.asarray(self.test_samples[key])
151 |
152 |         from SSR.utils import image_utils  # imported here so both branches below can use the colour maps
153 |         if nyu_mode == "nyu13" or nyu_mode == "gt_nyu13":
154 |             self.semantic_classes = np.arange(14)  # 0-void, 1-13 valid classes
155 |             self.num_semantic_class = 14  # 13 valid classes + 1 void class
156 |             self.colour_map_np = image_utils.nyu13_colour_code
157 |         elif nyu_mode == "nyu34":
158 |             self.semantic_classes = np.arange(35)  # 0-void, 1-34 valid classes
159 |             self.num_semantic_class = 35  # 34 valid classes + 1 void class
160 |             self.colour_map_np = image_utils.nyu34_colour_code
161 |
162 | self.mask_ids = np.ones(self.train_num) # init self.mask_ids as full ones
163 |         # 1 means the corresponding label map is used for the semantic loss during training, while 0 means no semantic loss
164 | self.train_samples["cnn_semantic_clean"] = self.train_samples["cnn_semantic"].copy()
165 |
166 | print()
167 | print("Training Sample Summary:")
168 | for key in self.train_samples.keys():
169 | print("{} has shape of {}, type {}.".format(key, self.train_samples[key].shape, self.train_samples[key].dtype))
170 | print()
171 | print("Testing Sample Summary:")
172 | for key in self.test_samples.keys():
173 | print("{} has shape of {}, type {}.".format(key, self.test_samples[key].shape, self.test_samples[key].dtype))
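
# Illustrative usage sketch (not part of the original module); the data path and frame ids are hypothetical:
#   train_ids = list(range(0, 900, 5))
#   test_ids = [i + 2 for i in train_ids]
#   dataset = Replica_CNN_NYU(data_dir="path/to/replica_room_0", train_ids=train_ids, test_ids=test_ids,
#                             nyu_mode="nyu13", img_h=480, img_w=640, load_softmax=False)
#   images = dataset.train_samples["image"]  # [N_train, H, W, 3] float RGB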
--------------------------------------------------------------------------------
/SSR/models/rays.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | # Ray helpers
5 | def get_rays(H, W, focal, c2w):
6 | i, j = torch.meshgrid(torch.linspace(0, W-1, W), torch.linspace(0, H-1, H)) # pytorch's meshgrid has indexing='ij'
7 | i = i.t()
8 | j = j.t()
9 | dirs = torch.stack([(i-W*.5)/focal, -(j-H*.5)/focal, -torch.ones_like(i)], -1)
10 | # Rotate ray directions from camera frame to the world frame
11 | rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1) # dot product, equals to: [c2w.dot(dir) for dir in dirs]
12 | # Translate camera frame's origin to the world frame. It is the origin of all rays.
13 | rays_o = c2w[:3,-1].expand(rays_d.shape)
14 | return rays_o, rays_d
15 |
16 |
17 | def get_rays_np(H, W, focal, c2w):
18 |     i, j = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32), indexing='xy')
19 |     dirs = np.stack([(i-W*.5)/focal, -(j-H*.5)/focal, -np.ones_like(i)], -1)  # pixel grid -> camera-frame ray directions (cf. get_rays above)
20 |     # Rotate ray directions from camera frame to the world frame
21 |     rays_d = np.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
22 |     # Translate camera frame's origin to the world frame. It is the origin of all rays.
23 |     rays_o = np.broadcast_to(c2w[:3,-1], np.shape(rays_d))
24 |     return rays_o, rays_d
24 |
25 |
26 | # Ray helpers
27 | def get_rays_camera(B, H, W, fx, fy, cx, cy, depth_type, convention="opencv"):
28 |
29 |     assert depth_type == "z" or depth_type == "euclidean"
30 |     i, j = torch.meshgrid(torch.arange(W), torch.arange(H))  # pytorch's meshgrid uses indexing='ij'; we transpose to "xy" mode below
31 |
32 | i = i.t().float()
33 | j = j.t().float()
34 |
35 | size = [B, H, W]
36 |
37 | i_batch = torch.empty(size)
38 | j_batch = torch.empty(size)
39 | i_batch[:, :, :] = i[None, :, :]
40 | j_batch[:, :, :] = j[None, :, :]
41 |
42 | if convention == "opencv":
43 | x = (i_batch - cx) / fx
44 | y = (j_batch - cy) / fy
45 | z = torch.ones(size)
46 | elif convention == "opengl":
47 | x = (i_batch - cx) / fx
48 | y = -(j_batch - cy) / fy
49 | z = -torch.ones(size)
50 | else:
51 | assert False
52 |
53 | dirs = torch.stack((x, y, z), dim=3) # shape of [B, H, W, 3]
54 |
55 | if depth_type == 'euclidean':
56 | norm = torch.norm(dirs, dim=3, keepdim=True)
57 | dirs = dirs * (1. / norm)
58 |
59 | return dirs
60 |
61 |
62 | def get_rays_world(T_WC, dirs_C):
63 | R_WC = T_WC[:, :3, :3] # Bx3x3
64 | dirs_W = torch.matmul(R_WC[:, None, ...], dirs_C[..., None]).squeeze(-1)
65 | origins = T_WC[:, :3, -1] # Bx3
66 | origins = torch.broadcast_tensors(origins[:, None, :], dirs_W)[0]
67 | return origins, dirs_W
68 |
69 |
70 | def get_rays_camera_np(B, H, W, fx, fy, cx, cy, depth_type, convention="opencv"):
71 |     assert depth_type == "z" or depth_type == "euclidean"
72 | i, j = np.meshgrid(np.arange(W, dtype=np.float32),
73 | np.arange(H, dtype=np.float32), indexing='xy') # pytorch's meshgrid has default indexing='ij'
74 |
75 | size = [B, H, W]
76 |
77 | i_batch = np.empty(size, dtype=np.float32)
78 | j_batch = np.empty(size, dtype=np.float32)
79 | i_batch[:, :, :] = i[None, :, :]
80 | j_batch[:, :, :] = j[None, :, :]
81 |
82 | if convention == "opencv":
83 | x = (i_batch - cx) / fx
84 | y = (j_batch - cy) / fy
85 | z = np.ones(size, dtype=np.float32)
86 | elif convention == "opengl":
87 | x = (i_batch - cx) / fx
88 | y = -(j_batch - cy) / fy
89 | z = -np.ones(size, dtype=np.float32)
90 | else:
91 | assert False
92 |
93 | dirs = np.stack((x, y, z), axis=3) # shape of [B, H, W, 3]
94 |
95 | if depth_type == 'euclidean':
96 |         norm = np.linalg.norm(dirs, axis=3, keepdims=True)
97 | dirs = dirs * (1. / norm)
98 |
99 | return dirs
100 |
101 |
102 | def get_rays_world_np(T_WC, dirs_C):
103 | R_WC = T_WC[:, :3, :3] # Bx3x3
104 | dirs_W = (R_WC * dirs_C[..., np.newaxis, :]).sum(axis=-1) # dot product, equals to: [c2w.dot(dir) for dir in dirs]
105 | # sum([B,3,3] * [B, H, W, 1, 3], axis=-1) --> [B, H, W, 3]
106 | origins = T_WC[:, :3, -1] # Bx3
107 |
108 | return origins, dirs_W
109 |
110 |
111 | def ndc_rays(H, W, focal, near, rays_o, rays_d):
112 |
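    # Maps rays into NeRF's normalised device coordinates: origins are first advanced onto the
    # near plane, then a perspective projection sends the viewing frustum to the cube [-1, 1]^3,
    # with depth between the near plane and infinity mapped to the finite range [-1, 1].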
113 | # Shift ray origins to near plane
114 | # solves for the t value such that o + t * d = -near
115 | t = -(near + rays_o[..., 2]) / rays_d[..., 2]
116 | rays_o = rays_o + t[..., None] * rays_d
117 |
118 | # Projection
119 | o0 = -1. / (W / (2. * focal)) * rays_o[..., 0] / rays_o[..., 2]
120 | o1 = -1. / (H / (2. * focal)) * rays_o[..., 1] / rays_o[..., 2]
121 | o2 = 1. + 2. * near / rays_o[..., 2]
122 |
123 | d0 = -1. / (W / (2. * focal)) * (rays_d[..., 0] / rays_d[..., 2] - rays_o[..., 0] / rays_o[..., 2])
124 | d1 = -1. / (H / (2. * focal)) * (rays_d[..., 1] / rays_d[..., 2] - rays_o[..., 1] / rays_o[..., 2])
125 | d2 = -2. * near / rays_o[..., 2]
126 |
127 | rays_o = torch.stack([o0, o1, o2], -1)
128 | rays_d = torch.stack([d0, d1, d2], -1)
129 |
130 | return rays_o, rays_d
131 |
132 |
133 | def stratified_bins(min_depth,
134 | max_depth,
135 | n_bins,
136 | n_rays,
137 | device):
138 |
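    # Stratified depth sampling: split [min_depth, max_depth] into n_bins equal-width bins and
    # draw one uniform sample inside each bin, giving z_vals of shape [n_rays, n_bins].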
139 | bin_limits = torch.linspace(
140 | min_depth,
141 | max_depth,
142 | n_bins + 1,
143 | device=device,
144 | )
145 | lower_limits = bin_limits[:-1]
146 | bin_length = (max_depth - min_depth) / (n_bins)
147 | increments = torch.rand(n_rays, n_bins, device=device) * bin_length
148 | z_vals = lower_limits[None, :] + increments
149 |
150 | return z_vals
151 |
152 |
153 | def sampling_index(n_rays, batch_size, h, w):
154 |
155 |     index_b = np.random.choice(np.arange(batch_size)).reshape((1, 1))  # sample one image from the full training set
156 | index_hw = torch.randint(0, h * w, (1, n_rays))
157 |
158 | return index_b, index_hw
159 |
160 |
161 | # Hierarchical sampling using inverse CDF transformations
162 | def sample_pdf(bins, weights, N_samples, det=False):
163 | """ Sample @N_importance samples from @bins with distribution defined by @weights.
164 |
165 | Inputs:
166 | bins: N_rays x (N_samples_coarse - 1)
167 | weights: N_rays x (N_samples_coarse - 2)
168 | N_samples: N_samples_fine
169 | det: deterministic or not
170 | """
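    # Inverse-transform sampling: build a discrete CDF from the normalised coarse weights, draw
    # u ~ U[0, 1) (or an evenly spaced grid when det=True), locate each u in the CDF with
    # searchsorted, and linearly interpolate inside the selected bin to obtain the fine sample.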
171 | # Get pdf
172 | weights = weights + 1e-5 # prevent nans, prevent division by zero (don't do inplace op!)
173 | pdf = weights / torch.sum(weights, -1, keepdim=True)
174 | cdf = torch.cumsum(pdf, -1) # N_rays x (N_samples - 2)
175 | cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1) # N_rays x (N_samples_coarse - 1)
176 | # padded to 0~1 inclusive, (N_rays, N_samples-1)
177 |
178 | # Take uniform samples
179 | if det: # generate deterministic samples
180 | u = torch.linspace(0., 1., steps=N_samples, device=bins.device)
181 | u = u.expand(list(cdf.shape[:-1]) + [N_samples])
182 | else:
183 | u = torch.rand(list(cdf.shape[:-1]) + [N_samples], device=bins.device)
184 | # (N_rays, N_samples_fine)
185 |
186 | # Invert CDF
187 | u = u.contiguous()
188 | inds = torch.searchsorted(cdf.detach(), u, right=True) # N_rays x N_samples_fine
189 | below = torch.max(torch.zeros_like(inds-1), inds-1)
190 | above = torch.min((cdf.shape[-1]-1) * torch.ones_like(inds), inds)
191 | inds_g = torch.stack([below, above], -1) # (N_rays, N_samples_fine, 2)
192 |
193 | matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]] # (N_rays, N_samples_fine, N_samples_coarse - 1)
194 |
195 | cdf_g = torch.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g) # N_rays, N_samples_fine, 2
196 | bins_g = torch.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g) # N_rays, N_samples_fine, 2
197 |
198 | denom = (cdf_g[..., 1]-cdf_g[..., 0]) # # N_rays, N_samples_fine
199 | denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
200 | # denom equals 0 means a bin has weight 0, in which case it will not be sampled
201 | # anyway, therefore any value for it is fine (set to 1 here)
202 |
203 | t = (u-cdf_g[..., 0])/denom
204 | samples = bins_g[..., 0] + t * (bins_g[...,1]-bins_g[...,0])
205 |
206 | return samples
207 |
208 |
209 | def create_rays(num_rays, Ts_c2w, height, width, fx, fy, cx, cy, near, far, c2w_staticcam=None, depth_type="z",
210 | use_viewdirs=True, convention="opencv"):
211 | """
212 | convention:
213 |         "opencv" or "opengl". It defines the coordinate convention of the rays cast from the cameras.
214 |         OpenCV defines x,y,z as right, down, forward, while OpenGL defines x,y,z as right, up, backward (the camera still looks along its forward direction, i.e. -z).
215 |         Note: either convention is fine, but the corresponding camera poses must follow the same convention.
216 |
217 | """
218 | print('prepare rays')
219 |
220 | rays_cam = get_rays_camera(num_rays, height, width, fx, fy, cx, cy, depth_type=depth_type, convention=convention) # [N, H, W, 3]
221 |
222 | dirs_C = rays_cam.view(num_rays, -1, 3) # [N, HW, 3]
223 | rays_o, rays_d = get_rays_world(Ts_c2w, dirs_C) # origins: [B, HW, 3], dirs_W: [B, HW, 3]
224 |
225 | if use_viewdirs:
226 | # provide ray directions as input
227 | viewdirs = rays_d
228 | if c2w_staticcam is not None:
229 | # c2w_staticcam: If not None, use this transformation matrix for camera,
230 | # while using other c2w argument for viewing directions.
231 | # special case to visualize effect of viewdirs
232 | rays_o, rays_d = get_rays_world(c2w_staticcam, dirs_C) # origins: [B, HW, 3], dirs_W: [B, HW, 3]
233 |
234 | viewdirs = viewdirs / torch.norm(viewdirs, dim=-1, keepdim=True).float()
235 |
236 | near, far = near * torch.ones_like(rays_d[..., :1]), far * torch.ones_like(rays_d[..., :1])
237 | rays = torch.cat([rays_o, rays_d, near, far], -1)
238 |
239 | if use_viewdirs:
240 | rays = torch.cat([rays, viewdirs], -1)
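    # Packed per-ray layout: [origin (3), direction (3), near (1), far (1)], plus the normalised
    # view directions (3) when use_viewdirs is True, i.e. 11 channels per ray in total.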
241 | return rays
242 |
243 |
--------------------------------------------------------------------------------
/SSR/data_generation/settings.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # This source code is licensed under the MIT license found in the
3 | # LICENSE file in the root directory of this source tree.
4 |
5 | import habitat_sim
6 | import habitat_sim.agent
7 |
8 | default_sim_settings = {
9 | # settings shared by example.py and benchmark.py
10 | "max_frames": 1000,
11 | "width": 640,
12 | "height": 480,
13 | "default_agent": 0,
14 | "sensor_height": 1.5,
15 | "hfov": 90,
16 | "color_sensor": True, # RGB sensor (default: ON)
17 | "semantic_sensor": False, # semantic sensor (default: OFF)
18 | "depth_sensor": False, # depth sensor (default: OFF)
19 | "ortho_rgba_sensor": False, # Orthographic RGB sensor (default: OFF)
20 | "ortho_depth_sensor": False, # Orthographic depth sensor (default: OFF)
21 | "ortho_semantic_sensor": False, # Orthographic semantic sensor (default: OFF)
22 | "fisheye_rgba_sensor": False,
23 | "fisheye_depth_sensor": False,
24 | "fisheye_semantic_sensor": False,
25 | "equirect_rgba_sensor": False,
26 | "equirect_depth_sensor": False,
27 | "equirect_semantic_sensor": False,
28 | "seed": 1,
29 | "silent": False, # do not print log info (default: OFF)
30 | # settings exclusive to example.py
31 | "save_png": False, # save the pngs to disk (default: OFF)
32 | "print_semantic_scene": False,
33 | "print_semantic_mask_stats": False,
34 | "compute_shortest_path": False,
35 | "compute_action_shortest_path": False,
36 | "scene": "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb",
37 | "test_scene_data_url": "http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip",
38 | "goal_position": [5.047, 0.199, 11.145],
39 | "enable_physics": False,
40 | "enable_gfx_replay_save": False,
41 | "physics_config_file": "./data/default.physics_config.json",
42 | "num_objects": 10,
43 | "test_object_index": 0,
44 | "frustum_culling": True,
45 | }
46 |
47 | # build SimulatorConfiguration
48 | def make_cfg(settings):
49 | sim_cfg = habitat_sim.SimulatorConfiguration()
50 | if "frustum_culling" in settings:
51 | sim_cfg.frustum_culling = settings["frustum_culling"]
52 | else:
53 | sim_cfg.frustum_culling = False
54 | if "enable_physics" in settings:
55 | sim_cfg.enable_physics = settings["enable_physics"]
56 | if "physics_config_file" in settings:
57 | sim_cfg.physics_config_file = settings["physics_config_file"]
58 | # if not settings["silent"]:
59 | # print("sim_cfg.physics_config_file = " + sim_cfg.physics_config_file)
60 | if "scene_light_setup" in settings:
61 | sim_cfg.scene_light_setup = settings["scene_light_setup"]
62 | sim_cfg.gpu_device_id = 0
63 | if not hasattr(sim_cfg, "scene_id"):
64 | raise RuntimeError(
65 | "Error: Please upgrade habitat-sim. SimulatorConfig API version mismatch"
66 | )
67 | sim_cfg.scene_id = settings["scene_file"]
68 |
69 | # define default sensor parameters (see src/esp/Sensor/Sensor.h)
70 | sensor_specs = []
71 |
72 | def create_camera_spec(**kw_args):
73 | camera_sensor_spec = habitat_sim.CameraSensorSpec()
74 | camera_sensor_spec.sensor_type = habitat_sim.SensorType.COLOR
75 | camera_sensor_spec.resolution = [settings["height"], settings["width"]]
76 | camera_sensor_spec.position = [0, settings["sensor_height"], 0]
77 | for k in kw_args:
78 | setattr(camera_sensor_spec, k, kw_args[k])
79 | return camera_sensor_spec
80 |
81 | if settings["color_sensor"]:
82 | color_sensor_spec = create_camera_spec(
83 | uuid="color_sensor",
84 | # hfov=settings["hfov"],
85 | sensor_type=habitat_sim.SensorType.COLOR,
86 | sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
87 | )
88 | sensor_specs.append(color_sensor_spec)
89 |
90 | if settings["depth_sensor"]:
91 | depth_sensor_spec = create_camera_spec(
92 | uuid="depth_sensor",
93 | # hfov=settings["hfov"],
94 | sensor_type=habitat_sim.SensorType.DEPTH,
95 | channels=1,
96 | sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
97 | )
98 | sensor_specs.append(depth_sensor_spec)
99 |
100 | if settings["semantic_sensor"]:
101 | semantic_sensor_spec = create_camera_spec(
102 | uuid="semantic_sensor",
103 | # hfov=settings["hfov"],
104 | sensor_type=habitat_sim.SensorType.SEMANTIC,
105 | channels=1,
106 | sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
107 | )
108 | sensor_specs.append(semantic_sensor_spec)
109 |
110 | # if settings["ortho_rgba_sensor"]:
111 | # ortho_rgba_sensor_spec = create_camera_spec(
112 | # uuid="ortho_rgba_sensor",
113 | # sensor_type=habitat_sim.SensorType.COLOR,
114 | # sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,
115 | # )
116 | # sensor_specs.append(ortho_rgba_sensor_spec)
117 | #
118 | # if settings["ortho_depth_sensor"]:
119 | # ortho_depth_sensor_spec = create_camera_spec(
120 | # uuid="ortho_depth_sensor",
121 | # sensor_type=habitat_sim.SensorType.DEPTH,
122 | # channels=1,
123 | # sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,
124 | # )
125 | # sensor_specs.append(ortho_depth_sensor_spec)
126 | #
127 | # if settings["ortho_semantic_sensor"]:
128 | # ortho_semantic_sensor_spec = create_camera_spec(
129 | # uuid="ortho_semantic_sensor",
130 | # sensor_type=habitat_sim.SensorType.SEMANTIC,
131 | # channels=1,
132 | # sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,
133 | # )
134 | # sensor_specs.append(ortho_semantic_sensor_spec)
135 |
136 | # TODO Figure out how to implement copying of specs
137 | def create_fisheye_spec(**kw_args):
138 | fisheye_sensor_spec = habitat_sim.FisheyeSensorDoubleSphereSpec()
139 | fisheye_sensor_spec.uuid = "fisheye_sensor"
140 | fisheye_sensor_spec.sensor_type = habitat_sim.SensorType.COLOR
141 | fisheye_sensor_spec.sensor_model_type = (
142 | habitat_sim.FisheyeSensorModelType.DOUBLE_SPHERE
143 | )
144 |
145 | # The default value (alpha, xi) is set to match the lens "GoPro" found in Table 3 of this paper:
146 | # Vladyslav Usenko, Nikolaus Demmel and Daniel Cremers: The Double Sphere
147 | # Camera Model, The International Conference on 3D Vision (3DV), 2018
148 | # You can find the intrinsic parameters for the other lenses in the same table as well.
149 | fisheye_sensor_spec.xi = -0.27
150 | fisheye_sensor_spec.alpha = 0.57
151 | fisheye_sensor_spec.focal_length = [364.84, 364.86]
152 |
153 | fisheye_sensor_spec.resolution = [settings["height"], settings["width"]]
154 | # The default principal_point_offset is the middle of the image
155 | fisheye_sensor_spec.principal_point_offset = None
156 | # default: fisheye_sensor_spec.principal_point_offset = [i/2 for i in fisheye_sensor_spec.resolution]
157 | fisheye_sensor_spec.position = [0, settings["sensor_height"], 0]
158 | for k in kw_args:
159 | setattr(fisheye_sensor_spec, k, kw_args[k])
160 | return fisheye_sensor_spec
161 |
162 | # if settings["fisheye_rgba_sensor"]:
163 | # fisheye_rgba_sensor_spec = create_fisheye_spec(uuid="fisheye_rgba_sensor")
164 | # sensor_specs.append(fisheye_rgba_sensor_spec)
165 | # if settings["fisheye_depth_sensor"]:
166 | # fisheye_depth_sensor_spec = create_fisheye_spec(
167 | # uuid="fisheye_depth_sensor",
168 | # sensor_type=habitat_sim.SensorType.DEPTH,
169 | # channels=1,
170 | # )
171 | # sensor_specs.append(fisheye_depth_sensor_spec)
172 | # if settings["fisheye_semantic_sensor"]:
173 | # fisheye_semantic_sensor_spec = create_fisheye_spec(
174 | # uuid="fisheye_semantic_sensor",
175 | # sensor_type=habitat_sim.SensorType.SEMANTIC,
176 | # channels=1,
177 | # )
178 | # sensor_specs.append(fisheye_semantic_sensor_spec)
179 |
180 | def create_equirect_spec(**kw_args):
181 | equirect_sensor_spec = habitat_sim.EquirectangularSensorSpec()
182 | equirect_sensor_spec.uuid = "equirect_rgba_sensor"
183 | equirect_sensor_spec.sensor_type = habitat_sim.SensorType.COLOR
184 | equirect_sensor_spec.resolution = [settings["height"], settings["width"]]
185 | equirect_sensor_spec.position = [0, settings["sensor_height"], 0]
186 | for k in kw_args:
187 | setattr(equirect_sensor_spec, k, kw_args[k])
188 | return equirect_sensor_spec
189 |
190 | # if settings["equirect_rgba_sensor"]:
191 | # equirect_rgba_sensor_spec = create_equirect_spec(uuid="equirect_rgba_sensor")
192 | # sensor_specs.append(equirect_rgba_sensor_spec)
193 | #
194 | # if settings["equirect_depth_sensor"]:
195 | # equirect_depth_sensor_spec = create_equirect_spec(
196 | # uuid="equirect_depth_sensor",
197 | # sensor_type=habitat_sim.SensorType.DEPTH,
198 | # channels=1,
199 | # )
200 | # sensor_specs.append(equirect_depth_sensor_spec)
201 | #
202 | # if settings["equirect_semantic_sensor"]:
203 | # equirect_semantic_sensor_spec = create_equirect_spec(
204 | # uuid="equirect_semantic_sensor",
205 | # sensor_type=habitat_sim.SensorType.SEMANTIC,
206 | # channels=1,
207 | # )
208 | # sensor_specs.append(equirect_semantic_sensor_spec)
209 |
210 | # create agent specifications
211 | agent_cfg = habitat_sim.agent.AgentConfiguration()
212 | agent_cfg.sensor_specifications = sensor_specs
213 | agent_cfg.action_space = {
214 | "move_forward": habitat_sim.agent.ActionSpec(
215 | "move_forward", habitat_sim.agent.ActuationSpec(amount=0.25)
216 | ),
217 | "turn_left": habitat_sim.agent.ActionSpec(
218 | "turn_left", habitat_sim.agent.ActuationSpec(amount=10.0)
219 | ),
220 | "turn_right": habitat_sim.agent.ActionSpec(
221 | "turn_right", habitat_sim.agent.ActuationSpec(amount=10.0)
222 | ),
223 | }
224 |
225 | # override action space to no-op to test physics
226 | if sim_cfg.enable_physics:
227 | agent_cfg.action_space = {
228 | "move_forward": habitat_sim.agent.ActionSpec(
229 | "move_forward", habitat_sim.agent.ActuationSpec(amount=0.0)
230 | )
231 | }
232 |
233 | return habitat_sim.Configuration(sim_cfg, [agent_cfg])
234 |
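# Illustrative usage sketch (not part of the original module); the scene path below is hypothetical:
#   settings = default_sim_settings.copy()
#   settings["scene_file"] = "path/to/replica/room_0/habitat/mesh_semantic.ply"
#   settings["depth_sensor"] = True
#   settings["semantic_sensor"] = True
#   cfg = make_cfg(settings)
#   sim = habitat_sim.Simulator(cfg)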
--------------------------------------------------------------------------------
/SSR/data_generation/habitat_renderer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os, sys, argparse
3 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
4 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5 | import cv2
6 | import logging
7 | import habitat_sim as hs
8 | import numpy as np
9 | import quaternion
10 | import yaml
11 | import json
12 | from typing import Any, Dict, List, Tuple, Union
13 | from imgviz import label_colormap
14 | from PIL import Image
15 | import matplotlib.pyplot as plt
16 | import transformation
17 | import imgviz
18 | from datetime import datetime
19 | import time
20 | from settings import make_cfg
21 |
22 | # Custom type definitions
23 | Config = Dict[str, Any]
24 | Observation = hs.sensor.Observation
25 | Sim = hs.Simulator
26 |
27 | def init_habitat(config):
28 | """Initialize the Habitat simulator with sensors and scene file"""
29 | _cfg = make_cfg(config)
30 | sim = Sim(_cfg)
31 | sim_cfg = hs.SimulatorConfiguration()
32 | sim_cfg.gpu_device_id = 0
33 | # Note: all sensors must have the same resolution
34 | camera_resolution = [config["height"], config["width"]]
35 | sensors = {
36 | "color_sensor": {
37 | "sensor_type": hs.SensorType.COLOR,
38 | "resolution": camera_resolution,
39 | "position": [0.0, config["sensor_height"], 0.0],
40 | },
41 | "depth_sensor": {
42 | "sensor_type": hs.SensorType.DEPTH,
43 | "resolution": camera_resolution,
44 | "position": [0.0, config["sensor_height"], 0.0],
45 | },
46 | "semantic_sensor": {
47 | "sensor_type": hs.SensorType.SEMANTIC,
48 | "resolution": camera_resolution,
49 | "position": [0.0, config["sensor_height"], 0.0],
50 | },
51 | }
52 |
53 | sensor_specs = []
54 | for sensor_uuid, sensor_params in sensors.items():
55 | if config[sensor_uuid]:
56 | sensor_spec = hs.SensorSpec()
57 | sensor_spec.uuid = sensor_uuid
58 | sensor_spec.sensor_type = sensor_params["sensor_type"]
59 | sensor_spec.resolution = sensor_params["resolution"]
60 | sensor_spec.position = sensor_params["position"]
61 |
62 | sensor_specs.append(sensor_spec)
63 |
64 | # Here you can specify the amount of displacement in a forward action and the turn angle
65 | agent_cfg = hs.agent.AgentConfiguration()
66 | agent_cfg.sensor_specifications = sensor_specs
67 | agent_cfg.action_space = {
68 | "move_forward": hs.agent.ActionSpec(
69 | "move_forward", hs.agent.ActuationSpec(amount=0.25)
70 | ),
71 | "turn_left": hs.agent.ActionSpec(
72 | "turn_left", hs.agent.ActuationSpec(amount=30.0)
73 | ),
74 | "turn_right": hs.agent.ActionSpec(
75 | "turn_right", hs.agent.ActuationSpec(amount=30.0)
76 | ),
77 | }
78 |
79 | hs_cfg = hs.Configuration(sim_cfg, [agent_cfg])
80 | # sim = Sim(hs_cfg)
81 |
82 | if config["enable_semantics"]: # extract instance to class mapping function
83 | assert os.path.exists(config["instance2class_mapping"])
84 | with open(config["instance2class_mapping"], "r") as f:
85 | annotations = json.load(f)
86 | instance_id_to_semantic_label_id = np.array(annotations["id_to_label"])
87 | num_classes = len(annotations["classes"])
88 | label_colour_map = label_colormap()
89 | config["instance2semantic"] = instance_id_to_semantic_label_id
90 | config["classes"] = annotations["classes"]
91 | config["objects"] = annotations["objects"]
92 |
93 | config["num_classes"] = num_classes
94 | config["label_colour_map"] = label_colormap()
95 | config["instance_colour_map"] = label_colormap(500)
96 |
97 |
98 | # add camera intrinsic
99 | # hfov = float(agent_cfg.sensor_specifications[0].parameters['hfov']) * np.pi / 180.
100 | # https://aihabitat.org/docs/habitat-api/view-transform-warp.html
101 | # config['K'] = K
102 | # config['K'] = np.array([[fx, 0.0, 0.0], [0.0, fx, 0.0], [0.0, 0.0, 1.0]],
103 | # dtype=np.float64)
104 |
105 | # hfov = float(agent_cfg.sensor_specifications[0].parameters['hfov'])
106 | # fx = 1.0 / np.tan(hfov / 2.0)
107 | # config['K'] = np.array([[fx, 0.0, 0.0], [0.0, fx, 0.0], [0.0, 0.0, 1.0]],
108 | # dtype=np.float64)
109 |
110 | # Get the intrinsic camera parameters
111 |
112 |
113 | logging.info('Habitat simulator initialized')
114 |
115 | return sim, hs_cfg, config
116 |
117 | def save_renders(save_path, observation, enable_semantic, suffix=""):
118 | save_path_rgb = os.path.join(save_path, "rgb")
119 | save_path_depth = os.path.join(save_path, "depth")
120 | save_path_sem_class = os.path.join(save_path, "semantic_class")
121 | save_path_sem_instance = os.path.join(save_path, "semantic_instance")
122 |
123 | if not os.path.exists(save_path_rgb):
124 | os.makedirs(save_path_rgb)
125 | if not os.path.exists(save_path_depth):
126 | os.makedirs(save_path_depth)
127 | if not os.path.exists(save_path_sem_class):
128 | os.makedirs(save_path_sem_class)
129 | if not os.path.exists(save_path_sem_instance):
130 | os.makedirs(save_path_sem_instance)
131 |
132 | cv2.imwrite(os.path.join(save_path_rgb, "rgb{}.png".format(suffix)), observation["color_sensor"][:,:,::-1]) # change from RGB to BGR for opencv write
133 | cv2.imwrite(os.path.join(save_path_depth, "depth{}.png".format(suffix)), observation["depth_sensor_mm"])
134 |
135 | if enable_semantic:
136 | cv2.imwrite(os.path.join(save_path_sem_class, "semantic_class{}.png".format(suffix)), observation["semantic_class"])
137 | cv2.imwrite(os.path.join(save_path_sem_class, "vis_sem_class{}.png".format(suffix)), observation["vis_sem_class"][:,:,::-1])
138 |
139 | cv2.imwrite(os.path.join(save_path_sem_instance, "semantic_instance{}.png".format(suffix)), observation["semantic_instance"])
140 | cv2.imwrite(os.path.join(save_path_sem_instance, "vis_sem_instance{}.png".format(suffix)), observation["vis_sem_instance"][:,:,::-1])
141 |
142 |
143 | def render(sim, config):
144 | """Return the sensor observations and ground truth pose"""
145 | observation = sim.get_sensor_observations()
146 |
147 |     # process rgb image: change from RGBA to RGB
148 | observation['color_sensor'] = observation['color_sensor'][..., 0:3]
149 | rgb_img = observation['color_sensor']
150 |
151 | # process depth
152 | depth_mm = (observation['depth_sensor'].copy()*1000).astype(np.uint16) # change meters to mm
153 | observation['depth_sensor_mm'] = depth_mm
154 |
155 | # process semantics
156 | if config['enable_semantics']:
157 |
158 | # Assuming the scene has no more than 65534 objects
159 | observation['semantic_instance'] = np.clip(observation['semantic_sensor'].astype(np.uint16), 0, 65535)
160 | # observation['semantic_instance'][observation['semantic_instance']==12]=0 # mask out certain instance
161 | # Convert instance IDs to class IDs
162 |
163 |
164 | # observation['semantic_classes'] = np.zeros(observation['semantic'].shape, dtype=np.uint8)
165 | # TODO make this conversion more efficient
166 | semantic_class = config["instance2semantic"][observation['semantic_instance']]
167 | semantic_class[semantic_class < 0] = 0
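        # config["instance2semantic"] is a lookup table indexed by instance id, so the fancy
        # indexing above converts the whole instance image to class ids in one vectorised step;
        # unlabelled instances (negative ids) are folded into the void class 0.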
168 |
169 | vis_sem_class = config["label_colour_map"][semantic_class]
170 | vis_sem_instance = config["instance_colour_map"][observation['semantic_instance']] # may cause error when having more than 255 instances in the scene
171 |
172 | observation['semantic_class'] = semantic_class.astype(np.uint8)
173 | observation["vis_sem_class"] = vis_sem_class.astype(np.uint8)
174 | observation["vis_sem_instance"] = vis_sem_instance.astype(np.uint8)
175 |
176 | # del observation["semantic_sensor"]
177 |
178 | # Get the camera ground truth pose (T_HC) in the habitat frame from the
179 | # position and orientation
180 | t_HC = sim.get_agent(0).get_state().position
181 | q_HC = sim.get_agent(0).get_state().rotation
182 | T_HC = transformation.combine_pose(t_HC, q_HC)
183 |
184 | observation['T_HC'] = T_HC
185 | observation['T_WC'] = transformation.Thc_to_Twc(T_HC)
186 |
187 | return observation
188 |
189 | def set_agent_position(sim, pose):
190 | # Move the agent
191 | R = pose[:3, :3]
192 | orientation_quat = quaternion.from_rotation_matrix(R)
193 | t = pose[:3, 3]
194 | position = t
195 |
196 | orientation = [orientation_quat.x, orientation_quat.y, orientation_quat.z, orientation_quat.w]
197 | agent = sim.get_agent(0)
198 | agent_state = hs.agent.AgentState(position, orientation)
199 | # agent.set_state(agent_state, reset_sensors=False)
200 | agent.set_state(agent_state)
201 |
202 | def main():
203 |     parser = argparse.ArgumentParser(description='Render Colour, Depth, Semantic and Instance labels from Habitat-Sim.')
204 | parser.add_argument('--config_file', type=str,
205 | default="./data_generation/replica_render_config_vMAP.yaml",
206 | help='the path to custom config file.')
207 | args = parser.parse_args()
208 |
209 | """Initialize the config dict and Habitat simulator"""
210 | # Read YAML file
211 | with open(args.config_file, 'r') as f:
212 | config = yaml.safe_load(f)
213 |
214 | config["save_path"] = os.path.join(config["save_path"])
215 | if not os.path.exists(config["save_path"]):
216 | os.makedirs(config["save_path"])
217 |
218 | T_wc = np.loadtxt(config["pose_file"]).reshape(-1, 4, 4)
219 | Ts_cam2world = T_wc
220 |
221 | print("-----Initialise and Set Habitat-Sim-----")
222 | sim, hs_cfg, config = init_habitat(config)
223 | # Set agent state
224 | sim.initialize_agent(config["default_agent"])
225 |
226 | """Set agent state"""
227 | print("-----Render Images from Habitat-Sim-----")
228 | with open(os.path.join(config["save_path"], 'render_config.yaml'), 'w') as outfile:
229 | yaml.dump(config, outfile, default_flow_style=False)
230 | start_time = time.time()
231 | total_render_num = Ts_cam2world.shape[0]
232 | for i in range(total_render_num):
233 |         if i % 100 == 0:
234 | print("Rendering Process: {}/{}".format(i, total_render_num))
235 | set_agent_position(sim, transformation.Twc_to_Thc(Ts_cam2world[i]))
236 |
237 | # replica mode
238 | observation = render(sim, config)
239 | save_renders(config["save_path"], observation, config["enable_semantics"], suffix="_{}".format(i))
240 |
241 | end_time = time.time()
242 | print("-----Finish Habitat Rendering, Showing Trajectories.-----")
243 | print("Average rendering time per image is {} seconds.".format((end_time-start_time)/Ts_cam2world.shape[0]))
244 |
245 | if __name__ == "__main__":
246 | main()
247 |
248 |
249 |
--------------------------------------------------------------------------------
/SSR/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 | import imgviz
5 | from imgviz import label_colormap
6 | from imgviz import draw as draw_module
7 | import matplotlib.pyplot as plt
8 |
9 | def numpy2cv(image):
10 | """
11 |
12 |     :param image: a floating-point numpy image of shape [H, W, 3] with values in [0, 1]
13 |     :return: the image as a uint8 BGR array in OpenCV channel order
14 |     """
15 |
16 |     image_cv = np.copy(image)
17 |     image_cv = (np.clip(image_cv, 0, 1) * 255).astype(np.uint8)[:, :, ::-1]  # uint8 BGR opencv format
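    # OpenCV I/O routines (cv2.imshow / cv2.imwrite) expect uint8 images in BGR channel order,
    # hence the scaling to [0, 255] and the channel reversal above.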
18 | return image_cv
19 |
20 |
21 |
22 |
23 | def plot_semantic_legend(
24 | label,
25 | label_name,
26 | colormap=None,
27 | font_size=30,
28 | font_path=None,
29 | save_path=None,
30 | img_name=None):
31 |
32 |
33 | """Plot Colour Legend for Semantic Classes
34 |
35 | Parameters
36 | ----------
37 | label: numpy.ndarray, (N,), int
38 |         One-dimensional array containing the unique labels of existing semantic classes
39 |     label_name: list of string
40 |         Label id to label name.
41 | font_size: int
42 | Font size (default: 30).
43 | colormap: numpy.ndarray, (M, 3), numpy.uint8
44 | Label id to color.
45 | By default, :func:`~imgviz.label_colormap` is used.
46 | font_path: str
47 | Font path.
48 |
49 | Returns
50 | -------
51 | res: numpy.ndarray, (H, W, 3), numpy.uint8
52 | Legend image of visualising semantic labels.
53 |
54 | """
55 |
56 | label = np.unique(label)
57 | if colormap is None:
58 | colormap = label_colormap()
59 |
60 | text_sizes = np.array(
61 | [
62 | draw_module.text_size(
63 | label_name[l], font_size, font_path=font_path
64 | )
65 | for l in label
66 | ]
67 | )
68 |
69 | text_height, text_width = text_sizes.max(axis=0)
70 | legend_height = text_height * len(label) + 5
71 | legend_width = text_width + 20 + (text_height - 10)
72 |
73 |
74 | legend = np.zeros((legend_height+50, legend_width+50, 3), dtype=np.uint8)
75 | aabb1 = np.array([25, 25], dtype=float)
76 | aabb2 = aabb1 + (legend_height, legend_width)
77 |
78 | legend = draw_module.rectangle(
79 | legend, aabb1, aabb2, fill=(255, 255, 255)
80 | ) # fill the legend area by white colour
81 |
82 | y1, x1 = aabb1.round().astype(int)
83 | y2, x2 = aabb2.round().astype(int)
84 |
85 | for i, l in enumerate(label):
86 | box_aabb1 = aabb1 + (i * text_height + 5, 5)
87 | box_aabb2 = box_aabb1 + (text_height - 10, text_height - 10)
88 | legend = draw_module.rectangle(
89 | legend, aabb1=box_aabb1, aabb2=box_aabb2, fill=colormap[l]
90 | )
91 | legend = draw_module.text(
92 | legend,
93 | yx=aabb1 + (i * text_height, 10 + (text_height - 10)),
94 | text=label_name[l],
95 | size=font_size,
96 | font_path=font_path,
97 | )
98 |
99 |
100 | plt.figure(1)
101 | plt.title("Semantic Legend!")
102 | plt.imshow(legend)
103 | plt.axis("off")
104 |
105 | img_arr = imgviz.io.pyplot_to_numpy()
106 | plt.close()
107 | if save_path is not None:
108 | import cv2
109 | if img_name is not None:
110 | sav_dir = os.path.join(save_path, img_name)
111 | else:
112 | sav_dir = os.path.join(save_path, "semantic_class_Legend.png")
113 | # plt.savefig(sav_dir, bbox_inches='tight', pad_inches=0)
114 | cv2.imwrite(sav_dir, img_arr[:,:,::-1])
115 | return img_arr
116 |
117 |
118 |
119 |
120 | def image_vis(
121 | pred_data_dict,
122 | gt_data_dict,
123 | # enable_sem = True
124 | ):
125 | to8b_np = lambda x: (255 * np.clip(x, 0, 1)).astype(np.uint8)
126 | batch_size = pred_data_dict["vis_deps"].shape[0]
127 |
128 | gt_dep_row = np.concatenate(np.split(gt_data_dict["vis_deps"], batch_size, 0), axis=-2)[0]
129 | gt_raw_dep_row = np.concatenate(np.split(gt_data_dict["deps"], batch_size, 0), axis=-1)[0]
130 |
131 | gt_sem_row = np.concatenate(np.split(gt_data_dict["vis_sems"], batch_size, 0), axis=-2)[0]
132 | gt_sem_clean_row = np.concatenate(np.split(gt_data_dict["vis_sems_clean"], batch_size, 0), axis=-2)[0]
133 | gt_rgb_row = np.concatenate(np.split(gt_data_dict["rgbs"], batch_size, 0), axis=-2)[0]
134 |
135 | pred_dep_row = np.concatenate(np.split(pred_data_dict["vis_deps"], batch_size, 0), axis=-2)[0]
136 | pred_raw_dep_row = np.concatenate(np.split(pred_data_dict["deps"], batch_size, 0), axis=-1)[0]
137 |
138 | pred_sem_row = np.concatenate(np.split(pred_data_dict["vis_sems"], batch_size, 0), axis=-2)[0]
139 | pred_entropy_row = np.concatenate(np.split(pred_data_dict["vis_sem_uncers"], batch_size, 0), axis=-2)[0]
140 | pred_rgb_row = np.concatenate(np.split(pred_data_dict["rgbs"], batch_size, 0), axis=-2)[0]
141 |
142 | rgb_diff = np.abs(gt_rgb_row - pred_rgb_row)
143 |
144 | dep_diff = np.abs(gt_raw_dep_row - pred_raw_dep_row)
145 | dep_diff[gt_raw_dep_row== 0] = 0
146 | dep_diff_vis = imgviz.depth2rgb(dep_diff)
147 |
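    # Stacked visualisation rows, top to bottom: GT / predicted / |diff| RGB, GT / predicted /
    # |diff| depth, then the clean GT semantics, the (possibly perturbed) training semantics,
    # the predicted semantics, and the predicted semantic uncertainty (entropy).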
148 | views = [to8b_np(gt_rgb_row), to8b_np(pred_rgb_row), to8b_np(rgb_diff),
149 | gt_dep_row, pred_dep_row, dep_diff_vis,
150 | gt_sem_clean_row, gt_sem_row, pred_sem_row, pred_entropy_row]
151 |
152 | viz = np.vstack(views)
153 | return viz
154 |
155 |
156 |
157 |
158 | nyu13_colour_code = (np.array([[0, 0, 0],
159 | [0, 0, 1], # BED
160 | [0.9137,0.3490,0.1882], #BOOKS
161 | [0, 0.8549, 0], #CEILING
162 | [0.5843,0,0.9412], #CHAIR
163 | [0.8706,0.9451,0.0941], #FLOOR
164 | [1.0000,0.8078,0.8078], #FURNITURE
165 | [0,0.8784,0.8980], #OBJECTS
166 | [0.4157,0.5333,0.8000], #PAINTING
167 | [0.4588,0.1137,0.1608], #SOFA
168 | [0.9412,0.1373,0.9216], #TABLE
169 | [0,0.6549,0.6118], #TV
170 | [0.9765,0.5451,0], #WALL
171 | [0.8824,0.8980,0.7608]])*255).astype(np.uint8)
172 |
173 |
174 | # color palette for nyu34 labels
175 | nyu34_colour_code = np.array([
176 | (0, 0, 0),
177 |
178 | (174, 199, 232), # wall
179 | (152, 223, 138), # floor
180 | (31, 119, 180), # cabinet
181 | (255, 187, 120), # bed
182 | (188, 189, 34), # chair
183 |
184 | (140, 86, 75), # sofa
185 | (255, 152, 150), # table
186 | (214, 39, 40), # door
187 | (197, 176, 213), # window
188 | # (148, 103, 189), # bookshelf
189 |
190 | (196, 156, 148), # picture
191 | (23, 190, 207), # counter
192 | (178, 76, 76), # blinds
193 | (247, 182, 210), # desk
194 | (66, 188, 102), # shelves
195 |
196 | (219, 219, 141), # curtain
197 | # (140, 57, 197), # dresser
198 | (202, 185, 52), # pillow
199 | # (51, 176, 203), # mirror
200 | (200, 54, 131), # floor
201 |
202 | (92, 193, 61), # clothes
203 | (78, 71, 183), # ceiling
204 | (172, 114, 82), # books
205 | (255, 127, 14), # refrigerator
206 | (91, 163, 138), # tv
207 |
208 | (153, 98, 156), # paper
209 | (140, 153, 101), # towel
210 | # (158, 218, 229), # shower curtain
211 | (100, 125, 154), # box
212 | # (178, 127, 135), # white board
213 |
214 | # (120, 185, 128), # person
215 | (146, 111, 194), # night stand
216 | (44, 160, 44), # toilet
217 | (112, 128, 144), # sink
218 | (96, 207, 209), # lamp
219 |
220 | (227, 119, 194), # bathtub
221 | (213, 92, 176), # bag
222 | (94, 106, 211), # other struct
223 | (82, 84, 163), # otherfurn
224 | (100, 85, 144) # other prop
225 | ]).astype(np.uint8)
226 |
227 |
228 |
229 | # color palette for nyu40 labels
230 | nyu40_colour_code = np.array([
231 | (0, 0, 0),
232 |
233 | (174, 199, 232), # wall
234 | (152, 223, 138), # floor
235 | (31, 119, 180), # cabinet
236 | (255, 187, 120), # bed
237 | (188, 189, 34), # chair
238 |
239 | (140, 86, 75), # sofa
240 | (255, 152, 150), # table
241 | (214, 39, 40), # door
242 | (197, 176, 213), # window
243 | (148, 103, 189), # bookshelf
244 |
245 | (196, 156, 148), # picture
246 | (23, 190, 207), # counter
247 | (178, 76, 76), # blinds
248 | (247, 182, 210), # desk
249 | (66, 188, 102), # shelves
250 |
251 | (219, 219, 141), # curtain
252 | (140, 57, 197), # dresser
253 | (202, 185, 52), # pillow
254 | (51, 176, 203), # mirror
255 | (200, 54, 131), # floor
256 |
257 | (92, 193, 61), # clothes
258 | (78, 71, 183), # ceiling
259 | (172, 114, 82), # books
260 | (255, 127, 14), # refrigerator
261 | (91, 163, 138), # tv
262 |
263 | (153, 98, 156), # paper
264 | (140, 153, 101), # towel
265 | (158, 218, 229), # shower curtain
266 | (100, 125, 154), # box
267 | (178, 127, 135), # white board
268 |
269 | (120, 185, 128), # person
270 | (146, 111, 194), # night stand
271 | (44, 160, 44), # toilet
272 | (112, 128, 144), # sink
273 | (96, 207, 209), # lamp
274 |
275 | (227, 119, 194), # bathtub
276 | (213, 92, 176), # bag
277 | (94, 106, 211), # other struct
278 | (82, 84, 163), # otherfurn
279 | (100, 85, 144) # other prop
280 | ]).astype(np.uint8)
281 |
282 |
283 | if __name__ == "__main__":
284 | # nyu40_class_name_string = ["void",
285 | # "wall", "floor", "cabinet", "bed", "chair",
286 | # "sofa", "table", "door", "window", "book",
287 | # "picture", "counter", "blinds", "desk", "shelves",
288 | # "curtain", "dresser", "pillow", "mirror", "floor",
289 | # "clothes", "ceiling", "books", "fridge", "tv",
290 | # "paper", "towel", "shower curtain", "box", "white board",
291 | # "person", "night stand", "toilet", "sink", "lamp",
292 | # "bath tub", "bag", "other struct", "other furntr", "other prop"] # NYUv2-40-class
293 |
294 | # legend_img_arr = plot_semantic_legend(np.arange(41), nyu40_class_name_string,
295 | # colormap=nyu40_colour_code,
296 | # save_path="/home/shuaifeng/Documents/PhD_Research/SemanticSceneRepresentations/SSR",
297 | # img_name="nyu40_legned.png")
298 |
299 |
300 | nyu13_class_name_string = ["void",
301 | "bed", "books", "ceiling", "chair", "floor",
302 | "furniture", "objects", "painting/picture", "sofa", "table",
303 | "TV", "wall", "window"] # NYUv2-13-class
304 |
305 | legend_img_arr = plot_semantic_legend(np.arange(14), nyu13_class_name_string,
306 | colormap=nyu13_colour_code,
307 | save_path="/home/shuaifeng/Documents/PhD_Research/SemanticSceneRepresentations/SSR",
308 |                                 img_name="nyu13_legend.png")
--------------------------------------------------------------------------------
/SSR/extract_colour_mesh.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import defaultdict
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import argparse
7 | from SSR.datasets.replica import replica_datasets
8 | from SSR.datasets.scannet import scannet_datasets
9 | from SSR.datasets.replica_nyu import replica_nyu_cnn_datasets
10 | from SSR.datasets.scannet import scannet_datasets
11 | import open3d as o3d
12 |
13 | from SSR.training import trainer
14 | from SSR.models.model_utils import run_network
15 | from SSR.geometry.occupancy import grid_within_bound
16 | from SSR.visualisation import open3d_utils
17 | import numpy as np
18 | import yaml
19 | import json
20 |
21 | import skimage.measure as ski_measure
22 | import time
23 | from imgviz import label_colormap
24 | import trimesh
25 |
26 |
27 | @torch.no_grad()
28 | def render_fn(trainer, rays, chunk):
29 | """Do batched inference on rays using chunk."""
30 | B = rays.shape[0]
31 | results = defaultdict(list)
32 | for i in range(0, B, chunk):
33 | rendered_ray_chunks = \
34 | trainer.render_rays(rays[i:i+chunk])
35 |
36 | for k, v in rendered_ray_chunks.items():
37 | results[k] += [v.cpu()]
38 |
39 | for k, v in results.items():
40 | results[k] = torch.cat(v, 0)
41 | return results
42 |
43 |
44 | def train():
45 | parser = argparse.ArgumentParser()
46 | parser.add_argument('--config_file', type=str, default="/home/shuaifeng/Documents/PhD_Research/CodeRelease/SemanticSceneRepresentations/SSR/configs/SSR_room0_config_test.yaml", help='config file name.')
47 |
48 | parser.add_argument('--mesh_dir', type=str, required=True, help='Path to scene file, e.g., ROOT_PATH/Replica/mesh/room_0/')
49 | parser.add_argument('--training_data_dir', type=str, required=True, help='Path to rendered data.')
50 | parser.add_argument('--save_dir', type=str, required=True, help='Path to the directory saving training logs and ckpts.')
51 |
52 | parser.add_argument('--use_vertex_normal', action="store_true", help='use vertex normals to compute color')
53 | parser.add_argument('--near_t', type=float, default=2.0, help='the near bound factor to start the ray')
54 | parser.add_argument('--sem', action="store_true")
55 | parser.add_argument('--grid_dim', type=int, default=256)
56 | parser.add_argument('--gpu', type=str, default="", help='GPU IDs.')
57 |
58 |
59 |
60 | args = parser.parse_args()
61 |
62 | config_file_path = args.config_file
63 |
64 | # Read YAML file
65 | with open(config_file_path, 'r') as f:
66 | config = yaml.safe_load(f)
67 | if len(args.gpu)>0:
68 | config["experiment"]["gpu"] = args.gpu
69 | print("Experiment GPU is {}.".format(config["experiment"]["gpu"]))
70 | trainer.select_gpus(config["experiment"]["gpu"])
71 |
72 |
73 | to8b_np = lambda x: (255 * np.clip(x, 0, 1)).astype(np.uint8)
74 | logits_2_label = lambda x: torch.argmax(torch.nn.functional.softmax(x, dim=-1),dim=-1)
75 |
76 | # Cast intrinsics to right types
77 | ssr_trainer = trainer.SSRTrainer(config)
78 |
79 |
80 | near_t = args.near_t
81 | mesh_dir = args.mesh_dir
82 | training_data_dir = args.training_data_dir
83 | save_dir = args.save_dir
84 | mesh_recon_save_dir = os.path.join(save_dir, "mesh_reconstruction")
85 | os.makedirs(mesh_recon_save_dir, exist_ok=True)
86 |
87 |
88 | info_mesh_file = os.path.join(mesh_dir, "habitat", "info_semantic.json")
89 | with open(info_mesh_file, "r") as f:
90 | annotations = json.load(f)
91 |
92 | instance_id_to_semantic_label_id = np.array(annotations["id_to_label"])
93 | instance_id_to_semantic_label_id[instance_id_to_semantic_label_id<=0] = 0
94 | semantic_classes = np.unique(instance_id_to_semantic_label_id)
95 | num_classes = len(semantic_classes) # including void class--0
96 | label_colour_map = label_colormap()[semantic_classes]
97 | valid_colour_map = label_colour_map[1:]
98 |
99 | total_num = 900
100 | step = 5
101 | ids = list(range(total_num))
102 | train_ids = list(range(0, total_num, step))
103 | test_ids = [x+2 for x in train_ids]
104 |
105 | replica_data_loader = replica_datasets.ReplicaDatasetCache(data_dir=training_data_dir,
106 | train_ids=train_ids, test_ids=test_ids,
107 | img_h=config["experiment"]["height"],
108 | img_w=config["experiment"]["width"])
109 |
110 | ssr_trainer.set_params_replica()
111 | ssr_trainer.prepare_data_replica(replica_data_loader)
112 |
113 | ##########################
114 |
115 | # Create nerf model, init optimizer
116 | ssr_trainer.create_ssr()
117 | # Create rays in world coordinates
118 | ssr_trainer.init_rays()
119 |
120 | # load_ckpt into NeRF
121 | ckpt_path = os.path.join(save_dir, "checkpoints", "200000.ckpt")
122 | print('Reloading from', ckpt_path)
123 | ckpt = torch.load(ckpt_path)
124 |
125 | start = ckpt['global_step']
126 | ssr_trainer.ssr_net_coarse.load_state_dict(ckpt['network_coarse_state_dict'])
127 | ssr_trainer.ssr_net_fine.load_state_dict(ckpt['network_fine_state_dict'])
128 | ssr_trainer.optimizer.load_state_dict(ckpt["optimizer_state_dict"])
129 | ssr_trainer.training = False # enable testing mode before rendering results, need to set back during training!
130 | ssr_trainer.ssr_net_coarse.eval()
131 | ssr_trainer.ssr_net_fine.eval()
132 |
133 |
134 | level = 0.45 # level = 0
135 | threshold = 0.2
136 | draw_cameras = True
137 | grid_dim = args.grid_dim
138 |
139 | train_Ts_np = replica_data_loader.train_samples["T_wc"]
140 | mesh_file = os.path.join(mesh_dir,"mesh.ply")
141 | assert os.path.exists(mesh_file)
142 |
143 | trimesh_scene = trimesh.load(mesh_file, process=False)
144 |
145 | to_origin_transform, extents = trimesh.bounds.oriented_bounds(trimesh_scene)
146 | T_extent_to_scene = np.linalg.inv(to_origin_transform)
147 | scene_transform = T_extent_to_scene
148 | scene_extents = extents
149 | grid_query_pts, scene_scale = grid_within_bound([-1.0, 1.0], scene_extents, scene_transform, grid_dim=grid_dim)
150 |
151 | grid_query_pts = grid_query_pts.cuda().reshape(-1,1,3) # Num_rays, 1, 3-xyz
152 | viewdirs = torch.zeros_like(grid_query_pts).reshape(-1, 3)
153 | st = time.time()
154 | print("Initialise Trimesh Scenes")
155 |
156 | with torch.no_grad():
157 | chunk = 1024
158 | run_MLP_fn = lambda pts: run_network(inputs=pts, viewdirs=torch.zeros_like(pts).squeeze(1),
159 | fn=ssr_trainer.ssr_net_fine, embed_fn=ssr_trainer.embed_fn,
160 | embeddirs_fn=ssr_trainer.embeddirs_fn, netchunk=int(2048*128))
161 |
162 | raw = torch.cat([run_MLP_fn(grid_query_pts[i: i+chunk]).cpu() for i in range(0, grid_query_pts.shape[0], chunk)], dim=0)
163 | rgb = torch.sigmoid(raw[..., :3]) # [N_rays, N_samples, 3]
164 | alpha = raw[..., 3] # [N]
165 | sem_logits = raw[..., 4:] # [N_rays, N_samples, num_class]
166 | label_fine = logits_2_label(sem_logits).view(-1).cpu().numpy()
167 | vis_label_colour = label_colour_map[label_fine+1]
168 |
169 | print("Finish Computing Semantics!")
170 | print()
171 |
172 | def occupancy_activation(alpha, distances):
173 | occ = 1.0 - torch.exp(-F.relu(alpha) * distances)
174 | # notice we apply RELU to raw sigma before computing alpha
175 | return occ
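        # Standard NeRF opacity: alpha = 1 - exp(-sigma * delta); here a constant delta (the
        # nominal inter-sample spacing) is used so occupancies are comparable across the grid.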
176 |
177 | # voxel_size = (ssr_trainer.far - ssr_trainer.near) / grid_dim # or self.N_importance
178 | voxel_size = (ssr_trainer.far - ssr_trainer.near) / ssr_trainer.N_importance # or self.N_importance
179 | occ = occupancy_activation(alpha, voxel_size)
180 | print("Compute Occupancy Grids")
181 | occ = occ.reshape(grid_dim, grid_dim, grid_dim)
182 | occupancy_grid = occ.detach().cpu().numpy()
183 |
184 | print('fraction occupied:', (occupancy_grid > threshold).mean())
185 | print('Max Occ: {}, Min Occ: {}, Mean Occ: {}'.format(occupancy_grid.max(), occupancy_grid.min(), occupancy_grid.mean()))
186 | vertices, faces, vertex_normals, _ = ski_measure.marching_cubes(occupancy_grid, level=level, gradient_direction='ascent')
187 | print()
188 |
189 | dim = occupancy_grid.shape[0]
190 | vertices = vertices / (dim - 1)
191 | mesh = trimesh.Trimesh(vertices=vertices, vertex_normals=vertex_normals, faces=faces)
192 |
193 | # Transform to [-1, 1] range
194 | mesh_canonical = mesh.copy()
195 | mesh_canonical.apply_translation([-0.5, -0.5, -0.5])
196 | mesh_canonical.apply_scale(2)
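    # Marching cubes returns vertices in grid-index coordinates; dividing by (dim - 1) maps them
    # into [0, 1]^3, and the translation/scale above recentres them into the [-1, 1]^3 canonical
    # cube that grid_within_bound sampled, before the scene-scale and oriented-box transform below.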
197 |
198 | scene_scale = scene_extents/2.0
199 | # Transform to scene coordinates
200 | mesh_canonical.apply_scale(scene_scale)
201 | mesh_canonical.apply_transform(scene_transform)
202 | # mesh.show()
203 | exported = trimesh.exchange.export.export_mesh(mesh_canonical, os.path.join(mesh_recon_save_dir, 'mesh_canonical.ply'))
204 | print("Saving Marching Cubes mesh to mesh_canonical.ply !")
205 | exported = trimesh.exchange.export.export_mesh(mesh_canonical, os.path.join(mesh_recon_save_dir, 'mesh.ply'))
206 | print("Saving Marching Cubes mesh to mesh.ply !")
207 |
208 |
209 | o3d_mesh = open3d_utils.trimesh_to_open3d(mesh)
210 | o3d_mesh_canonical = open3d_utils.trimesh_to_open3d(mesh_canonical)
211 |
212 | print('Removing noise ...')
213 | print(f'Original Mesh has {len(o3d_mesh_canonical.vertices)/1e6:.2f} M vertices and {len(o3d_mesh_canonical.triangles)/1e6:.2f} M faces.')
214 | o3d_mesh_canonical_clean = open3d_utils.clean_mesh(o3d_mesh_canonical, keep_single_cluster=False, min_num_cluster=400)
215 |
216 | vertices_ = np.array(o3d_mesh_canonical_clean.vertices).reshape([-1, 3]).astype(np.float32)
217 | triangles = np.asarray(o3d_mesh_canonical_clean.triangles) # (n, 3) int
218 | N_vertices = vertices_.shape[0]
219 | print(f'Denoised Mesh has {len(o3d_mesh_canonical_clean.vertices)/1e6:.2f} M vertices and {len(o3d_mesh_canonical_clean.triangles)/1e6:.2f} M faces.')
220 |
221 | print("###########################################")
222 | print()
223 | print("Using Normals for colour predictions!")
224 | print()
225 | print("###########################################")
226 |
227 | ## use normal vector method as suggested by the author, see https://github.com/bmild/nerf/issues/44
228 | mesh_recon_save_dir = os.path.join(mesh_recon_save_dir,"use_vertex_normal")
229 | os.makedirs(mesh_recon_save_dir, exist_ok=True)
230 |
231 | selected_mesh = o3d_mesh_canonical_clean
232 | rays_d = - torch.FloatTensor(np.asarray(selected_mesh.vertex_normals)) # use negative normal directions as ray marching directions
233 | near = 0.1 * torch.ones_like(rays_d[:, :1])
234 | far = 10.0 * torch.ones_like(rays_d[:, :1])
235 | rays_o = torch.FloatTensor(vertices_) - rays_d * near * args.near_t
236 | viewdirs = rays_d
237 | viewdirs = viewdirs / torch.norm(viewdirs, dim=-1, keepdim=True).float()
238 | rays = torch.cat([rays_o, rays_d, near, far, viewdirs], -1)
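    # Each kept vertex is coloured by marching a ray back along its negated vertex normal: the
    # origin is offset along the normal by near_t * near so the ray marches back towards the
    # surface, and the packed [origin, direction, near, far, viewdirs] layout matches the one
    # produced by SSR.models.rays.create_rays.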
239 |
240 | # provide ray directions as input
241 | rays = rays.cuda()
242 | with torch.no_grad():
243 | chunk=4096
244 | # chunk=80*1024
245 | results = render_fn(ssr_trainer, rays, chunk)
246 |
247 | # combine the output and write to file
248 | if args.sem:
249 | labels = logits_2_label(results["sem_logits_fine"]).numpy()
250 | vis_labels = valid_colour_map[labels]
251 | v_colors = vis_labels
252 | else:
253 | rgbs = results["rgb_fine"].numpy()
254 | rgbs = to8b_np(rgbs)
255 | v_colors = rgbs
256 |
257 | v_colors = v_colors.astype(np.uint8)
258 |
259 |
260 | o3d_mesh_canonical_clean.vertex_colors = o3d.utility.Vector3dVector(v_colors/255.0)
261 |
262 | if args.sem:
263 | o3d.io.write_triangle_mesh(os.path.join(mesh_recon_save_dir, 'semantic_mesh_canonical_dim{}neart_{}.ply'.format(grid_dim, near_t)), o3d_mesh_canonical_clean)
264 | print("Saving Marching Cubes mesh to semantic_mesh_canonical_dim{}neart_{}.ply".format(grid_dim, near_t))
265 | else:
266 | o3d.io.write_triangle_mesh(os.path.join(mesh_recon_save_dir, 'colour_mesh_canonical_dim{}neart_{}.ply'.format(grid_dim, near_t)), o3d_mesh_canonical_clean)
267 | print("Saving Marching Cubes mesh to colour_mesh_canonical_dim{}neart_{}.ply".format(grid_dim, near_t))
268 |
269 | print('Done!')
270 |
271 |
272 | if __name__=='__main__':
273 | train()
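274 |
275 |
276 | # --- Editor's sketch (not part of the original script) -----------------------
277 | # A minimal, self-contained illustration of the density-to-occupancy mapping
278 | # used by occupancy_activation() above, i.e. occ = 1 - exp(-relu(sigma) * voxel_size).
279 | # It can help when choosing the marching-cubes iso-level, since it shows which
280 | # raw densities map to a given occupancy. The sigma values and the default
281 | # voxel_size below are arbitrary illustrative numbers.
282 | def _occupancy_sanity_check(voxel_size=0.05):
283 |     import numpy as np
284 |     sigmas = np.array([0.1, 1.0, 10.0, 100.0])
285 |     occupancies = 1.0 - np.exp(-np.maximum(sigmas, 0.0) * voxel_size)
286 |     for sigma, occ in zip(sigmas, occupancies):
287 |         print("sigma = {:7.2f} -> occupancy = {:.4f}".format(sigma, occ))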
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Semantic-NeRF SOFTWARE
2 |
3 | LICENCE AGREEMENT
4 |
5 | WE (Imperial College of Science, Technology and Medicine, (“Imperial College
6 | London”)) ARE WILLING TO LICENSE THIS SOFTWARE TO YOU (a licensee “You”) ONLY
7 | ON THE CONDITION THAT YOU ACCEPT ALL OF THE TERMS CONTAINED IN THE FOLLOWING
8 | AGREEMENT. PLEASE READ THE AGREEMENT CAREFULLY BEFORE DOWNLOADING THE SOFTWARE.
9 | BY EXERCISING THE OPTION TO DOWNLOAD THE SOFTWARE YOU AGREE TO BE BOUND BY THE
10 | TERMS OF THE AGREEMENT.
11 |
12 | SOFTWARE LICENCE AGREEMENT (EXCLUDING BSD COMPONENTS)
13 |
14 | 1.This Agreement pertains to a worldwide, non-exclusive, temporary, fully
15 | paid-up, royalty free, non-transferable, non-sub- licensable licence (the
16 | “Licence”) to use the elastic fusion source code, including any modification,
17 | part or derivative (the “Software”).
18 |
19 | Ownership and Licence. Your rights to use and download the Software onto your
20 | computer, and all other copies that You are authorised to make, are specified
21 | in this Agreement. However, we (or our licensors) retain all rights, including
22 | but not limited to all copyright and other intellectual property rights
23 | anywhere in the world, in the Software not expressly granted to You in this
24 | Agreement.
25 |
26 | 2. Permitted use of the Licence:
27 |
28 | (a) You may download and install the Software onto one computer or server for
29 | use in accordance with Clause 2(b) of this Agreement provided that You ensure
30 | that the Software is not accessible by other users unless they have themselves
31 | accepted the terms of this licence agreement.
32 |
33 | (b) You may use the Software solely for non-commercial, internal or academic
34 | research purposes and only in accordance with the terms of this Agreement. You
35 | may not use the Software for commercial purposes, including but not limited to
36 | (1) integration of all or part of the source code or the Software into a
37 | product for sale or licence by or on behalf of You to third parties or (2) use
38 | of the Software or any derivative of it for research to develop software
39 | products for sale or licence to a third party or (3) use of the Software or any
40 | derivative of it for research to develop non-software products for sale or
41 | licence to a third party, or (4) use of the Software to provide any service to
42 | an external organisation for which payment is received.
43 |
44 | Should You wish to use the Software for commercial purposes, You shall
45 | email researchcontracts.engineering@imperial.ac.uk .
46 |
47 | (c) Right to Copy. You may copy the Software for back-up and archival purposes,
48 | provided that each copy is kept in your possession and provided You reproduce
49 | our copyright notice (set out in Schedule 1) on each copy.
50 |
51 | (d) Transfer and sub-licensing. You may not rent, lend, or lease the Software
52 | and You may not transmit, transfer or sub-license this licence to use the
53 | Software or any of your rights or obligations under this Agreement to another
54 | party.
55 |
56 | (e) Identity of Licensee. The licence granted herein is personal to You. You
57 | shall not permit any third party to access, modify or otherwise use the
58 | Software nor shall You access modify or otherwise use the Software on behalf of
59 | any third party. If You wish to obtain a licence for multiple users or a site
60 | licence for the Software please contact us
61 | at researchcontracts.engineering@imperial.ac.uk .
62 |
63 | (f) Publications and presentations. You may make public, results or data
64 | obtained from, dependent on or arising from research carried out using the
65 | Software, provided that any such presentation or publication identifies the
66 | Software as the source of the results or the data, including the Copyright
67 | Notice given in each element of the Software, and stating that the Software has
68 | been made available for use by You under licence from Imperial College London
69 | and You provide a copy of any such publication to Imperial College London.
70 |
71 | 3. Prohibited Uses. You may not, without written permission from us
72 | at researchcontracts.engineering@imperial.ac.uk :
73 |
74 | (a) Use, copy, modify, merge, or transfer copies of the Software or any
75 | documentation provided by us which relates to the Software except as provided
76 | in this Agreement;
77 |
78 | (b) Use any back-up or archival copies of the Software (or allow anyone else to
79 | use such copies) for any purpose other than to replace the original copy in the
80 | event it is destroyed or becomes defective; or
81 |
82 | (c) Disassemble, decompile or "unlock", reverse translate, or in any manner
83 | decode the Software for any reason.
84 |
85 | 4. Warranty Disclaimer
86 |
87 | (a) Disclaimer. The Software has been developed for research purposes only. You
88 | acknowledge that we are providing the Software to You under this licence
89 | agreement free of charge and on condition that the disclaimer set out below
90 | shall apply. We do not represent or warrant that the Software as to: (i) the
91 | quality, accuracy or reliability of the Software; (ii) the suitability of the
92 | Software for any particular use or for use under any specific conditions; and
93 | (iii) whether use of the Software will infringe third-party rights.
94 |
95 | You acknowledge that You have reviewed and evaluated the Software to determine
96 | that it meets your needs and that You assume all responsibility and liability
97 | for determining the suitability of the Software as fit for your particular
98 | purposes and requirements. Subject to Clause 4(b), we exclude and expressly
99 | disclaim all express and implied representations, warranties, conditions and
100 | terms not stated herein (including the implied conditions or warranties of
101 | satisfactory quality, merchantable quality, merchantability and fitness for
102 | purpose).
103 |
104 | (b) Savings. Some jurisdictions may imply warranties, conditions or terms or
105 | impose obligations upon us which cannot, in whole or in part, be excluded,
106 | restricted or modified or otherwise do not allow the exclusion of implied
107 | warranties, conditions or terms, in which case the above warranty disclaimer
108 | and exclusion will only apply to You to the extent permitted in the relevant
109 | jurisdiction and does not in any event exclude any implied warranties,
110 | conditions or terms which may not under applicable law be excluded.
111 |
112 | (c) Imperial College London disclaims all responsibility for the use which is
113 | made of the Software and any liability for the outcomes arising from using the
114 | Software.
115 |
116 | 5. Limitation of Liability
117 |
118 | (a) You acknowledge that we are providing the Software to You under this
119 | licence agreement free of charge and on condition that the limitation of
120 | liability set out below shall apply. Accordingly, subject to Clause 5(b), we
121 | exclude all liability whether in contract, tort, negligence or otherwise, in
122 | respect of the Software and/or any related documentation provided to You by us
123 | including, but not limited to, liability for loss or corruption of data, loss
124 | of contracts, loss of income, loss of profits, loss of cover and any
125 | consequential or indirect loss or damage of any kind arising out of or in
126 | connection with this licence agreement, however caused. This exclusion shall
127 | apply even if we have been advised of the possibility of such loss or damage.
128 |
129 | (b) You agree to indemnify Imperial College London and hold it harmless from
130 | and against any and all claims, damages and liabilities asserted by third
131 | parties (including claims for negligence) which arise directly or indirectly
132 | from the use of the Software or any derivative of it or the sale of any
133 | products based on the Software. You undertake to make no liability claim
134 | against any employee, student, agent or appointee of Imperial College London,
135 | in connection with this Licence or the Software.
136 |
137 | (c) Nothing in this Agreement shall have the effect of excluding or limiting
138 | our statutory liability.
139 |
140 | (d) Some jurisdictions do not allow these limitations or exclusions either
141 | wholly or in part, and, to that extent, they may not apply to you. Nothing in
142 | this licence agreement will affect your statutory rights or other relevant
143 | statutory provisions which cannot be excluded, restricted or modified, and its
144 | terms and conditions must be read and construed subject to any such statutory
145 | rights and/or provisions.
146 |
147 | 6. Confidentiality. You agree not to disclose any confidential information
148 | provided to You by us pursuant to this Agreement to any third party without our
149 | prior written consent. The obligations in this Clause 6 shall survive the
150 | termination of this Agreement for any reason.
151 |
152 | 7. Termination.
153 |
154 | (a) We may terminate this licence agreement and your right to use the Software
155 | at any time with immediate effect upon written notice to You.
156 |
157 | (b) This licence agreement and your right to use the Software automatically
158 | terminate if You:
159 |
160 | (i) fail to comply with any provisions of this Agreement; or
161 |
162 | (ii) destroy the copies of the Software in your possession, or voluntarily
163 | return the Software to us.
164 |
165 | (c) Upon termination You will destroy all copies of the Software.
166 |
167 | (d) Otherwise, the restrictions on your rights to use the Software will expire
168 | 10 (ten) years after first use of the Software under this licence agreement.
169 |
170 | 8. Miscellaneous Provisions.
171 |
172 | (a) This Agreement will be governed by and construed in accordance with the
173 | substantive laws of England and Wales whose courts shall have exclusive
174 | jurisdiction over all disputes which may arise between us.
175 |
176 | (b) This is the entire agreement between us relating to the Software, and
177 | supersedes any prior purchase order, communications, advertising or
178 | representations concerning the Software.
179 |
180 | (c) No change or modification of this Agreement will be valid unless it is in
181 | writing, and is signed by us.
182 |
183 | (d) The unenforceability or invalidity of any part of this Agreement will not
184 | affect the enforceability or validity of the remaining parts.
185 |
186 | BSD Elements of the Software
187 |
188 | For BSD elements of the Software, the following terms shall apply:
189 | Copyright as indicated in the header of the individual element of the Software.
190 | All rights reserved.
191 |
192 | Redistribution and use in source and binary forms, with or without
193 | modification, are permitted provided that the following conditions are met:
194 |
195 | 1. Redistributions of source code must retain the above copyright notice, this
196 | list of conditions and the following disclaimer.
197 |
198 | 2. Redistributions in binary form must reproduce the above copyright notice,
199 | this list of conditions and the following disclaimer in the documentation
200 | and/or other materials provided with the distribution.
201 |
202 | 3. Neither the name of the copyright holder nor the names of its contributors
203 | may be used to endorse or promote products derived from this software without
204 | specific prior written permission.
205 |
206 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
207 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
208 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
209 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
210 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
211 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
212 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
213 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
214 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
215 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
216 |
217 | SCHEDULE 1
218 |
219 | The Software
220 |
221 | Semantic-NeRF is a scene-specific 3D semantic representation built upon Neural Radiance Fields (NeRF), jointly encoding semantics with appearance and geometry. It can be efficiently learned with a small amount of in-place supervision and achieves complete and accurate 2D semantic labels in room-scale
222 | scenes. It is based on the techniques described in the following publication:
223 |
224 | • Shuaifeng Zhi, Tristan Laidlow, Stefan Leutenegger, Andrew J. Davison. In-Place Scene Labelling and Understanding with Implicit Scene Representation. International Conference on Computer Vision (ICCV), 2021
225 | _________________________
226 |
227 | Acknowledgments
228 |
229 | If you use the software, you should reference the following paper in any
230 | publication:
231 |
232 | • Shuaifeng Zhi, Tristan Laidlow, Stefan Leutenegger, Andrew J. Davison. In-Place Scene Labelling and Understanding with Implicit Scene Representation. International Conference on Computer Vision (ICCV), 2021
--------------------------------------------------------------------------------
/train_SSR_main.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import os
3 | import argparse
4 |
5 | from SSR.datasets.replica import replica_datasets
6 | from SSR.datasets.scannet import scannet_datasets
7 | from SSR.datasets.replica_nyu import replica_nyu_cnn_datasets
8 |
9 |
10 | from SSR.training import trainer
11 |
12 | from tqdm import trange
13 | import time
14 |
15 | def train():
16 | parser = argparse.ArgumentParser()
17 | # parser.add_argument('--config_file', type=str, default="/home/shuaifeng/Documents/PhD_Research/CodeRelease/SemanticSceneRepresentations/SSR/configs/SSR_room2_config_release.yaml",
18 | # help='config file name.')
19 | parser.add_argument('--config_file', type=str, default="/home/shuaifeng/Documents/PhD_Research/CodeRelease/SemanticSceneRepresentations/SSR/configs/SSR_room0_config_test.yaml",
20 | help='config file name.')
21 | parser.add_argument('--dataset_type', type=str, default="replica", choices= ["replica", "replica_nyu_cnn", "scannet"],
22 | help='the dataset to be used,')
23 |
24 | ### working mode and specific options
25 |
26 | # sparse-views
27 | parser.add_argument("--sparse_views", action='store_true',
28 | help='Use labels from a sparse set of frames')
29 | parser.add_argument("--sparse_ratio", type=float, default=0,
30 | help='The portion of dropped labelling frames during training, which can be used along with all working modes.')
31 | parser.add_argument("--label_map_ids", nargs='*', type=int, default=[],
32 | help='In sparse view mode, use selected frame ids from sequences as supervision.')
33 |     parser.add_argument("--random_sample", action='store_true', help='If set, sample label frames randomly; otherwise sample them evenly from the sequence.')
34 |
35 |     # denoising --- pixel-wise
36 | parser.add_argument("--pixel_denoising", action='store_true',
37 | help='Whether to work in pixel-denoising tasks.')
38 | parser.add_argument("--pixel_noise_ratio", type=float, default=0,
39 |                         help='If pixel_noise_ratio > 0, the percentage of pixels to be perturbed in each sampled frame for the pixel-wise denoising task.')
40 |
41 |     # denoising --- region-wise
42 | parser.add_argument("--region_denoising", action='store_true',
43 | help='Whether to work in region-denoising tasks by flipping class labels of chair instances in Replica Room_2')
44 | parser.add_argument("--region_noise_ratio", type=float, default=0,
45 |                         help='In the region-wise denoising task, the percentage of chair instances to be perturbed in each sampled frame.')
46 | parser.add_argument("--uniform_flip", action='store_true',
47 | help='In region-wise denoising task, whether to change chair labels uniformly or not, i.e., by ascending area ratios. This corresponds to two set-ups mentioned in the paper.')
48 | parser.add_argument("--instance_id", nargs='*', type=int, default=[3, 6, 7, 9, 11, 12, 13, 48],
49 | help='In region-wise denoising task, the chair instance ids in Replica Room_2 to be randomly perturbed. The ids of all 8 chairs are [3, 6, 7, 9, 11, 12, 13, 48]')
50 |
51 | # super-resolution
52 | parser.add_argument("--super_resolution", action='store_true',
53 |                         help='Whether to work in the super-resolution task.')
54 |     parser.add_argument('--dense_sr', action='store_true', help='Whether to use dense (up-scaled low-resolution) labels for super-resolution; otherwise only the sparse valid low-resolution pixels are supervised.')
55 | parser.add_argument('--sr_factor', type=int, default=8, help='Scaling factor of super-resolution.')
56 |
57 | # label propagation
58 | parser.add_argument("--label_propagation", action='store_true',
59 | help='Label propagation using partial seed regions.')
60 | parser.add_argument("--partial_perc", type=float, default=0,
61 | help='0: single-click propagation; 1: using 1-percent sub-regions for label propagation, 5: using 5-percent sub-regions for label propagation')
62 |
63 | # misc.
64 |     parser.add_argument('--visualise_save', action='store_true', help='Whether to save the generated noisy labels to disk for later use.')
65 |     parser.add_argument('--load_saved', action='store_true', help='Use previously saved noisy labels for training to ensure consistency between experiments.')
66 | parser.add_argument('--gpu', type=str, default="", help='GPU IDs.')
67 |
68 | args = parser.parse_args()
69 | # Read YAML file
70 | with open(args.config_file, 'r') as f:
71 | config = yaml.safe_load(f)
72 | if len(args.gpu)>0:
73 | config["experiment"]["gpu"] = args.gpu
74 | print("Experiment GPU is {}.".format(config["experiment"]["gpu"]))
75 | trainer.select_gpus(config["experiment"]["gpu"])
76 | config["experiment"].update(vars(args))
77 | # Cast intrinsics to right types
78 | ssr_trainer = trainer.SSRTrainer(config)
79 |
80 | if args.dataset_type == "replica":
81 | print("----- Replica Dataset -----")
82 |
83 | total_num = 900
84 | step = 5
85 | train_ids = list(range(0, total_num, step))
86 | test_ids = [x+step//2 for x in train_ids]
87 | #add ids to config for later saving.
88 | config["experiment"]["train_ids"] = train_ids
89 | config["experiment"]["test_ids"] = test_ids
90 |
91 |         # TODO: as in NeRF, create spiral/test poses. Make training and test poses/ids interleaved.
92 | replica_data_loader = replica_datasets.ReplicaDatasetCache(data_dir=config["experiment"]["dataset_dir"],
93 | train_ids=train_ids, test_ids=test_ids,
94 | img_h=config["experiment"]["height"],
95 | img_w=config["experiment"]["width"])
96 |
97 |
98 | print("--------------------")
99 | if args.super_resolution:
100 | print("Super Resolution Mode! Dense Label Flag is {}, SR Factor is {}".format(args.dense_sr,args.sr_factor))
101 | replica_data_loader.super_resolve_label(down_scale_factor=args.sr_factor, dense_supervision=args.dense_sr)
102 | elif args.label_propagation:
103 | print("Label Propagation Mode! Partial labelling percentage is: {} ".format(args.partial_perc))
104 | replica_data_loader.simulate_user_click_partial(perc=args.partial_perc, load_saved=args.load_saved, visualise_save=args.visualise_save)
105 | if args.sparse_views: # add view-point sampling to partial sampling
106 |                 print("Sparse Viewing Labels Mode under ***Partial Labelling***! Sparse Ratio is ", args.sparse_ratio)
107 | replica_data_loader.sample_label_maps(sparse_ratio=args.sparse_ratio, random_sample=args.random_sample, load_saved=args.load_saved)
108 | elif args.pixel_denoising:
109 | print("Pixel-Denoising Mode! Noise Ratio is ", args.pixel_noise_ratio)
110 | replica_data_loader.add_pixel_wise_noise_label(sparse_views=args.sparse_views,
111 | sparse_ratio=args.sparse_ratio,
112 | random_sample=args.random_sample,
113 | noise_ratio=args.pixel_noise_ratio,
114 | visualise_save=args.visualise_save,
115 | load_saved=args.load_saved)
116 | elif args.region_denoising:
117 | print("Chair Label Flipping for Region-wise Denoising, Flip ratio is {}, Uniform Sampling is {}".format( args.region_noise_ratio, args.uniform_flip))
118 | replica_data_loader.add_instance_wise_noise_label(sparse_views=args.sparse_views, sparse_ratio=args.sparse_ratio, random_sample=args.random_sample,
119 | flip_ratio=args.region_noise_ratio, uniform_flip=args.uniform_flip, instance_id= args.instance_id,
120 | load_saved=args.load_saved, visualise_save=args.visualise_save,)
121 |
122 | elif args.sparse_views:
123 | if len(args.label_map_ids)>0:
124 | print("Use label maps only for selected frames, ", args.label_map_ids)
125 | replica_data_loader.sample_specific_labels(args.label_map_ids, train_ids)
126 | else:
127 | print("Sparse Labels Mode! Sparsity Ratio is ", args.sparse_ratio)
128 | replica_data_loader.sample_label_maps(sparse_ratio=args.sparse_ratio, random_sample=args.random_sample, load_saved=args.load_saved)
129 |
130 | else:
131 | print("Standard setup with full dense supervision.")
132 | ssr_trainer.set_params_replica()
133 | ssr_trainer.prepare_data_replica(replica_data_loader)
134 |
135 | elif args.dataset_type == "replica_nyu_cnn":
136 | print("----- Replica Dataset with NYUv2-13 CNN Predictions -----")
137 |
138 | print("Replica_nyu_cnn mode using labels from trained CNNs: {}".format(config["experiment"]["nyu_mode"]))
139 |
140 | total_num = 900
141 | step = 5
142 |
143 | train_ids = list(range(0, total_num, step))
144 | test_ids = [x+step//2 for x in train_ids]
145 |
146 | #add ids to config for later saving.
147 | config["experiment"]["train_ids"] = train_ids
148 | config["experiment"]["test_ids"] = test_ids
149 |
150 | replica_nyu_cnn_data_loader = replica_nyu_cnn_datasets.Replica_CNN_NYU(data_dir=config["experiment"]["dataset_dir"],
151 | train_ids=train_ids, test_ids=test_ids,
152 | img_h=config["experiment"]["height"],
153 | img_w=config["experiment"]["width"],
154 | nyu_mode = config["experiment"]["nyu_mode"],
155 | load_softmax=config["experiment"]["load_softmax"])
156 |
157 | ssr_trainer.set_params_replica() # we still call params of replica here since the image sources are from Replica still
158 | ssr_trainer.prepare_data_replica_nyu_cnn(replica_nyu_cnn_data_loader)
159 |
160 | elif args.dataset_type == "scannet":
161 | print("----- ScanNet Dataset with NYUv2-40 Conventions-----")
162 |
163 | print("processing ScanNet scene: ", os.path.basename(config["experiment"]["dataset_dir"]))
164 |         # TODO: as in NeRF, create spiral/test poses. Make training and test poses/ids interleaved.
165 | scannet_data_loader = scannet_datasets.ScanNet_Dataset( scene_dir=config["experiment"]["dataset_dir"],
166 | img_h=config["experiment"]["height"],
167 | img_w=config["experiment"]["width"],
168 | sample_step=config["experiment"]["sample_step"],
169 | save_dir=config["experiment"]["dataset_dir"])
170 |
171 |
172 | print("--------------------")
173 | if args.super_resolution:
174 | print("Super Resolution Mode! Dense Label Flag is {}, SR Factor is {}".format(args.dense_sr,args.sr_factor))
175 | scannet_data_loader.super_resolve_label(down_scale_factor=args.sr_factor, dense_supervision=args.dense_sr)
176 |
177 | elif args.label_propagation:
178 |             print("Partial Segmentation Mode! Partial percentage is: {}".format(args.partial_perc))
179 | scannet_data_loader.simulate_user_click_partial(perc=args.partial_perc, load_saved=args.load_saved, visualise_save=args.visualise_save)
180 |
181 | elif args.pixel_denoising:
182 | print("Pixel-Denoising Mode! Noise Ratio is ", args.pixel_noise_ratio)
183 | scannet_data_loader.add_pixel_wise_noise_label(sparse_views=args.sparse_views,
184 | sparse_ratio=args.sparse_ratio,
185 | random_sample=args.random_sample,
186 | noise_ratio=args.pixel_noise_ratio,
187 | visualise_save=args.visualise_save,
188 | load_saved=args.load_saved)
189 | elif args.sparse_views:
190 | print("Sparse Viewing Labels Mode! Sparse Ratio is ", args.sparse_ratio)
191 | scannet_data_loader.sample_label_maps(sparse_ratio=args.sparse_ratio, random_sample=args.random_sample, load_saved=args.load_saved)
192 |
193 | ssr_trainer.set_params_scannet(scannet_data_loader)
194 | ssr_trainer.prepare_data_scannet(scannet_data_loader)
195 |
196 |
197 | # Create nerf model, init optimizer
198 | ssr_trainer.create_ssr()
199 | # Create rays in world coordinates
200 | ssr_trainer.init_rays()
201 |
202 | start = 0
203 |
204 | N_iters = int(float(config["train"]["N_iters"])) + 1
205 | global_step = start
206 | ##########################
207 | print('Begin')
208 | ##### Training loop #####
209 | for i in trange(start, N_iters):
210 |
211 | time0 = time.time()
212 | ssr_trainer.step(global_step)
213 |
214 | dt = time.time()-time0
215 | print()
216 | print("Time per step is :", dt)
217 | global_step += 1
218 |
219 |
220 | print('done')
221 |
222 |
223 | if __name__=='__main__':
224 | train()
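225 |
226 |
227 | # --- Editor's note (usage sketch, not part of the original script) -----------
228 | # Example invocations for the working modes parsed above; the config paths are
229 | # placeholders and should point to your own copies of the YAML files in SSR/configs.
230 | #
231 | #   # standard training with full dense supervision on Replica
232 | #   python train_SSR_main.py --config_file SSR/configs/SSR_room0_config.yaml --dataset_type replica
233 | #
234 | #   # sparse labels: drop 90% of the semantic frames, sampled evenly
235 | #   python train_SSR_main.py --config_file SSR/configs/SSR_room0_config.yaml --sparse_views --sparse_ratio 0.9
236 | #
237 | #   # pixel-wise denoising with 50% of the pixels perturbed in each labelled frame
238 | #   python train_SSR_main.py --config_file SSR/configs/SSR_room0_config.yaml --pixel_denoising --pixel_noise_ratio 0.5
239 | #
240 | #   # label super-resolution on ScanNet with an 8x down-scaling factor
241 | #   python train_SSR_main.py --config_file SSR/configs/SSR_ScanNet_config.yaml --dataset_type scannet --super_resolution --sr_factor 8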
--------------------------------------------------------------------------------
/SSR/datasets/scannet/scannet_datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import numpy as np
4 | from skimage.io import imread
5 | import cv2
6 | import imageio
7 |
8 | from SSR.datasets.scannet.scannet_utils import load_scannet_nyu40_mapping, load_scannet_nyu13_mapping
9 | from SSR.utils import image_utils
10 | class ScanNet_Dataset(object):
11 | def __init__(self, scene_dir, img_h=None, img_w=None, sample_step=1, save_dir=None, mode="nyu40"):
12 | # we only use rgb+poses from Scannet
13 | self.img_h = img_h
14 | self.img_w = img_w
15 |
16 | self.scene_dir = scene_dir # scene_dir is the root directory of each sequence, i.e., xxx/ScanNet/scans/scene0088_00"
17 | # scene_dir = "/home/shuaifeng/Documents/Datasets/ScanNet/scans/scene0088_00"
18 | scene_name = os.path.basename(scene_dir)
19 | data_dir = os.path.dirname(scene_dir)
20 |
21 | instance_filt_dir = os.path.join(scene_dir, scene_name+'_2d-instance-filt')
22 | label_filt_dir = os.path.join(scene_dir, scene_name+'_2d-label-filt')
23 | self.semantic_class_dir = label_filt_dir
24 |
25 | # (0 corresponds to unannotated or no depth).
26 | if mode=="nyu40":
27 | label_mapping_nyu = load_scannet_nyu40_mapping(scene_dir)
28 | colour_map_np = image_utils.nyu40_colour_code
29 | assert colour_map_np.shape[0] == 41
30 | elif mode=="nyu13":
31 | label_mapping_nyu = load_scannet_nyu13_mapping(scene_dir)
32 | colour_map_np = image_utils.nyu13_colour_code
33 | assert colour_map_np.shape[0] == 14
34 | else:
35 | assert False
36 |
37 | # get camera intrinsics
38 | # we use color camera intrinsics and resize depth to match
39 | with open(os.path.join(scene_dir, "{}.txt".format(scene_name))) as info_f:
40 | info = [line.rstrip().split(' = ') for line in info_f]
41 | info = {key:value for key, value in info}
42 | intrinsics = [
43 | [float(info['fx_color']), 0, float(info['mx_color'])],
44 | [0, float(info['fy_color']), float(info['my_color'])],
45 | [0, 0, 1]]
46 |
47 | original_colour_h = int(info["colorHeight"])
48 | original_colour_w = int(info["colorWidth"])
49 | original_depth_h = int(info["depthHeight"])
50 | original_depth_w = int(info["depthWidth"])
51 | assert original_colour_h==968 and original_colour_w==1296 and original_depth_h==480 and original_depth_w==640
52 |
53 | # load 2D colour frames and poses
54 |
55 | frame_ids = os.listdir(os.path.join(scene_dir, "renders", 'color'))
56 | frame_ids = [int(os.path.splitext(frame)[0]) for frame in frame_ids]
57 | frame_ids = sorted(frame_ids)
58 |
59 | frames_file_list = []
60 | for i, frame_id in enumerate(frame_ids):
61 | if i%25==0:
62 | print('preparing %s frame %d/%d'%(scene_name, i, len(frame_ids)))
63 |
64 | pose = np.loadtxt(os.path.join(scene_dir, "renders", 'pose', '%d.txt' % frame_id))
65 |
66 | # skip frames with no valid pose
67 | if not np.all(np.isfinite(pose)):
68 | continue
69 |
70 | frame = {'file_name_image':
71 | os.path.join(scene_dir, "renders", 'color', '%d.jpg'%frame_id),
72 | 'file_name_depth':
73 | os.path.join(scene_dir, "renders", 'depth', '%d.png'%frame_id),
74 | 'file_name_instance':
75 | os.path.join(instance_filt_dir, 'instance-filt', '%d.png'%frame_id),
76 | 'file_name_label':
77 | os.path.join(label_filt_dir, 'label-filt', '%d.png'%frame_id),
78 | 'intrinsics': intrinsics,
79 | 'pose': pose,
80 | }
81 |
82 | frames_file_list.append(frame)
83 |
84 | step = sample_step
85 | valid_data_num = len(frames_file_list)
86 | self.valid_data_num = valid_data_num
87 | total_ids = range(valid_data_num)
88 | train_ids = total_ids[::step]
89 | test_ids = [x+ (step//2) for x in train_ids]
90 | if test_ids[-1]>valid_data_num-1:
91 | test_ids.pop(-1)
92 | self.train_ids = train_ids
93 | self.train_num = len(train_ids)
94 | self.test_ids = test_ids
95 | self.test_num = len(test_ids)
96 |
97 | self.train_samples = {'image': [], 'depth': [],
98 | 'semantic_raw': [], # raw scannet label id
99 | 'semantic': [], # nyu40 id
100 | 'T_wc': [],
101 | 'instance': []}
102 |
103 |
104 | self.test_samples = {'image': [], 'depth': [],
105 | 'semantic_raw': [],
106 | 'semantic': [],
107 | 'T_wc': [],
108 | 'instance': []}
109 |
110 | # training samples
111 | for idx in train_ids:
112 |             image = cv2.imread(frames_file_list[idx]["file_name_image"])[:,:,::-1] # change from BGR uint8 to RGB (float conversion below)
113 | image = cv2.copyMakeBorder(src=image, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=[0,0,0]) # pad 4 pixels to height so that images have aspect ratio of 4:3
114 | assert image.shape[0]/image.shape[1]==3/4 and image.shape[1]==original_colour_w and image.shape[0] == 972
115 | image = image/255.0
116 |
117 | depth = cv2.imread(frames_file_list[idx]["file_name_depth"], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
118 |
119 | semantic = cv2.imread(frames_file_list[idx]["file_name_label"], cv2.IMREAD_UNCHANGED)
120 | semantic = cv2.copyMakeBorder(src=semantic, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=0)
121 |
122 | instance = cv2.imread(frames_file_list[idx]["file_name_instance"], cv2.IMREAD_UNCHANGED)
123 | instance = cv2.copyMakeBorder(src=instance, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=0)
124 |
125 | T_wc = frames_file_list[idx]["pose"].reshape((4, 4))
126 |
127 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
128 | (self.img_w is not None and self.img_w != image.shape[1]):
129 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
130 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
131 | semantic = cv2.resize(semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
132 | instance = cv2.resize(instance, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
133 |
134 | self.train_samples["image"].append(image)
135 | self.train_samples["depth"].append(depth)
136 | self.train_samples["semantic_raw"].append(semantic)
137 | self.train_samples["instance"].append(instance)
138 | self.train_samples["T_wc"].append(T_wc)
139 |
140 |
141 | # test samples
142 | for idx in test_ids:
143 |             image = cv2.imread(frames_file_list[idx]["file_name_image"])[:,:,::-1] # change from BGR uint8 to RGB (float conversion below)
144 | image = cv2.copyMakeBorder(src=image, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=[0,0,0]) # pad 4 pixels to height so that images have aspect ratio of 4:3
145 | assert image.shape[0]/image.shape[1]==3/4 and image.shape[1]==original_colour_w and image.shape[0] == 972
146 | image = image/255.0
147 |
148 | depth = cv2.imread(frames_file_list[idx]["file_name_depth"], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
149 |
150 | semantic = cv2.imread(frames_file_list[idx]["file_name_label"], cv2.IMREAD_UNCHANGED)
151 | semantic = cv2.copyMakeBorder(src=semantic, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=0)
152 |
153 | instance = cv2.imread(frames_file_list[idx]["file_name_instance"], cv2.IMREAD_UNCHANGED)
154 | instance = cv2.copyMakeBorder(src=instance, top=2, bottom=2, left=0, right=0, borderType=cv2.BORDER_CONSTANT, value=0)
155 |
156 | T_wc = frames_file_list[idx]["pose"].reshape((4, 4))
157 |
158 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
159 | (self.img_w is not None and self.img_w != image.shape[1]):
160 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
161 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
162 | semantic = cv2.resize(semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
163 | instance = cv2.resize(instance, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
164 |
165 |
166 | self.test_samples["image"].append(image)
167 | self.test_samples["depth"].append(depth)
168 | self.test_samples["semantic_raw"].append(semantic)
169 | self.test_samples["instance"].append(instance)
170 | self.test_samples["T_wc"].append(T_wc)
171 |
172 |
173 | scale_y = image.shape[0]/(original_colour_h+4)
174 | scale_x = image.shape[1]/original_colour_w
175 |         assert scale_x == scale_y # this requires the desired shape to also have a 4:3 aspect ratio
176 |
177 | # we modify the camera intrinsics considering the padding and scaling
178 | self.intrinsics = np.asarray(intrinsics)
179 |         self.intrinsics[1,2] += 2 # increase c_y by 2 since we pad the height by 4 pixels (2 at the top, 2 at the bottom)
180 | self.intrinsics[0, 0] = self.intrinsics[0, 0]*scale_x # fx
181 | self.intrinsics[1, 1] = self.intrinsics[1, 1]*scale_x # fy
182 |
183 | self.intrinsics[0, 2] = self.intrinsics[0, 2]*scale_x # cx
184 | self.intrinsics[1, 2] = self.intrinsics[1, 2]*scale_x # cy
185 |
186 |
187 | for key in self.test_samples.keys(): # transform list of np array to array with batch dimension
188 | self.train_samples[key] = np.asarray(self.train_samples[key])
189 | self.test_samples[key] = np.asarray(self.test_samples[key])
190 |
191 | # map scannet classes to nyu definition
192 | train_semantic = self.train_samples["semantic_raw"]
193 | test_semantic = self.test_samples["semantic_raw"]
194 |
195 | train_semantic_nyu = train_semantic.copy()
196 | test_semantic_nyu = test_semantic.copy()
197 |
198 | for scan_id, nyu_id in label_mapping_nyu.items():
199 | train_semantic_nyu[train_semantic==scan_id] = nyu_id
200 | test_semantic_nyu[test_semantic==scan_id] = nyu_id
201 |
202 | self.train_samples["semantic"] = train_semantic_nyu
203 | self.test_samples["semantic"] = test_semantic_nyu
204 |
205 |
206 | self.semantic_classes = np.unique(
207 | np.concatenate(
208 | (np.unique(self.train_samples["semantic"]),
209 | np.unique(self.test_samples["semantic"])))
210 | ).astype(np.uint8)
211 | # each scene may not contain all 40-classes
212 |
213 | self.num_semantic_class = self.semantic_classes.shape[0] # number of semantic classes
214 |
215 | colour_map_np_remap = colour_map_np.copy()[self.semantic_classes] # take corresponding colour map
216 | self.colour_map_np = colour_map_np
217 | self.colour_map_np_remap = colour_map_np_remap
218 | self.mask_ids = np.ones(self.train_num) # init self.mask_ids as full ones
219 |         # 1 means the corresponding label map is used for semantic loss during training, while 0 means no semantic loss
220 |
221 | # save colourised ground truth label to img folder
222 | if save_dir is not None:
223 | # save colourised ground truth label to img folder
224 | vis_label_save_dir = os.path.join(save_dir, "vis-sampled-label-filt")
225 | os.makedirs(vis_label_save_dir, exist_ok=True)
226 | vis_train_label = colour_map_np[self.train_samples["semantic"]]
227 | vis_test_label = colour_map_np[self.test_samples["semantic"]]
228 | for i in range(self.train_num):
229 | label = vis_train_label[i].astype(np.uint8)
230 | cv2.imwrite(os.path.join(vis_label_save_dir, "train_vis_sem_{}.png".format(i)),label[...,::-1])
231 |
232 | for i in range(self.test_num):
233 | label = vis_test_label[i].astype(np.uint8)
234 | cv2.imwrite(os.path.join(vis_label_save_dir, "test_vis_sem_{}.png".format(i)),label[...,::-1])
235 |
236 |
237 | # remap existing semantic class labels to continuous label ranging from 0 to num_class-1
238 | self.train_samples["semantic_clean"] = self.train_samples["semantic"].copy()
239 | self.train_samples["semantic_remap"] = self.train_samples["semantic"].copy()
240 | self.train_samples["semantic_remap_clean"] = self.train_samples["semantic_clean"].copy()
241 |
242 | self.test_samples["semantic_remap"] = self.test_samples["semantic"].copy()
243 |
244 | for i in range(self.num_semantic_class):
245 | self.train_samples["semantic_remap"][self.train_samples["semantic"]== self.semantic_classes[i]] = i
246 | self.train_samples["semantic_remap_clean"][self.train_samples["semantic_clean"]== self.semantic_classes[i]] = i
247 | self.test_samples["semantic_remap"][self.test_samples["semantic"]== self.semantic_classes[i]] = i
248 |
249 |
250 | self.train_samples["semantic_remap"] = self.train_samples["semantic_remap"].astype(np.uint8)
251 | self.train_samples["semantic_remap_clean"] = self.train_samples["semantic_remap_clean"].astype(np.uint8)
252 | self.test_samples["semantic_remap"] = self.test_samples["semantic_remap"].astype(np.uint8)
253 |
254 | print()
255 | print("Training Sample Summary:")
256 | for key in self.train_samples.keys():
257 | print("{} has shape of {}, type {}.".format(key, self.train_samples[key].shape, self.train_samples[key].dtype))
258 | print()
259 | print("Testing Sample Summary:")
260 | for key in self.test_samples.keys():
261 | print("{} has shape of {}, type {}.".format(key, self.test_samples[key].shape, self.test_samples[key].dtype))
262 |
263 |
264 | def sample_label_maps(self, sparse_ratio=0.5, random_sample=False, load_saved=False):
265 | if load_saved is False:
266 | K = int(self.train_num*sparse_ratio) # number of skipped training frames, mask=0
267 | N = self.train_num-K # number of used training frames, mask=1
268 |             assert np.sum(self.mask_ids) == self.train_num # sanity check that all masks are available before sampling
269 |
270 |             if K==0: # in case sparse_ratio==0
271 | return
272 |
273 | if random_sample:
274 | self.mask_ids[:K] = 0
275 | np.random.shuffle(self.mask_ids)
276 | else: # sample evenly
277 | if sparse_ratio<=0.5: # skip less/equal than half frames
278 | assert K <= self.train_num/2
279 | q, r = divmod(self.train_num, K)
280 | indices = [q*i + min(i, r) for i in range(K)]
281 | self.mask_ids[indices] = 0
282 |
283 | else: # skip more than half frames
284 | assert K > self.train_num/2
285 | self.mask_ids = np.zeros_like(self.mask_ids) # disable all images and evenly enable N images in total
286 | q, r = divmod(self.train_num, N)
287 | indices = [q*i + min(i, r) for i in range(N)]
288 | self.mask_ids[indices] = 1
289 | print("{} of {} semantic labels are sampled (sparse ratio: {}).".format(sum(self.mask_ids), len(self.mask_ids), sparse_ratio))
290 | noisy_sem_dir = os.path.join(self.scene_dir, "renders", "noisy_pixel_sems_sr{}".format(sparse_ratio))
291 | if not os.path.exists(noisy_sem_dir):
292 | os.makedirs(noisy_sem_dir)
293 | with open(os.path.join(noisy_sem_dir, "mask_ids.npy"), 'wb') as f:
294 | np.save(f, self.mask_ids)
295 | elif load_saved is True:
296 | noisy_sem_dir = os.path.join(self.scene_dir, "renders", "noisy_pixel_sems_sr{}".format(sparse_ratio))
297 | self.mask_ids = np.load(os.path.join(noisy_sem_dir, "mask_ids.npy"))
298 |
299 |
300 | def add_pixel_wise_noise_label(self,
301 | sparse_views=False, sparse_ratio=0.5, random_sample=False,
302 | noise_ratio=0.3, visualise_save=False, load_saved=False):
303 | if not load_saved:
304 | if sparse_views:
305 | self.sample_label_maps(sparse_ratio=sparse_ratio, random_sample=random_sample)
306 | num_pixel = self.img_h * self.img_w
307 | num_pixel_noisy = int(num_pixel*noise_ratio)
308 | train_sem = self.train_samples["semantic_remap"]
309 |
310 | for i in range(len(self.mask_ids)):
311 | if self.mask_ids[i] == 1: # add label noise to unmasked/available labels
312 | noisy_index_1d = np.random.permutation(num_pixel)[:num_pixel_noisy]
313 |                     flattened_sem = train_sem[i].flatten()
314 |                     flattened_sem[noisy_index_1d] = np.random.choice(self.num_semantic_class, num_pixel_noisy)
315 |                     # replace the labels of num_pixel_noisy randomly chosen pixels with random labels drawn from [0, num_semantic_class-1]
316 |                     train_sem[i] = flattened_sem.reshape(self.img_h, self.img_w)
317 |
318 |             print("{} of {} semantic labels have pixel-wise noise added with a noise ratio of {}.".format(sum(self.mask_ids), len(self.mask_ids), noise_ratio))
319 |
320 | if visualise_save:
321 | noisy_sem_dir = os.path.join(self.scene_dir, "renders", "noisy_pixel_sems_sr{}_nr{}".format(sparse_ratio, noise_ratio))
322 | if not os.path.exists(noisy_sem_dir):
323 | os.makedirs(noisy_sem_dir)
324 | with open(os.path.join(noisy_sem_dir, "mask_ids.npy"), 'wb') as f:
325 | np.save(f, self.mask_ids)
326 |
327 |
328 | vis_noisy_semantic_list = []
329 | vis_semantic_clean_list = []
330 |
331 | colour_map_np = self.colour_map_np_remap
332 |
333 | semantic_remap = self.train_samples["semantic_remap"] # [H, W, 3]
334 | semantic_remap_clean = self.train_samples["semantic_remap_clean"] # [H, W, 3]
335 |
336 | for i in range(len(self.mask_ids)):
337 | if self.mask_ids[i] == 1: # add label noise to unmasked/available labels
338 | vis_noisy_semantic = colour_map_np[semantic_remap[i]] # [H, W, 3]
339 | vis_semantic_clean = colour_map_np[semantic_remap_clean[i]] # [H, W, 3]
340 |
341 | imageio.imwrite(os.path.join(noisy_sem_dir, "semantic_class_{}.png".format(i)), semantic_remap[i])
342 | imageio.imwrite(os.path.join(noisy_sem_dir, "vis_sem_class_{}.png".format(i)), vis_noisy_semantic)
343 |
344 | vis_noisy_semantic_list.append(vis_noisy_semantic)
345 | vis_semantic_clean_list.append(vis_semantic_clean)
346 | else:
347 | # for mask_ids of 0, we skip these frames during training and do not add noise
348 | vis_noisy_semantic = colour_map_np[semantic_remap[i]] # [H, W, 3]
349 | vis_semantic_clean = colour_map_np[semantic_remap_clean[i]] # [H, W, 3]
350 | assert np.all(vis_noisy_semantic==vis_semantic_clean)
351 |
352 | imageio.imwrite(os.path.join(noisy_sem_dir, "semantic_class_{}.png".format(i)), semantic_remap[i])
353 | imageio.imwrite(os.path.join(noisy_sem_dir, "vis_sem_class_{}.png".format(i)), vis_noisy_semantic)
354 |
355 | vis_noisy_semantic_list.append(vis_noisy_semantic)
356 | vis_semantic_clean_list.append(vis_semantic_clean)
357 |
358 | imageio.mimwrite(os.path.join(noisy_sem_dir, 'noisy_sem_ratio_{}.mp4'.format(noise_ratio)),
359 | np.stack(vis_noisy_semantic_list, 0), fps=30, quality=8)
360 |
361 | imageio.mimwrite(os.path.join(noisy_sem_dir, 'clean_sem.mp4'),
362 | np.stack(vis_semantic_clean_list, 0), fps=30, quality=8)
363 | else:
364 | print("Load saved noisy labels.")
365 | noisy_sem_dir = os.path.join(self.scene_dir, "renders", "noisy_pixel_sems_sr{}_nr{}".format(sparse_ratio, noise_ratio))
366 | self.mask_ids = np.load(os.path.join(noisy_sem_dir, "mask_ids.npy"))
367 | semantic_img_list = []
368 | semantic_path_list = sorted(glob.glob(noisy_sem_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
369 | assert len(semantic_path_list)>0
370 | for idx in range(len(self.mask_ids)):
371 | semantic = imread(semantic_path_list[idx])
372 | semantic_img_list.append(semantic)
373 | self.train_samples["semantic_remap"] = np.asarray(semantic_img_list)
374 |
375 |
376 | def super_resolve_label(self, down_scale_factor=8, dense_supervision=True):
377 | if down_scale_factor==1:
378 | return
379 | if dense_supervision: # train down-scale and up-scale again
380 | scaled_low_res_train_label = []
381 | for i in range(self.train_num):
382 | low_res_label = cv2.resize(self.train_samples["semantic_remap"][i],
383 | (self.img_w//down_scale_factor, self.img_h//down_scale_factor),
384 | interpolation=cv2.INTER_NEAREST)
385 |
386 | scaled_low_res_label = cv2.resize(low_res_label, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
387 | scaled_low_res_train_label.append(scaled_low_res_label)
388 |
389 | scaled_low_res_train_label = np.asarray(scaled_low_res_train_label)
390 |
391 | self.train_samples["semantic_remap"] = scaled_low_res_train_label
392 |
393 | else: # we only penalise strictly on valid pixel positions
394 | valid_low_res_pixel_mask = np.zeros((self.img_h, self.img_w))
395 | valid_low_res_pixel_mask[::down_scale_factor, ::down_scale_factor]=1
396 | self.train_samples["semantic_remap"] = (self.train_samples["semantic_remap"]*valid_low_res_pixel_mask[None,...]).astype(np.uint8)
397 | # we mask all the decimated pixel label to void class==0
398 |
399 |
400 |
401 | def simulate_user_click_partial(self, perc=0, load_saved=False, visualise_save=True):
402 | assert perc<=100 and perc >= 0
403 | assert self.train_num == self.train_samples["semantic_remap"].shape[0]
404 | single_click=True if perc==0 else False # single_click: whether to use single click only from each class
405 |         perc = perc/100.0 # convert the percentage into a ratio in [0, 1]
406 | if not load_saved:
407 |
408 | if single_click:
409 | click_semantic_map = []
410 | for i in range(self.train_num):
411 | if (i+1)%5==10:
412 | print("Generating partial label of ratio {} for frame {}/{}.".format(perc, i, self.train_num))
413 | im = self.train_samples["semantic_remap"][i]
414 | void_class = [0]
415 | label_class = np.unique(im).tolist()
416 | valid_class = [i for i in label_class if i not in void_class]
417 | im_ = np.zeros_like(im)
418 | for l in valid_class:
419 | label_idx = np.transpose(np.nonzero(im == l))
420 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False)
421 | label_idx_ = label_idx[sample_ind]
422 | im_[label_idx_[:, 0], label_idx_[:, 1]] = l
423 | click_semantic_map.append(im_)
424 | click_semantic_map = np.asarray(click_semantic_map).astype(np.uint8)
425 | self.train_samples["semantic_remap"] = click_semantic_map
426 |
427 |                 print('Partial label images with centroid sampling (extreme case) have been generated.')
428 |
429 | elif perc>0 and not single_click:
430 | click_semantic_map = []
431 | for i in range(self.train_num):
432 | if (i+1)%5==10:
433 | print("Generating partial label of ratio {} for frame {}/{}.".format(perc, i, self.train_num))
434 | im = self.train_samples["semantic_remap"][i]
435 | void_class = [0]
436 | label_class = np.unique(im).tolist() # find the unique class-ids in the current training label
437 | valid_class = [c for c in label_class if c not in void_class]
438 |
439 | im_ = np.zeros_like(im)
440 | for l in valid_class:
441 | label_mask = np.zeros_like(im)
442 | label_mask_ = im == l # binary mask of pixels equal to class-l
443 | label_idx = np.transpose(np.nonzero(label_mask_)) # Nx2
444 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False) # shape [1,]
445 | label_idx_ = label_idx[sample_ind] # shape [1, 2]
446 | target_num = int(perc * label_mask_.sum()) # find the target and total number of pixels belong to class-l in the current image
447 | label_mask[label_idx_[0, 0], label_idx_[0, 1]] = 1 # full-zero mask with only selected pixel to be 1
448 | label_mask_true = label_mask
449 | # label_mask_true initially has only 1 True pixel, we continuously grow mask until reach expected percentage
450 |
451 | while label_mask_true.sum() < target_num:
452 | num_before_grow = label_mask_true.sum()
453 | label_mask = cv2.dilate(label_mask, kernel=np.ones([5, 5]))
454 | label_mask_true = label_mask * label_mask_
455 | num_after_grow = label_mask_true.sum()
456 | # print("Before growth: {}, After growth: {}".format(num_before_grow, num_after_grow))
457 | if num_after_grow==num_before_grow:
458 | print("Initialise Another Seed for Growing!")
459 | # the region does not grow means the very local has been filled,
460 | # and we need to initiate another seed to keep growing
461 | uncovered_region_mask = label_mask_ - label_mask_true # pixel equal to 1 means un-sampled regions belong to current class
462 | label_idx = np.transpose(np.nonzero(uncovered_region_mask)) # Nx2
463 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False) # shape [1,]
464 | label_idx_ = label_idx[sample_ind] # shape [1, 2]
465 | label_mask[label_idx_[0, 0], label_idx_[0, 1]] = 1
466 |
467 | im_[label_mask_true.astype(bool)] = l
468 | click_semantic_map.append(im_)
469 |
470 | click_semantic_map = np.asarray(click_semantic_map).astype(np.uint8)
471 | self.train_samples["semantic_remap"] = click_semantic_map
472 |                 print('Partial label images with centroid sampling have been generated.')
473 | else:
474 | assert False
475 |
476 | if visualise_save:
477 | partial_sem_dir = os.path.join(self.semantic_class_dir, "partial_perc_{}".format(perc))
478 | if not os.path.exists(partial_sem_dir):
479 | os.makedirs(partial_sem_dir)
480 | colour_map_np = self.colour_map_np_remap
481 | vis_partial_sem = []
482 | for i in range(self.train_num):
483 | vis_partial_semantic = colour_map_np[self.train_samples["semantic_remap"][i]] # [H, W, 3]
484 | imageio.imwrite(os.path.join(partial_sem_dir, "semantic_class_{}.png".format(i)), self.train_samples["semantic_remap"][i])
485 | imageio.imwrite(os.path.join(partial_sem_dir, "vis_sem_class_{}.png".format(i)), vis_partial_semantic)
486 | vis_partial_sem.append(vis_partial_semantic)
487 |
488 | imageio.mimwrite(os.path.join(partial_sem_dir, 'partial_sem.mp4'), self.train_samples["semantic_remap"], fps=30, quality=8)
489 | imageio.mimwrite(os.path.join(partial_sem_dir, 'vis_partial_sem.mp4'), np.stack(vis_partial_sem, 0), fps=30, quality=8)
490 |
491 | else: # load saved single-click/partial semantics
492 | saved_partial_sem_dir = os.path.join(self.semantic_class_dir, "partial_perc_{}".format(perc))
493 | semantic_img_list = []
494 | semantic_path_list = sorted(glob.glob(saved_partial_sem_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
495 | assert len(semantic_path_list)>0
496 | for idx in range(self.train_num):
497 | semantic = imread(semantic_path_list[idx])
498 | semantic_img_list.append(semantic)
499 | self.train_samples["semantic_remap"] = np.asarray(semantic_img_list).astype(np.uint8)
500 |
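501 |
502 | # --- Editor's sketch (not part of the original module) -----------------------
503 | # Minimal usage example of ScanNet_Dataset. The scene path is a placeholder;
504 | # the target height/width keep the 4:3 aspect ratio that the loader requires
505 | # after padding the colour frames.
506 | if __name__ == '__main__':
507 |     dataset = ScanNet_Dataset(scene_dir="/path/to/ScanNet/scans/scene0088_00",
508 |                               img_h=480, img_w=640,
509 |                               sample_step=5, save_dir=None, mode="nyu40")
510 |     # keep semantic labels for only half of the training frames, sampled evenly
511 |     dataset.sample_label_maps(sparse_ratio=0.5, random_sample=False)
512 |     print("train frames: {}, test frames: {}".format(dataset.train_num, dataset.test_num))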
--------------------------------------------------------------------------------
/SSR/datasets/replica/replica_datasets.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import glob
3 | import numpy as np
4 | from skimage.io import imread
5 | from torch.utils.data import Dataset
6 | import cv2
7 | import imageio
8 | from imgviz import label_colormap
9 |
10 | class ReplicaDatasetCache(Dataset):
11 | def __init__(self, data_dir, train_ids, test_ids, img_h=None, img_w=None):
12 |
13 | traj_file = os.path.join(data_dir, "traj_w_c.txt")
14 | self.rgb_dir = os.path.join(data_dir, "rgb")
15 | self.depth_dir = os.path.join(data_dir, "depth") # depth is in mm uint
16 | self.semantic_class_dir = os.path.join(data_dir, "semantic_class")
17 | self.semantic_instance_dir = os.path.join(data_dir, "semantic_instance")
18 | if not os.path.exists(self.semantic_instance_dir):
19 | self.semantic_instance_dir = None
20 |
21 |
22 | self.train_ids = train_ids
23 | self.train_num = len(train_ids)
24 | self.test_ids = test_ids
25 | self.test_num = len(test_ids)
26 |
27 | self.img_h = img_h
28 | self.img_w = img_w
29 |
30 | self.Ts_full = np.loadtxt(traj_file, delimiter=" ").reshape(-1, 4, 4)
31 |
32 | self.rgb_list = sorted(glob.glob(self.rgb_dir + '/rgb*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
33 | self.depth_list = sorted(glob.glob(self.depth_dir + '/depth*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
34 | self.semantic_list = sorted(glob.glob(self.semantic_class_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
35 | if self.semantic_instance_dir is not None:
36 | self.instance_list = sorted(glob.glob(self.semantic_instance_dir + '/semantic_instance_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
37 |
38 | self.train_samples = {'image': [], 'depth': [],
39 | 'semantic': [], 'T_wc': [],
40 | 'instance': []}
41 |
42 | self.test_samples = {'image': [], 'depth': [],
43 | 'semantic': [], 'T_wc': [],
44 | 'instance': []}
45 |
46 | # training samples
47 | for idx in train_ids:
48 |             image = cv2.imread(self.rgb_list[idx])[:,:,::-1] / 255.0 # change from BGR uint8 to RGB float
49 | depth = cv2.imread(self.depth_list[idx], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
50 | semantic = cv2.imread(self.semantic_list[idx], cv2.IMREAD_UNCHANGED)
51 | if self.semantic_instance_dir is not None:
52 | instance = cv2.imread(self.instance_list[idx], cv2.IMREAD_UNCHANGED) # uint16
53 |
54 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
55 | (self.img_w is not None and self.img_w != image.shape[1]):
56 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
57 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
58 | semantic = cv2.resize(semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
59 | if self.semantic_instance_dir is not None:
60 | instance = cv2.resize(instance, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
61 |
62 | T_wc = self.Ts_full[idx]
63 |
64 | self.train_samples["image"].append(image)
65 | self.train_samples["depth"].append(depth)
66 | self.train_samples["semantic"].append(semantic)
67 | if self.semantic_instance_dir is not None:
68 | self.train_samples["instance"].append(instance)
69 | self.train_samples["T_wc"].append(T_wc)
70 |
71 |
72 | # test samples
73 | for idx in test_ids:
74 |             image = cv2.imread(self.rgb_list[idx])[:,:,::-1] / 255.0 # change from BGR uint8 to RGB float
75 | depth = cv2.imread(self.depth_list[idx], cv2.IMREAD_UNCHANGED) / 1000.0 # uint16 mm depth, then turn depth from mm to meter
76 | semantic = cv2.imread(self.semantic_list[idx], cv2.IMREAD_UNCHANGED)
77 | if self.semantic_instance_dir is not None:
78 | instance = cv2.imread(self.instance_list[idx], cv2.IMREAD_UNCHANGED) # uint16
79 |
80 | if (self.img_h is not None and self.img_h != image.shape[0]) or \
81 | (self.img_w is not None and self.img_w != image.shape[1]):
82 | image = cv2.resize(image, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
83 | depth = cv2.resize(depth, (self.img_w, self.img_h), interpolation=cv2.INTER_LINEAR)
84 | semantic = cv2.resize(semantic, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
85 | if self.semantic_instance_dir is not None:
86 | instance = cv2.resize(instance, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
87 | T_wc = self.Ts_full[idx]
88 |
89 | self.test_samples["image"].append(image)
90 | self.test_samples["depth"].append(depth)
91 | self.test_samples["semantic"].append(semantic)
92 | if self.semantic_instance_dir is not None:
93 | self.test_samples["instance"].append(instance)
94 | self.test_samples["T_wc"].append(T_wc)
95 |
96 | for key in self.test_samples.keys(): # transform list of np array to array with batch dimension
97 | self.train_samples[key] = np.asarray(self.train_samples[key])
98 | self.test_samples[key] = np.asarray(self.test_samples[key])
99 |
100 | self.semantic_classes = np.unique(
101 | np.concatenate(
102 | (np.unique(self.train_samples["semantic"]),
103 | np.unique(self.test_samples["semantic"])))).astype(np.uint8)
104 | self.num_semantic_class = self.semantic_classes.shape[0] # number of semantic classes, including the void class of 0
105 |
106 | self.colour_map_np = label_colormap()[self.semantic_classes]
107 |         self.mask_ids = np.ones(self.train_num)  # initialise self.mask_ids to all ones
108 |         # 1 means the corresponding label map is used for the semantic loss during training, while 0 means no semantic loss is applied to this frame
109 |
110 |         # remap existing semantic class labels to contiguous labels ranging from 0 to num_class-1
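        # e.g., if the scene contains the raw class ids {0, 12, 37, 93}, they are remapped to {0, 1, 2, 3};
        # the *_clean copies keep a noise-free version of the labels that is used later as a clean reference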
111 | self.train_samples["semantic_clean"] = self.train_samples["semantic"].copy()
112 | self.train_samples["semantic_remap"] = self.train_samples["semantic"].copy()
113 | self.train_samples["semantic_remap_clean"] = self.train_samples["semantic_clean"].copy()
114 |
115 | self.test_samples["semantic_remap"] = self.test_samples["semantic"].copy()
116 |
117 | for i in range(self.num_semantic_class):
118 | self.train_samples["semantic_remap"][self.train_samples["semantic"]== self.semantic_classes[i]] = i
119 | self.train_samples["semantic_remap_clean"][self.train_samples["semantic_clean"]== self.semantic_classes[i]] = i
120 | self.test_samples["semantic_remap"][self.test_samples["semantic"]== self.semantic_classes[i]] = i
121 |
122 |
123 | print()
124 | print("Training Sample Summary:")
125 | for key in self.train_samples.keys():
126 | print("{} has shape of {}, type {}.".format(key, self.train_samples[key].shape, self.train_samples[key].dtype))
127 | print()
128 | print("Testing Sample Summary:")
129 | for key in self.test_samples.keys():
130 | print("{} has shape of {}, type {}.".format(key, self.test_samples[key].shape, self.test_samples[key].dtype))
131 |
132 |
133 | def sample_label_maps(self, sparse_ratio=0.5, K=0, random_sample=False, load_saved=False):
134 | """
135 |         sparse_ratio is the ratio of training label maps that are removed, e.g., 0.3 means 30% of the semantic labels are dropped
136 |         Input:
137 |             sparse_ratio: the percentage of semantic label frames to be *removed*
138 |             K: the number of frames to be removed; if specified (non-zero), it overrides the value computed from sparse_ratio
139 |             random_sample: whether to sample frames randomly or at evenly spaced intervals, True--random sampling; False--interleaved/even sampling
140 |             load_saved: use pre-computed mask_ids from previous experiments
141 | """
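        # Example of the interleaved (random_sample=False) scheme: with train_num=10 and sparse_ratio=0.4, K=4 and the
        # evenly spaced indices [0, 3, 6, 8] are masked out (mask=0), keeping the other 6 label maps; for sparse_ratio>0.5
        # the logic is inverted: all frames are first disabled and N=train_num-K frames are evenly re-enabled.
        # Illustrative usage (assuming `dataset` is an instance of this class):
        #     dataset.sample_label_maps(sparse_ratio=0.5, random_sample=False)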
142 | if load_saved is False:
143 | if K==0:
144 | K = int(self.train_num*sparse_ratio) # number of skipped training frames, mask=0
145 |
146 | N = self.train_num-K # number of used training frames, mask=1
147 |             assert np.sum(self.mask_ids) == self.train_num # sanity check that all masks are available before sampling
148 |
149 | if K==0: # in case sparse_ratio==0:
150 | return
151 |
152 | if random_sample:
153 | self.mask_ids[:K] = 0
154 | np.random.shuffle(self.mask_ids)
155 | else: # sample interleave
156 |                 if sparse_ratio<=0.5: # skip no more than half of the frames
157 | assert K <= self.train_num/2
158 | q, r = divmod(self.train_num, K)
159 | indices = [q*i + min(i, r) for i in range(K)]
160 | self.mask_ids[indices] = 0
161 |
162 |                 else: # skip more than half of the frames
163 | assert K > self.train_num/2
164 | self.mask_ids = np.zeros_like(self.mask_ids) # disable all images and evenly enable N images in total
165 | q, r = divmod(self.train_num, N)
166 | indices = [q*i + min(i, r) for i in range(N)]
167 | self.mask_ids[indices] = 1
168 |
169 | print("{} of {} semantic labels are sampled (sparse ratio: {}).".format(sum(self.mask_ids), len(self.mask_ids), sparse_ratio))
170 | noisy_sem_dir = os.path.join(self.semantic_class_dir, "noisy_pixel_sems_sr{}".format(sparse_ratio))
171 | if not os.path.exists(noisy_sem_dir):
172 | os.makedirs(noisy_sem_dir)
173 | with open(os.path.join(noisy_sem_dir, "mask_ids.npy"), 'wb') as f:
174 | np.save(f, self.mask_ids)
175 | elif load_saved is True:
176 | noisy_sem_dir = os.path.join(self.semantic_class_dir, "noisy_pixel_sems_sr{}".format(sparse_ratio))
177 | self.mask_ids = np.load(os.path.join(noisy_sem_dir, "mask_ids.npy"))
178 |
179 |
180 |
181 | def sample_specific_labels(self, frame_ids, train_ids):
182 | """
183 | Only use dense label maps for specific/selected frames.
184 | """
185 |         assert np.sum(self.mask_ids) == self.train_num # sanity check that all masks are available before sampling
186 |
187 | self.mask_ids = np.zeros_like(self.mask_ids)
188 |
189 | if len(frame_ids)==1 and frame_ids[0] is None:
190 | # we do not add any semantic supervision
191 | return
192 |
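        # frame_ids are indices into the full image sequence; convert them to positions within the training
        # subset (train_ids) so they can index self.mask_ids (this assumes every entry of frame_ids is in train_ids)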
193 | relative_ids = [train_ids.index(x) for x in frame_ids]
194 |
195 | self.mask_ids[relative_ids] = 1
196 |
197 |
198 | def add_pixel_wise_noise_label(self,
199 | sparse_views=False, sparse_ratio=0.0, random_sample=False,
200 | noise_ratio=0.0, visualise_save=False, load_saved=False):
201 | """
202 |         sparse_views: whether we sample a subset of the dense semantic labels for training
203 |         sparse_ratio: the ratio of frames to be removed/skipped when sampling a subset of labels
204 |         random_sample: whether to sample frames randomly or at evenly spaced intervals, True--random sampling; False--interleaved/even sampling
205 |         noise_ratio: the ratio of pixels per frame to be randomly perturbed
206 |         visualise_save: whether to save the noisy labels to disk for later usage
207 |         load_saved: load previously saved noisy labels for training to ensure consistency between experiments
208 | """
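        # For every frame whose mask_id is 1, a random subset of pixels is re-labelled with classes drawn uniformly at
        # random, e.g., with noise_ratio=0.1 on a 640x480 frame, int(307200*0.1)=30720 pixels are perturbed per frame.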
209 |
210 | if not load_saved:
211 | if sparse_views:
212 | self.sample_label_maps(sparse_ratio=sparse_ratio, random_sample=random_sample)
213 | num_pixel = self.img_h * self.img_w
214 | num_pixel_noisy = int(num_pixel*noise_ratio)
215 | train_sem = self.train_samples["semantic_remap"]
216 |
217 | for i in range(len(self.mask_ids)):
218 | if self.mask_ids[i] == 1: # add label noise to unmasked/available labels
219 | noisy_index_1d = np.random.permutation(num_pixel)[:num_pixel_noisy]
220 |                     flat_sem = train_sem[i].flatten()
221 | 
222 |                     flat_sem[noisy_index_1d] = np.random.choice(self.num_semantic_class, num_pixel_noisy)
223 |                     # the labels of the randomly selected num_pixel_noisy pixels are replaced by classes drawn uniformly from [0, self.num_semantic_class-1]; class 0 is the void class
224 |                     train_sem[i] = flat_sem.reshape(self.img_h, self.img_w)
225 |
226 |             print("Added pixel-wise noise to {} of {} semantic labels (noise ratio: {}).".format(sum(self.mask_ids), len(self.mask_ids), noise_ratio))
227 |
228 | if visualise_save:
229 | noisy_sem_dir = os.path.join(self.semantic_class_dir, "noisy_pixel_sems_sr{}_nr{}".format(sparse_ratio, noise_ratio))
230 | if not os.path.exists(noisy_sem_dir):
231 | os.makedirs(noisy_sem_dir)
232 | with open(os.path.join(noisy_sem_dir, "mask_ids.npy"), 'wb') as f:
233 | np.save(f, self.mask_ids)
234 |
235 | vis_noisy_semantic_list = []
236 | vis_semantic_clean_list = []
237 |
238 | colour_map_np = self.colour_map_np
239 |                 # Replica provides 101 classes in total; select the colours of the classes present in this scene
240 |
241 |                 semantic_remap = self.train_samples["semantic_remap"] # [N_train, H, W]
242 |                 semantic_remap_clean = self.train_samples["semantic_remap_clean"] # [N_train, H, W]
243 |
244 | # save semantic labels
245 | for i in range(len(self.mask_ids)):
246 | if self.mask_ids[i] == 1:
247 | vis_noisy_semantic = colour_map_np[semantic_remap[i]] # [H, W, 3]
248 | vis_semantic_clean = colour_map_np[semantic_remap_clean[i]] # [H, W, 3]
249 |
250 | imageio.imwrite(os.path.join(noisy_sem_dir, "semantic_class_{}.png".format(i)), semantic_remap[i])
251 | imageio.imwrite(os.path.join(noisy_sem_dir, "vis_sem_class_{}.png".format(i)), vis_noisy_semantic)
252 |
253 | vis_noisy_semantic_list.append(vis_noisy_semantic)
254 | vis_semantic_clean_list.append(vis_semantic_clean)
255 | else:
256 | # for mask_ids of 0, we skip these frames during training and do not add noise
257 | vis_noisy_semantic = colour_map_np[semantic_remap[i]] # [H, W, 3]
258 | vis_semantic_clean = colour_map_np[semantic_remap_clean[i]] # [H, W, 3]
259 | assert np.all(vis_noisy_semantic==vis_semantic_clean) # apply this check to skipped frames
260 |
261 | imageio.imwrite(os.path.join(noisy_sem_dir, "semantic_class_{}.png".format(i)), semantic_remap[i])
262 | imageio.imwrite(os.path.join(noisy_sem_dir, "vis_sem_class_{}.png".format(i)), vis_noisy_semantic)
263 |
264 | vis_noisy_semantic_list.append(vis_noisy_semantic)
265 | vis_semantic_clean_list.append(vis_semantic_clean)
266 |
267 | imageio.mimwrite(os.path.join(noisy_sem_dir, 'noisy_sem_ratio_{}.mp4'.format(noise_ratio)),
268 | np.stack(vis_noisy_semantic_list, 0), fps=30, quality=8)
269 |
270 | imageio.mimwrite(os.path.join(noisy_sem_dir, 'clean_sem.mp4'),
271 | np.stack(vis_semantic_clean_list, 0), fps=30, quality=8)
272 | else:
273 | print("Load saved noisy labels.")
274 | noisy_sem_dir = os.path.join(self.semantic_class_dir, "noisy_pixel_sems_sr{}_nr{}".format(sparse_ratio, noise_ratio))
275 | assert os.path.exists(noisy_sem_dir)
276 | self.mask_ids = np.load(os.path.join(noisy_sem_dir, "mask_ids.npy"))
277 | semantic_img_list = []
278 | semantic_path_list = sorted(glob.glob(noisy_sem_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
279 | assert len(semantic_path_list)>0
280 | for idx in range(len(self.mask_ids)):
281 | semantic = imread(semantic_path_list[idx])
282 | semantic_img_list.append(semantic)
283 | self.train_samples["semantic_remap"] = np.asarray(semantic_img_list)
284 |
285 |
286 | def add_instance_wise_noise_label(self, sparse_views=False, sparse_ratio=0.0, random_sample=False,
287 | flip_ratio=0.0, uniform_flip=False,
288 | instance_id=[3, 6, 7, 9, 11, 12, 13, 48],
289 | load_saved=False,
290 | visualise_save=False):
291 |
292 |         """ In this function, we test whether Semantic-NeRF can correct wrong instance labels after fusion (training).
293 |         For selected instances, we randomly pick a portion of the frames containing them and change their class labels.
294 |         Input:
295 |             sparse_views: whether to use a subset of the original training set.
296 |             sparse_ratio: the ratio of frames to be dropped.
297 |             random_sample: whether to sample frames randomly or at evenly spaced intervals, True--random sampling; False--interleaved/even sampling
298 |             flip_ratio: among all the frames containing a given instance, the ratio of frames whose labels are flipped
299 |             uniform_flip: True: after sorting the candidate frames by instance area ratio,
300 |                           we uniformly sample frames in which to flip the instance's semantic class;
301 |                           False: we flip the instance's label in the frames with the smallest instance area ratio.
302 |             instance_id: the instance ids of all 8 chairs in Replica Room_2, used for adding region-wise noise
303 |             load_saved: whether to load a previously saved self.mask_ids
304 |             visualise_save: if True, save the processed labels to disk for further usage.
305 |
306 |
307 | """
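        # Overall procedure: (1) optionally sub-sample the training label maps, (2) for each target instance id, collect
        # the training frames in which it is visible together with its visible-area ratio, (3) flip that instance's
        # semantic label to a random class in a flip_ratio fraction of those frames, choosing either the smallest-area
        # frames or an evenly spaced subset depending on uniform_flip.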
308 | num_pixel = self.img_w * self.img_h
309 |
310 | if not load_saved:
311 | if sparse_views:
312 | self.sample_label_maps(sparse_ratio=sparse_ratio, random_sample=random_sample, load_saved=load_saved)
313 | assert self.semantic_instance_dir is not None
314 | # instance_id = [3, 6, 7, 9, 11,12, 13, 48]
315 |             # instance_maps_dict = dict.fromkeys(instance_id, []) # not used: it would make all keys share the same list object because [] is mutable
316 | instance_maps_dict = dict.fromkeys(instance_id)
317 | for k in instance_maps_dict.keys():
318 | instance_maps_dict[k] = list()
319 |
320 |
321 |             # find which training images contain the instances whose labels we want to flip
322 | for img_idx in range(self.train_num):
323 | instance_label_map = self.train_samples["instance"][img_idx]
324 | for ins_idx in instance_id:
325 | instance_ratio = np.sum(instance_label_map==ins_idx)/num_pixel
326 |                     if instance_ratio > 0 and self.mask_ids[img_idx]==1: # the instance is visible in this frame and the frame is sampled into the training set
327 | instance_maps_dict[ins_idx].append([img_idx, instance_ratio])
328 |
329 | num_frame_per_instance_id = np.asarray([len(x) for x in instance_maps_dict.values()])
330 |             num_flip_frame_per_instance_id = (num_frame_per_instance_id*flip_ratio).astype(int)
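            # e.g., an instance visible in 20 kept frames with flip_ratio=0.3 has its label flipped in int(20*0.3)=6 of them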
331 |
332 | for k, v in instance_maps_dict.items():
333 | instance_maps_dict[k] = sorted(instance_maps_dict[k], key=lambda x: x[1]) # sorted, default is ascending order
334 | if not uniform_flip:
335 | # we flip the labels with minimum area ratio,
336 | # the intuition is that the observation is partial and is likely to be wrong.
337 | for i in range(len(instance_id)): # loop over instance id
338 | selected_frame_id = [x[0] for x in instance_maps_dict[instance_id[i]][:num_flip_frame_per_instance_id[i]]]
339 | for m in selected_frame_id: # loop over image ids having the selected instance
340 | self.train_samples["semantic_remap"][m][self.train_samples["instance"][m]==instance_id[i]] = np.random.choice(self.num_semantic_class, 1)
341 | else:
342 | if flip_ratio<=0.5: # flip less/equal than half frames
343 | for i in range(len(instance_id)): # loop over instance id
344 | K = num_flip_frame_per_instance_id[i]
345 | q, r = divmod(num_frame_per_instance_id[i], K)
346 | indices_to_flip = [q*i + min(i, r) for i in range(K)]
347 | valid_frame_id_list = [x[0] for x in instance_maps_dict[instance_id[i]]]
348 | selected_frame_id = [valid_frame_id_list[flip_id] for flip_id in indices_to_flip]
349 | for m in selected_frame_id: # loop over image ids having the selected instance
350 | self.train_samples["semantic_remap"][m][self.train_samples["instance"][m]==instance_id[i]] = np.random.choice(self.num_semantic_class, 1)
351 |
352 | else: # flip more than half frames
353 | for i in range(len(instance_id)): # loop over instance id
354 | K = num_flip_frame_per_instance_id[i]
355 | N = num_frame_per_instance_id[i] - K
356 | q, r = divmod(num_frame_per_instance_id[i], N)
357 | indices_NOT_flip = [q*i + min(i, r) for i in range(N)]
358 | indices_to_flip = [x for x in range(num_frame_per_instance_id[i]) if x not in indices_NOT_flip]
359 | valid_frame_id_list = [x[0] for x in instance_maps_dict[instance_id[i]]]
360 | selected_frame_id = [valid_frame_id_list[flip_id] for flip_id in indices_to_flip]
361 | for m in selected_frame_id: # loop over image ids having the selected instance
362 | self.train_samples["semantic_remap"][m][self.train_samples["instance"][m]==instance_id[i]] = np.random.choice(self.num_semantic_class, 1)
363 |
364 | colour_map_np = self.colour_map_np
365 | vis_flip_semantic = [colour_map_np[sem] for sem in self.train_samples["semantic_remap"]]
366 | vis_gt_semantic = [colour_map_np[sem] for sem in self.train_samples["semantic_remap_clean"]]
367 |
368 | if visualise_save:
369 | flip_sem_dir = os.path.join(self.semantic_class_dir, "flipped_chair_nr_{}".format(flip_ratio))
370 | if not os.path.exists(flip_sem_dir):
371 | os.makedirs(flip_sem_dir)
372 |
373 | with open(os.path.join(flip_sem_dir, "mask_ids.npy"), 'wb') as f:
374 | np.save(f, self.mask_ids)
375 |
376 | for i in range(len(vis_flip_semantic)):
377 | imageio.imwrite(os.path.join(flip_sem_dir, "semantic_class_{}.png".format(i)), self.train_samples["semantic_remap"][i])
378 | imageio.imwrite(os.path.join(flip_sem_dir, "vis_sem_class_{}.png".format(i)), vis_flip_semantic[i])
379 | imageio.imwrite(os.path.join(flip_sem_dir, "vis_gt_{}.png".format(i)), vis_gt_semantic[i])
380 | else:
381 | print("Load saved noisy labels.")
382 | flip_sem_dir = os.path.join(self.semantic_class_dir, "flipped_chair_nr_{}".format(flip_ratio))
383 | assert os.path.exists(flip_sem_dir)
384 | self.mask_ids = np.load(os.path.join(flip_sem_dir, "mask_ids.npy"))
385 | semantic_img_list = []
386 | semantic_path_list = sorted(glob.glob(flip_sem_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
387 | assert len(semantic_path_list)>0
388 | for idx in range(len(self.mask_ids)):
389 | semantic = imread(semantic_path_list[idx])
390 | semantic_img_list.append(semantic)
391 | self.train_samples["semantic_remap"] = np.asarray(semantic_img_list)
392 |
393 | def super_resolve_label(self, down_scale_factor=8, dense_supervision=True):
394 |         """ In super-resolution mode, to create the training supervision we downscale the ground-truth label maps by a given scaling factor to
395 |         throw away information, and then upscale them back to the original size.
396 | 
397 |         Two setups for upscaling:
398 |         (1) Sparse label: we set the interpolated label pixels to the void label 0, so losses are only applied on a grid of every down_scale_factor pixels
399 |         (2) Dense label: we also penalise the interpolated pixel values
400 | 
401 |         down_scale_factor: the scaling factor for down-sampling and up-sampling
402 |         dense_supervision: dense label mode or not.
403 | """
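        # e.g., with down_scale_factor=8 on a 640x480 label map: dense mode resizes to 80x60 and back to 640x480 with
        # nearest-neighbour interpolation; sparse mode instead keeps only every 8th pixel in each dimension
        # (80*60=4800 of 307200 labels, ~1.6%) and sets the remaining pixels to the void class 0.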
404 | if down_scale_factor==1:
405 | return
406 | if dense_supervision: # for dense labelling, we down-scale and up-scale label maps again
407 | scaled_low_res_train_label = []
408 | for i in range(self.train_num):
409 | low_res_label = cv2.resize(self.train_samples["semantic_remap"][i],
410 | (self.img_w//down_scale_factor, self.img_h//down_scale_factor),
411 | interpolation=cv2.INTER_NEAREST)
412 |
413 | scaled_low_res_label = cv2.resize(low_res_label, (self.img_w, self.img_h), interpolation=cv2.INTER_NEAREST)
414 | scaled_low_res_train_label.append(scaled_low_res_label)
415 |
416 | scaled_low_res_train_label = np.asarray(scaled_low_res_train_label)
417 |
418 | self.train_samples["semantic_remap"] = scaled_low_res_train_label
419 |
420 | else: # for sparse labelling, we only penalise strictly on valid pixel positions
421 | valid_low_res_pixel_mask = np.zeros((self.img_h, self.img_w))
422 | valid_low_res_pixel_mask[::down_scale_factor, ::down_scale_factor]=1
423 | self.train_samples["semantic_remap"] = (self.train_samples["semantic_remap"]*valid_low_res_pixel_mask[None,...]).astype(np.uint8)
424 | # we mask all the decimated pixel label to void class==0
425 |
426 | def simulate_user_click_partial(self, perc=0, load_saved=False, visualise_save=True):
427 | """
428 | Generate partial label maps for label propagation task.
429 | perc: the percentage of pixels per class per image to be preserved to simulate partial user clicks
430 | 0: single-clicks
431 | 1: 1% user clicks
432 | 5: 5% user clicks
433 |
434 |         load_saved: if True, load saved partial clicks to guarantee reproducibility; if False, create new partial labels
435 |         visualise_save: if True, save the processed partial labels to disk for further usage such as visualisation.
436 | """
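        # For perc=0 a single randomly chosen pixel per class is kept; for perc>0 a random seed pixel per class is grown
        # by repeated 5x5 dilation (restricted to that class's mask) until roughly perc% of that class's pixels are covered.
        # Illustrative usage (assuming `dataset` is an instance of this class):
        #     dataset.simulate_user_click_partial(perc=1, load_saved=False, visualise_save=True)  # ~1% of pixels per class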
437 | assert perc<=100 and perc >= 0
438 | assert self.train_num == self.train_samples["semantic_remap"].shape[0]
439 |         single_click = perc == 0 # single_click: whether to use only a single click from each class
440 |         perc = perc/100.0 # convert the percentage into a fraction
441 | if not load_saved:
442 |
443 | if single_click:
444 | click_semantic_map = []
445 | for i in range(self.train_num):
446 | if (i+1)%10==0:
447 | print("Generating partial label of ratio {} for frame {}/{}.".format(perc, i, self.train_num))
448 | im = self.train_samples["semantic_remap"][i]
449 | void_class = [0]
450 | label_class = np.unique(im).tolist()
451 |                 valid_class = [c for c in label_class if c not in void_class]
452 | im_ = np.zeros_like(im)
453 | for l in valid_class:
454 | label_idx = np.transpose(np.nonzero(im == l))
455 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False)
456 | label_idx_ = label_idx[sample_ind]
457 | im_[label_idx_[:, 0], label_idx_[:, 1]] = l
458 | click_semantic_map.append(im_)
459 | click_semantic_map = np.asarray(click_semantic_map).astype(np.uint8)
460 | self.train_samples["semantic_remap"] = click_semantic_map
461 |
462 |                 print('Partial label generation with single-click sampling has completed.')
463 |
464 | elif perc>0 and not single_click:
465 | click_semantic_map = []
466 | for i in range(self.train_num):
467 | if (i+1)%10==0:
468 | print("Generating partial label of ratio {} for frame {}/{}.".format(perc, i, self.train_num))
469 | im = self.train_samples["semantic_remap"][i]
470 | void_class = [0]
471 | label_class = np.unique(im).tolist() # find the unique class-ids in the current training label
472 | valid_class = [c for c in label_class if c not in void_class]
473 |
474 | im_ = np.zeros_like(im)
475 | for l in valid_class:
476 | label_mask = np.zeros_like(im)
477 | label_mask_ = im == l # binary mask of pixels equal to class-l
478 | label_idx = np.transpose(np.nonzero(label_mask_)) # Nx2
479 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False) # shape [1,]
480 | label_idx_ = label_idx[sample_ind] # shape [1, 2]
481 |                     target_num = int(perc * label_mask_.sum()) # target number of sampled pixels: perc times the total number of pixels of class l in this image
482 |                     label_mask[label_idx_[0, 0], label_idx_[0, 1]] = 1 # all-zero mask with only the selected seed pixel set to 1
483 | label_mask_true = label_mask
484 | # label_mask_true initially has only 1 True pixel, we continuously grow mask until reach expected percentage
485 |
486 | while label_mask_true.sum() < target_num:
487 | num_before_grow = label_mask_true.sum()
488 | label_mask = cv2.dilate(label_mask, kernel=np.ones([5, 5]))
489 | label_mask_true = label_mask * label_mask_
490 | num_after_grow = label_mask_true.sum()
491 | if num_after_grow==num_before_grow:
492 | print("Initialise Another Seed for Growing!")
493 |                             # The current region has stopped growing, which means the local area is fully covered,
494 |                             # so we initialise another seed to keep it growing
495 |                             uncovered_region_mask = label_mask_ - label_mask_true # pixels equal to 1 are unsampled regions that belong to the current class
496 | label_idx = np.transpose(np.nonzero(uncovered_region_mask)) # Nx2
497 | sample_ind = np.random.choice(label_idx.shape[0], 1, replace=False) # shape [1,]
498 | label_idx_ = label_idx[sample_ind] # shape [1, 2]
499 | label_mask[label_idx_[0, 0], label_idx_[0, 1]] = 1
500 |
501 | im_[label_mask_true.astype(bool)] = l
502 | click_semantic_map.append(im_)
503 |
504 | click_semantic_map = np.asarray(click_semantic_map).astype(np.uint8)
505 | self.train_samples["semantic_remap"] = click_semantic_map
506 |                 print('Partial label generation with region-growing sampling has completed.')
507 | else:
508 | assert False
509 |
510 | if visualise_save:
511 | partial_sem_dir = os.path.join(self.semantic_class_dir, "partial_perc_{}".format(perc))
512 | if not os.path.exists(partial_sem_dir):
513 | os.makedirs(partial_sem_dir)
514 | colour_map_np = self.colour_map_np
515 | vis_partial_sem = []
516 | for i in range(self.train_num):
517 | vis_partial_semantic = colour_map_np[self.train_samples["semantic_remap"][i]] # [H, W, 3]
518 | imageio.imwrite(os.path.join(partial_sem_dir, "semantic_class_{}.png".format(i)), self.train_samples["semantic_remap"][i])
519 | imageio.imwrite(os.path.join(partial_sem_dir, "vis_sem_class_{}.png".format(i)), vis_partial_semantic)
520 | vis_partial_sem.append(vis_partial_semantic)
521 |
522 | imageio.mimwrite(os.path.join(partial_sem_dir, 'partial_sem.mp4'), self.train_samples["semantic_remap"], fps=30, quality=8)
523 | imageio.mimwrite(os.path.join(partial_sem_dir, 'vis_partial_sem.mp4'), np.stack(vis_partial_sem, 0), fps=30, quality=8)
524 |
525 | else: # load saved single-click/partial semantics
526 | saved_partial_sem_dir = os.path.join(self.semantic_class_dir, "partial_perc_{}".format(perc))
527 | semantic_img_list = []
528 | semantic_path_list = sorted(glob.glob(saved_partial_sem_dir + '/semantic_class_*.png'), key=lambda file_name: int(file_name.split("_")[-1][:-4]))
529 | assert len(semantic_path_list)>0
530 | for idx in range(self.train_num):
531 | semantic = imread(semantic_path_list[idx])
532 | semantic_img_list.append(semantic)
533 | self.train_samples["semantic_remap"] = np.asarray(semantic_img_list).astype(np.uint8)
--------------------------------------------------------------------------------