├── .gitignore ├── README.md ├── __init__.py ├── calibration.py ├── data ├── asymmetric_box.obj ├── asymmetric_box_annotated.glb ├── asymmetric_box_annotated.obj ├── asymmetric_box_rotated.obj ├── asymmetric_box_rotated.obj.mtl ├── snapshot00.png ├── snapshot01.png ├── snapshot02.png ├── snapshot03.png └── subsampled_asymmetric_structure_corrected.ply ├── example ├── KA03_Depth.pgm ├── KA04_Depth.pgm ├── KA05_Depth.pgm ├── KA06_Depth.pgm ├── KAX1_Depth.pgm ├── KAX2_Depth.pgm ├── input.pgm └── input_visualization.png ├── inference.py ├── main.py ├── pyrender_patch ├── camera.diff ├── constants.diff └── renderer.diff ├── requirements.txt └── src ├── __init__.py ├── dataset ├── __init__.py ├── box_pose_dataset.py ├── box_pose_dataset_factory.py ├── calibration_dataset.py ├── depthmap_dataset.py ├── depthmap_val_dataset.py ├── distance_unit.py ├── noise │ ├── __init__.py │ ├── noise.py │ └── noise_adder.py ├── real_dataloader.py ├── rendering │ ├── __init__.py │ ├── box_renderer.py │ └── transformations.py ├── round_robin_multidataset.py └── samplers │ ├── __init__.py │ ├── background │ ├── __init__.py │ ├── image_background_sampler.py │ └── noisy_background_generator.py │ ├── base_sampler.py │ ├── intrinsics_generator.py │ ├── pose │ ├── __init__.py │ ├── pose_sampler.py │ └── pose_sampler_utils.py │ └── random_sampler.py ├── io ├── __init__.py ├── box_model_loader.py ├── calibration_result.py ├── multidimentional_imsave.py └── plywrite.py ├── models ├── UNet_mask_max.py ├── __init__.py └── blocks.py ├── other ├── __init__.py ├── misc.py └── opt.py ├── supervision ├── __init__.py ├── losses.py └── metrics.py └── utils ├── __init__.py ├── box_collider.py ├── geometric.py ├── image_utils.py ├── projections.py ├── save_pointcloud.py ├── train_utils.py ├── transformations.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .vscode/* 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Soft Procrustes for Markerless Volumetric Sensor Alignment 2 | An easy to use depth sensor extrinsics calibration method. It is integrated and being used in a robust [Volumetric Capture](https://vcl3d.github.io/VolumetricCapture/) system. 3 | 4 | 8 | [![Paper](http://img.shields.io/badge/paper-arxiv.2003.10176-critical.svg?style=plastic)](https://arxiv.org/pdf/2003.10176.pdf) 9 | [![Conference](http://img.shields.io/badge/IEEEVR-2020-blue.svg?style=plastic)](http://ieeevr.org/2020/) 10 | [![Project Page](http://img.shields.io/badge/Project-Page-blueviolet.svg?style=plastic)](https://vcl3d.github.io/StructureNet/) 11 | 12 | ## Abstract 13 | 14 | With the advent of consumer grade depth sensors, low-cost volumetric capture systems are easier to deploy. Their wider adoption though depends on their usability and by extension on the practicality of spatially aligning multiple sensors. Most existing alignment approaches employ visual patterns, e.g. checkerboards, or markers and require high user involvement and technical knowledge. More user-friendly and easier-to-use approaches rely on markerless methods that exploit geometric patterns of a physical structure. However, current SoA approaches are bounded by restrictions in the placement and the number of sensors. In this work, we improve markerless data-driven correspondence estimation to achieve more robust and flexible multi-sensor spatial alignment. In particular, we incorporate geometric constraints in an end-to-end manner into a typical segmentation based model and bridge the intermediate dense classification task with the targeted pose estimation one. This is accomplished by a soft, differentiable procrustes analysis that regularizes the segmentation and achieves higher extrinsic calibration performance in expanded sensor placement configurations, while being unrestricted by the number of sensors of the volumetric capture system. Our model is experimentally shown to achieve similar results with marker-based methods and outperform the markerless ones, while also being robust to the pose variations of the calibration structure. 
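The alignment at the heart of the method is a soft, weighted Procrustes solve between predicted dense correspondences and the known calibration structure, made differentiable so that the pose error can flow back into the segmentation network. For intuition only, below is a minimal PyTorch sketch of such a weighted Kabsch/Procrustes alignment; it is illustrative, not the repository's exact implementation (see `ExtrinsicsCalculator` and `compute_soft_correspondences` in `src/utils/geometric.py`), and the shapes and weighting scheme are assumptions.

```python
import torch

def weighted_procrustes(src, dst, w, eps=1e-8):
    """Differentiable weighted rigid alignment (Kabsch/Umeyama), mapping src -> dst.

    src, dst : (N, 3) corresponding 3D points (e.g. per-class centroids)
    w        : (N,)   non-negative soft weights (e.g. predicted class confidences)
    Returns R (3, 3), t (3,) such that dst ~= src @ R.T + t.
    """
    w = w / (w.sum() + eps)                         # normalize weights
    mu_src = (w[:, None] * src).sum(dim=0)          # weighted centroids
    mu_dst = (w[:, None] * dst).sum(dim=0)
    src_c, dst_c = src - mu_src, dst - mu_dst       # center both point sets
    cov = (w[:, None] * src_c).t() @ dst_c          # 3x3 weighted cross-covariance
    U, S, V = torch.svd(cov)                        # cov = U diag(S) V^T
    d = torch.det(V @ U.t())                        # -1 if a reflection, +1 otherwise
    D = torch.diag(torch.cat([torch.ones_like(S[:2]), d.unsqueeze(0)]))
    R = V @ D @ U.t()                               # closest proper rotation
    t = mu_dst - R @ mu_src
    return R, t
```

Because every step (weighted means, SVD, matrix products) is differentiable, a pose loss computed from `R, t` can be back-propagated through the segmentation predictions, which is what allows the Procrustes term to regularize the dense classification task end-to-end.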
15 | 16 | 17 | 18 | ## Requirements 19 | - Python 3.6.7 20 | - [PyTorch 1.2 + CUDA 9.2](https://pytorch.org/get-started/previous-versions/#v120) 21 | 22 | ## Installation 23 | (A new Python environment is highly recommended) 24 | - Install the required packages with `pip install -r requirements.txt` 25 | **Only for training** 26 | - Install [tinyobjloader](https://github.com/tinyobjloader/tinyobjloader) by cloning/downloading this repository, navigating to its python folder and running `python setup.py install` 27 | - Install our custom patches that disable multisampling in pyrender: 28 | - Download [UnixUtils](https://sourceforge.net/projects/unxutils/files/latest/download) and add the executable to your PATH 29 | - Execute `patch.exe -u pyrender_patch/renderer.diff` 30 | - Execute `patch.exe -u pyrender_patch/constants.diff` 31 | - Execute `patch.exe -u pyrender_patch/camera.diff` 32 | 33 | ## Download the model 34 | We provide a pretrained model [here](https://drive.google.com/file/d/1e3nZRYTsNRD1Nn8_48tpFTmYa4EZdraz) for inference purposes. 35 | 36 | ## Inference 37 | In order to run our code, a pretrained model must be available, either produced by training or downloaded [here](#download-the-model). 38 | Once every requirement is installed, simply run `python inference.py [options...]` 39 | 40 | **Important options** 41 | 42 | `--input_path` : directory which contains the depthmaps (in .pgm format) (see [example of input data](./example)) 43 | 44 | `--output_path` : directory where results will be saved 45 | 46 | `--scale` : multiplication factor that converts depthmap data to meters 47 | 48 | `--saved_params_path` : path to the downloaded model 49 | 50 | In order to see all available options with a brief description, please execute `python inference.py -h`. An example invocation is also shown below. 51 | 52 | ## Training 53 | In order to train our model from scratch, one has to download the backgrounds that are used at training time for augmentation. 54 | **TBD: upload and add links**. 55 | Once every requirement is installed and the backgrounds are downloaded, the model can be trained. 56 | Execute `python main.py -h` to see all available options. 
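**Example inference invocation** (illustrative; the output directory and the model filename are placeholders for your own paths, and `0.001` is the default scale that converts millimetre depth values to meters):

```
python inference.py --input_path ./example --output_path ./results --scale 0.001 --saved_params_path ./model.pth
```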
57 | 58 | # Video 59 | A video explaining our method, accompanying our submission to IEEEVR 2020, can be found at https://www.youtube.com/watch?v=0l5neSMt-2Y 60 | 61 | # Citation 62 | If you use this code and/or models, please cite the following: 63 | ``` 64 | @inproceedings{sterzentsenko2020deepsoftprocrustes, 65 | title={Deep Soft Procrustes for Markerless Volumetric Sensor Alignment}, 66 | author={Vladimiros Sterzentsenko and Alexandros Doumanoglou and Spyridon Thermos and Nikolaos Zioulis and Dimitrios Zarpalas and Petros Daras}, 67 | booktitle={2020 IEEE Conference on Virtual Reality and 3D User Interfaces (VR)}, 68 | year={2020}, 69 | organization={IEEE} 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/__init__.py -------------------------------------------------------------------------------- /calibration.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.utils as tu 3 | import cv2 4 | import numpy as np 5 | import os 6 | import sys 7 | import argparse 8 | import src.models as models 9 | from src.utils.geometric import ExtrinsicsCalculator, BoxRenderFlags 10 | from src.utils.save_pointcloud import save_ply 11 | 12 | 13 | 14 | STATIC_IMAGE_SIZE = (180, 320) # (height,width) 15 | STATIC_BOX_FLAG = BoxRenderFlags.LABEL_DOWN_AS_BACKGROUND 16 | STATIC_DEVICE = 'cpu' 17 | 18 | def parse_arguments(args): 19 | usage_text = ( 20 | "Calibration script." 21 | "Usage: python calibration.py [options]," 22 | " with [options]:" 23 | ) 24 | parser = argparse.ArgumentParser(description=usage_text) 25 | parser.add_argument("-d","--depth", type = str, help = "Path to depthmap", required = True) 26 | parser.add_argument("-m","--model_path", type = str, help = "Path to saved model params", required = True) 27 | parser.add_argument("-o","--save_path", type = str, help = "Path to save results", required = True) 28 | parser.add_argument("-b","--box_path", type = str, help = "Path to box", default = r"data/asymmetric_box.obj") 29 | parser.add_argument("-s","--scale", type = float, help = "Factor that converts depthmap to meters") 30 | parser.add_argument("-i","--intrinsics", nargs=4, metavar=('fx', 'cx', 'fy', 'cy',), 31 | help="camera intrinsic factors", type=float, 32 | default=None) 33 | return parser.parse_known_args(args) 34 | 35 | def align( 36 | model : torch.nn.Module, 37 | depthmap : torch.Tensor, 38 | intrinsics : torch.Tensor, 39 | box_path : str, 40 | device : str, 41 | save_path : str, 42 | box_flag : BoxRenderFlags = STATIC_BOX_FLAG, 43 | confidence : float = 0.75, 44 | ) -> None: 45 | os.makedirs(save_path, exist_ok=True) 46 | predictions = model(depthmap)[1] 47 | _, nclasses, height, width = predictions.shape 48 | 49 | labels = predictions.argmax(dim = 1, keepdim = True) 50 | one_hot = torch.nn.functional.one_hot(labels.squeeze(),num_classes = nclasses).permute(2,0,1).unsqueeze(0) 51 | 52 | extrinsics_calculator = ExtrinsicsCalculator(box_path, device, box_flag) 53 | 54 | extrinsics, _, pointclouds = extrinsics_calculator.forward(depthmap, one_hot, intrinsics) 55 | extrinsics = extrinsics.squeeze().numpy().T 56 | pointclouds = pointclouds[0].permute(1,2,0).reshape(-1,3).numpy() 57 | save_ply(os.path.join(save_path, "original.ply"),pointclouds , scale = 1) 58 | pcloud_homo = 
np.concatenate([pointclouds, np.ones((height * width, 1))], axis = 1) 59 | transformed_pcloud = pcloud_homo.dot(extrinsics) 60 | save_ply(os.path.join(save_path, "transformed.ply"),transformed_pcloud[:,:3], scale = 1) 61 | np.savetxt(os.path.join(save_path, "extrinsics.txt"), extrinsics) 62 | print(extrinsics) 63 | 64 | 65 | 66 | def loadModel( 67 | path_to_model : str, 68 | device : str 69 | ) -> torch.nn.Module: 70 | print("Loading previously saved model from {}".format(path_to_model)) 71 | checkpoint = torch.load(path_to_model) 72 | model_params = { 73 | 'width': 320, 74 | 'height': 180, 75 | 'ndf': 32, 76 | 'upsample_type': "nearest", 77 | } 78 | 79 | 80 | model_name = checkpoint['model_name'] 81 | if 'nclasses' in checkpoint: 82 | nclasses = checkpoint['nclasses'] 83 | 84 | if 'ndf' in checkpoint: 85 | model_params['ndf'] = checkpoint['ndf'] 86 | 87 | model_params['nclasses'] = nclasses 88 | model = models.get_UNet_model(model_name, model_params) 89 | model.load_state_dict(checkpoint['state_dict']) 90 | model.to(device) 91 | model.eval() 92 | return model 93 | 94 | def loadData( 95 | path_to_depthmap : str, 96 | scale : float 97 | ) -> torch.Tensor: 98 | depth_np = cv2.imread(path_to_depthmap, -1).astype(np.float32) 99 | depth_np = cv2.resize(depth_np, STATIC_IMAGE_SIZE[::-1], interpolation=cv2.INTER_NEAREST) 100 | depth_t = torch.from_numpy(depth_np).unsqueeze(0).unsqueeze(0) / scale 101 | return depth_t 102 | 103 | if __name__ == "__main__": 104 | args, _ = parse_arguments(sys.argv) 105 | intrinsics = torch.FloatTensor([ 106 | args.intrinsics[0], 107 | 0.0, 108 | args.intrinsics[1], 109 | 0.0, 110 | args.intrinsics[2], 111 | args.intrinsics[3], 112 | 0.0, 113 | 0.0, 114 | 1.0 115 | ]).view((3,3)).unsqueeze(0) 116 | 117 | model = loadModel(args.model_path,STATIC_DEVICE) 118 | depthmap = loadData(args.depth, args.scale) 119 | 120 | align( 121 | model, 122 | depthmap, 123 | intrinsics, 124 | args.box_path, 125 | STATIC_DEVICE, 126 | args.save_path 127 | ) 128 | 129 | -------------------------------------------------------------------------------- /data/asymmetric_box.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.78 (sub 0) OBJ File: '' 2 | # www.blender.org 3 | mtllib SemanticAsymmetricCalibrationNew.mtl 4 | o mid_bottom_front_2f 5 | v 50.000397 -165.000549 -0.000796 6 | v 50.000427 390 -0.000583 7 | v 50.000397 -165.000396 -405.000305 8 | v 50.000427 390.999604 -405.000092 9 | vn 1.0000 -0.0000 -0.0000 10 | usemtl material_0 11 | s 1 12 | f 3//1 4//1 2//1 13 | f 2//1 1//1 3//1 14 | o mid_bottom_back_2b 15 | v -279.999908 -165.000762 -0.000796 16 | v -279.999847 390 -405.000305 17 | v -279.999908 -165.000762 -405.000305 18 | v -279.999847 390 -0.000583 19 | vn -1.0000 0.0000 0.0000 20 | usemtl material_0 21 | s 1 22 | f 7//2 5//2 8//2 23 | f 8//2 6//2 7//2 24 | o mid_bottom_down_2d 25 | v -279.999847 390 -405.000305 26 | v 50.000397 -165.000396 -405.000305 27 | v -279.999908 -165.00061 -405.000305 28 | v 50.000427 390 -405.000305 29 | vn 0.0000 0.0000 -1.0000 30 | usemtl material_0 31 | s 1 32 | f 9//3 10//3 11//3 33 | f 10//3 9//3 12//3 34 | o mid_bottom_up_2u 35 | v 50.000397 -165.000549 -0.000796 36 | v 50.000427 390 -0.000583 37 | v -279.999908 -165.000762 -0.000796 38 | v -279.999847 390 -0.000583 39 | vn 0.0000 -0.0000 1.0000 40 | usemtl material_0 41 | s 1 42 | f 13//4 14//4 15//4 43 | f 16//4 15//4 14//4 44 | o mid_bottom_right_2r 45 | v 50.000397 -165.000549 -0.000796 46 | v -279.999908 -165.000762 -0.000796 47 | v 
50.000397 -165.000396 -405.000305 48 | v -279.999908 -165.00061 -405.000305 49 | vn 0.0000 -1.0000 -0.0000 50 | usemtl material_0 51 | s 1 52 | f 19//5 18//5 20//5 53 | f 19//5 17//5 18//5 54 | o mid_bottom_left_2l 55 | v 50.000427 390 -0.000583 56 | v -279.999847 390 -405.000305 57 | v 50.000427 390 -405.000305 58 | v -279.999847 390 -0.000583 59 | vn -0.0000 1.0000 0.0000 60 | usemtl material_0 61 | s 1 62 | f 24//6 21//6 23//6 63 | f 23//6 22//6 24//6 64 | o mid_top_right_3r 65 | v -505 -165.000549 405.000061 66 | v 50.000427 -165.000549 405.000061 67 | v -505 -165.000488 0.000554 68 | v 50.000427 -165.000488 0.000554 69 | vn 0.0000 -1.0000 -0.0000 70 | usemtl material_0 71 | s 1 72 | f 27//7 28//7 26//7 73 | f 26//7 25//7 27//7 74 | o mid_top_left_3l 75 | v -505 164.999451 405.000122 76 | v 50.000427 164.999512 0.000608 77 | v -505 164.999512 0.000608 78 | v 50.000427 164.999451 405.000122 79 | vn 0.0000 1.0000 0.0000 80 | usemtl material_0 81 | s 1 82 | f 31//8 29//8 32//8 83 | f 32//8 30//8 31//8 84 | o mid_top_down_3d 85 | v 50.000427 164.999512 0.000608 86 | v -505 -165.000488 0.000554 87 | v -505 164.999512 0.000608 88 | v 50.000427 -165.000488 0.000554 89 | vn 0.0000 0.0000 -1.0000 90 | usemtl material_0 91 | s 1 92 | f 33//9 34//9 35//9 93 | f 34//9 33//9 36//9 94 | o mid_top_up_3u 95 | v -505 -165.000549 405.000061 96 | v 50.000427 -165.000549 405.000061 97 | v -505 164.999451 405.000122 98 | v 50.000427 164.999451 405.000122 99 | vn 0.0000 -0.0000 1.0000 100 | usemtl material_0 101 | s 1 102 | f 37//10 38//10 39//10 103 | f 40//10 39//10 38//10 104 | o mid_top_front_3f 105 | v 50.000427 -165.000549 405.000061 106 | v 50.000427 164.999512 0.000608 107 | v 50.000427 -165.000488 0.000554 108 | v 50.000427 164.999451 405.000122 109 | vn 1.0000 0.0000 0.0000 110 | usemtl material_0 111 | s 1 112 | f 44//11 41//11 43//11 113 | f 43//11 42//11 44//11 114 | o mid_top_back_3b 115 | v -505 -165.000549 405.000061 116 | v -505 164.999451 405.000122 117 | v -505 -165.000488 0.000554 118 | v -505 164.999512 0.000608 119 | vn -1.0000 -0.0000 0.0000 120 | usemtl material_0 121 | s 1 122 | f 47//12 46//12 48//12 123 | f 47//12 45//12 46//12 124 | o bottom_right_1r 125 | v -279.999969 -165.000427 -405.000305 126 | v 275 -165.000427 -405.000305 127 | v -279.999969 -165.000351 -810 128 | v 275 -165.000351 -810 129 | vn 0.0000 -1.0000 -0.0000 130 | usemtl material_0 131 | s 1 132 | f 51//13 52//13 50//13 133 | f 50//13 49//13 51//13 134 | o bottom_up_1u 135 | v -279.999969 -165.000427 -405.000305 136 | v 275 -165.000427 -405.000305 137 | v -279.999969 164.999573 -405.000305 138 | v 275 164.999573 -405.000305 139 | vn 0.0000 -0.0000 1.0000 140 | usemtl material_0 141 | s 1 142 | f 53//14 54//14 55//14 143 | f 56//14 55//14 54//14 144 | o bottom_back_1b 145 | v -279.999969 -165.000427 -405.000305 146 | v -279.999969 164.999573 -405.000305 147 | v -279.999969 -165.000351 -810 148 | v -279.999969 164.999649 -810 149 | vn -1.0000 0.0000 0.0000 150 | usemtl material_0 151 | s 1 152 | f 59//15 58//15 60//15 153 | f 59//15 57//15 58//15 154 | o bottom_front_1f 155 | v 275 -165.000427 -405.000305 156 | v 275 164.999649 -810 157 | v 275 -165.000351 -810 158 | v 275 164.999573 -405.000305 159 | vn 1.0000 0.0000 0.0000 160 | usemtl material_0 161 | s 1 162 | f 64//16 61//16 63//16 163 | f 63//16 62//16 64//16 164 | o bottom_down_1d 165 | v 275 164.999649 -810 166 | v -279.999969 -165.000351 -810 167 | v -279.999969 164.999649 -810 168 | v 275 -165.000351 -810 169 | vn 0.0000 0.0000 -1.0000 170 | usemtl 
material_0 171 | s 1 172 | f 65//17 66//17 67//17 173 | f 66//17 65//17 68//17 174 | o bottom_left_1l 175 | v -279.999969 164.999573 -405.000305 176 | v 275 164.999649 -810 177 | v -279.999969 164.999649 -810 178 | v 275 164.999573 -405.000305 179 | vn 0.0000 1.0000 0.0000 180 | usemtl material_0 181 | s 1 182 | f 71//18 69//18 72//18 183 | f 72//18 70//18 71//18 184 | o top_front_4f 185 | v 50.000427 -390 810 186 | v 50.000458 165.000504 810 187 | v 50.000427 -390 405 188 | v 50.000458 165.000702 405 189 | vn 1.0000 -0.0000 -0.0000 190 | usemtl material_0 191 | s 1 192 | f 75//19 76//19 74//19 193 | f 74//19 73//19 75//19 194 | o top_down_4d 195 | v -279.999817 165.000458 405.000305 196 | v 50.000427 -390 405.000092 197 | v -279.999878 -390 405.000092 198 | v 50.000458 165.000702 405.000305 199 | vn -0.0000 0.0000 -1.0000 200 | usemtl material_0 201 | s 1 202 | f 77//20 78//20 79//20 203 | f 78//20 77//20 80//20 204 | o top_right_4r 205 | v 50.000427 -390 810 206 | v -279.999878 -390 810 207 | v 50.000427 -390 405.000092 208 | v -279.999878 -390 405.000092 209 | vn 0.0000 -1.0000 -0.0000 210 | usemtl material_0 211 | s 1 212 | f 83//21 82//21 84//21 213 | f 83//21 81//21 82//21 214 | o top_up_4u 215 | v 50.000427 -390 810 216 | v 50.000458 165.000504 810 217 | v -279.999878 -390 810 218 | v -279.999817 165.000259 810 219 | vn 0.0000 -0.0000 1.0000 220 | usemtl material_0 221 | s 1 222 | f 85//22 86//22 87//22 223 | f 88//22 87//22 86//22 224 | o top_back_4b 225 | v -279.999878 -390 810 226 | v -279.999817 165.000458 405.000305 227 | v -279.999878 -390 405.000092 228 | v -279.999817 165.000259 810 229 | vn -1.0000 0.0000 0.0000 230 | usemtl material_0 231 | s 1 232 | f 91//23 89//23 92//23 233 | f 92//23 90//23 91//23 234 | o top_left_4l 235 | v 50.000458 165.000504 810 236 | v -279.999817 165.000458 405.000305 237 | v 50.000458 165.000702 405.000305 238 | v -279.999817 165.000259 810 239 | vn -0.0000 1.0000 0.0000 240 | usemtl material_0 241 | s 1 242 | f 96//24 93//24 95//24 243 | f 95//24 94//24 96//24 244 | -------------------------------------------------------------------------------- /data/asymmetric_box_annotated.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/data/asymmetric_box_annotated.glb -------------------------------------------------------------------------------- /data/asymmetric_box_annotated.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.78 (sub 0) OBJ File: '' 2 | # www.blender.org 3 | mtllib SemanticAsymmetricCalibrationNew.mtl 4 | o mid_bottom_front_2f 5 | v 50.000397 -165.000549 -0.000796 6 | v 50.000427 394.999421 -0.000583 7 | v 50.000397 -165.000396 -410.000305 8 | v 50.000427 394.999604 -410.000092 9 | vn 1.0000 -0.0000 -0.0000 10 | usemtl material_0 11 | s 1 12 | f 3//1 4//1 2//1 13 | f 2//1 1//1 3//1 14 | o mid_bottom_back_2b 15 | v -279.999908 -165.000762 -0.000796 16 | v -279.999847 394.99936 -410.000092 17 | v -279.999908 -165.000762 -410.000305 18 | v -279.999847 394.99936 -0.000583 19 | vn -1.0000 0.0000 0.0000 20 | usemtl material_0 21 | s 1 22 | f 7//2 5//2 8//2 23 | f 8//2 6//2 7//2 24 | o mid_bottom_down_2d 25 | v -279.999847 394.99936 -410.000092 26 | v 50.000397 -165.000396 -410.000305 27 | v -279.999908 -165.00061 -410.000305 28 | v 50.000427 394.999604 -410.000092 29 | vn 0.0000 0.0000 -1.0000 30 | usemtl material_0 31 | s 1 32 | f 9//3 10//3 11//3 33 | f 10//3 
9//3 12//3 34 | o mid_bottom_up_2u 35 | v 50.000397 -165.000549 -0.000796 36 | v 50.000427 394.999421 -0.000583 37 | v -279.999908 -165.000762 -0.000796 38 | v -279.999847 394.999177 -0.000583 39 | vn 0.0000 -0.0000 1.0000 40 | usemtl material_0 41 | s 1 42 | f 13//4 14//4 15//4 43 | f 16//4 15//4 14//4 44 | o mid_bottom_right_2r 45 | v 50.000397 -165.000549 -0.000796 46 | v -279.999908 -165.000762 -0.000796 47 | v 50.000397 -165.000396 -410.000305 48 | v -279.999908 -165.00061 -410.000305 49 | vn 0.0000 -1.0000 -0.0000 50 | usemtl material_0 51 | s 1 52 | f 19//5 18//5 20//5 53 | f 19//5 17//5 18//5 54 | o mid_bottom_left_2l 55 | v 50.000427 394.999421 -0.000583 56 | v -279.999847 394.99936 -410.000092 57 | v 50.000427 394.999604 -410.000092 58 | v -279.999847 394.999177 -0.000583 59 | vn -0.0000 1.0000 0.0000 60 | usemtl material_0 61 | s 1 62 | f 24//6 21//6 23//6 63 | f 23//6 22//6 24//6 64 | o mid_top_right_3r 65 | v -509.999573 -165.000549 410.000061 66 | v 50.000427 -165.000549 410.000061 67 | v -509.999573 -165.000488 0.000554 68 | v 50.000427 -165.000488 0.000554 69 | vn 0.0000 -1.0000 -0.0000 70 | usemtl material_0 71 | s 1 72 | f 27//7 28//7 26//7 73 | f 26//7 25//7 27//7 74 | o mid_top_left_3l 75 | v -509.999573 164.999451 410.000122 76 | v 50.000427 164.999512 0.000608 77 | v -509.999573 164.999512 0.000608 78 | v 50.000427 164.999451 410.000122 79 | vn 0.0000 1.0000 0.0000 80 | usemtl material_0 81 | s 1 82 | f 31//8 29//8 32//8 83 | f 32//8 30//8 31//8 84 | o mid_top_down_3d 85 | v 50.000427 164.999512 0.000608 86 | v -509.999573 -165.000488 0.000554 87 | v -509.999573 164.999512 0.000608 88 | v 50.000427 -165.000488 0.000554 89 | vn 0.0000 0.0000 -1.0000 90 | usemtl material_0 91 | s 1 92 | f 33//9 34//9 35//9 93 | f 34//9 33//9 36//9 94 | o mid_top_up_3u 95 | v -509.999573 -165.000549 410.000061 96 | v 50.000427 -165.000549 410.000061 97 | v -509.999573 164.999451 410.000122 98 | v 50.000427 164.999451 410.000122 99 | vn 0.0000 -0.0000 1.0000 100 | usemtl material_0 101 | s 1 102 | f 37//10 38//10 39//10 103 | f 40//10 39//10 38//10 104 | o mid_top_front_3f 105 | v 50.000427 -165.000549 410.000061 106 | v 50.000427 164.999512 0.000608 107 | v 50.000427 -165.000488 0.000554 108 | v 50.000427 164.999451 410.000122 109 | vn 1.0000 0.0000 0.0000 110 | usemtl material_0 111 | s 1 112 | f 44//11 41//11 43//11 113 | f 43//11 42//11 44//11 114 | o mid_top_back_3b 115 | v -509.999573 -165.000549 410.000061 116 | v -509.999573 164.999451 410.000122 117 | v -509.999573 -165.000488 0.000554 118 | v -509.999573 164.999512 0.000608 119 | vn -1.0000 -0.0000 0.0000 120 | usemtl material_0 121 | s 1 122 | f 47//12 46//12 48//12 123 | f 47//12 45//12 46//12 124 | o bottom_right_1r 125 | v -279.999969 -165.000427 -410.000336 126 | v 280.000031 -165.000427 -410.000336 127 | v -279.999969 -165.000351 -819.999817 128 | v 280.000031 -165.000351 -819.999817 129 | vn 0.0000 -1.0000 -0.0000 130 | usemtl material_0 131 | s 1 132 | f 51//13 52//13 50//13 133 | f 50//13 49//13 51//13 134 | o bottom_up_1u 135 | v -279.999969 -165.000427 -410.000336 136 | v 280.000031 -165.000427 -410.000336 137 | v -279.999969 164.999573 -410.000275 138 | v 280.000031 164.999573 -410.000275 139 | vn 0.0000 -0.0000 1.0000 140 | usemtl material_0 141 | s 1 142 | f 53//14 54//14 55//14 143 | f 56//14 55//14 54//14 144 | o bottom_back_1b 145 | v -279.999969 -165.000427 -410.000336 146 | v -279.999969 164.999573 -410.000275 147 | v -279.999969 -165.000351 -819.999817 148 | v -279.999969 164.999649 -819.999817 149 | vn 
-1.0000 0.0000 0.0000 150 | usemtl material_0 151 | s 1 152 | f 59//15 58//15 60//15 153 | f 59//15 57//15 58//15 154 | o bottom_front_1f 155 | v 280.000031 -165.000427 -410.000336 156 | v 280.000031 164.999649 -819.999817 157 | v 280.000031 -165.000351 -819.999817 158 | v 280.000031 164.999573 -410.000275 159 | vn 1.0000 0.0000 0.0000 160 | usemtl material_0 161 | s 1 162 | f 64//16 61//16 63//16 163 | f 63//16 62//16 64//16 164 | o bottom_down_1d 165 | v 280.000031 164.999649 -819.999817 166 | v -279.999969 -165.000351 -819.999817 167 | v -279.999969 164.999649 -819.999817 168 | v 280.000031 -165.000351 -819.999817 169 | vn 0.0000 0.0000 -1.0000 170 | usemtl material_0 171 | s 1 172 | f 65//17 66//17 67//17 173 | f 66//17 65//17 68//17 174 | o bottom_left_1l 175 | v -279.999969 164.999573 -410.000275 176 | v 280.000031 164.999649 -819.999817 177 | v -279.999969 164.999649 -819.999817 178 | v 280.000031 164.999573 -410.000275 179 | vn 0.0000 1.0000 0.0000 180 | usemtl material_0 181 | s 1 182 | f 71//18 69//18 72//18 183 | f 72//18 70//18 71//18 184 | o top_front_4f 185 | v 50.000427 -394.999451 819.999573 186 | v 50.000458 165.000504 819.999756 187 | v 50.000427 -394.999298 410.000092 188 | v 50.000458 165.000702 410.000305 189 | vn 1.0000 -0.0000 -0.0000 190 | usemtl material_0 191 | s 1 192 | f 75//19 76//19 74//19 193 | f 74//19 73//19 75//19 194 | o top_down_4d 195 | v -279.999817 165.000458 410.000305 196 | v 50.000427 -394.999298 410.000092 197 | v -279.999878 -394.999512 410.000092 198 | v 50.000458 165.000702 410.000305 199 | vn -0.0000 0.0000 -1.0000 200 | usemtl material_0 201 | s 1 202 | f 77//20 78//20 79//20 203 | f 78//20 77//20 80//20 204 | o top_right_4r 205 | v 50.000427 -394.999451 819.999573 206 | v -279.999878 -394.999664 819.999573 207 | v 50.000427 -394.999298 410.000092 208 | v -279.999878 -394.999512 410.000092 209 | vn 0.0000 -1.0000 -0.0000 210 | usemtl material_0 211 | s 1 212 | f 83//21 82//21 84//21 213 | f 83//21 81//21 82//21 214 | o top_up_4u 215 | v 50.000427 -394.999451 819.999573 216 | v 50.000458 165.000504 819.999756 217 | v -279.999878 -394.999664 819.999573 218 | v -279.999817 165.000259 819.999756 219 | vn 0.0000 -0.0000 1.0000 220 | usemtl material_0 221 | s 1 222 | f 85//22 86//22 87//22 223 | f 88//22 87//22 86//22 224 | o top_back_4b 225 | v -279.999878 -394.999664 819.999573 226 | v -279.999817 165.000458 410.000305 227 | v -279.999878 -394.999512 410.000092 228 | v -279.999817 165.000259 819.999756 229 | vn -1.0000 0.0000 0.0000 230 | usemtl material_0 231 | s 1 232 | f 91//23 89//23 92//23 233 | f 92//23 90//23 91//23 234 | o top_left_4l 235 | v 50.000458 165.000504 819.999756 236 | v -279.999817 165.000458 410.000305 237 | v 50.000458 165.000702 410.000305 238 | v -279.999817 165.000259 819.999756 239 | vn -0.0000 1.0000 0.0000 240 | usemtl material_0 241 | s 1 242 | f 96//24 93//24 95//24 243 | f 95//24 94//24 96//24 244 | -------------------------------------------------------------------------------- /data/asymmetric_box_rotated.obj: -------------------------------------------------------------------------------- 1 | #### 2 | # 3 | # OBJ File Generated by Meshlab 4 | # 5 | #### 6 | # Object asymmetric_box_rotated.obj 7 | # 8 | # Vertices: 96 9 | # Faces: 48 10 | # 11 | #### 12 | mtllib ./asymmetric_box_rotated.obj.mtl 13 | 14 | vn 1.000000 0.000000 -0.000000 15 | v 50.000397 0.000796 -165.000549 0.752941 0.752941 0.752941 16 | vn 1.000000 0.000000 -0.000000 17 | v 50.000427 0.000583 394.999420 0.752941 0.752941 0.752941 18 | vn 1.000000 
0.000000 -0.000000 19 | v 50.000397 410.000305 -165.000397 0.752941 0.752941 0.752941 20 | vn 1.000000 0.000000 -0.000000 21 | v 50.000427 410.000092 394.999603 0.752941 0.752941 0.752941 22 | vn -1.000000 0.000000 0.000000 23 | v -279.999908 0.000796 -165.000763 0.752941 0.752941 0.752941 24 | vn -1.000000 0.000000 0.000000 25 | v -279.999847 410.000092 394.999359 0.752941 0.752941 0.752941 26 | vn -1.000000 0.000000 0.000000 27 | v -279.999908 410.000305 -165.000763 0.752941 0.752941 0.752941 28 | vn -1.000000 0.000000 0.000000 29 | v -279.999847 0.000583 394.999359 0.752941 0.752941 0.752941 30 | vn -0.000000 1.000000 0.000000 31 | v -279.999847 410.000092 394.999359 0.752941 0.752941 0.752941 32 | vn -0.000000 1.000000 0.000000 33 | v 50.000397 410.000305 -165.000397 0.752941 0.752941 0.752941 34 | vn -0.000000 1.000000 0.000000 35 | v -279.999908 410.000305 -165.000610 0.752941 0.752941 0.752941 36 | vn -0.000000 1.000000 0.000000 37 | v 50.000427 410.000092 394.999603 0.752941 0.752941 0.752941 38 | vn 0.000000 -1.000000 -0.000000 39 | v 50.000397 0.000796 -165.000549 0.752941 0.752941 0.752941 40 | vn 0.000000 -1.000000 -0.000000 41 | v 50.000427 0.000583 394.999420 0.752941 0.752941 0.752941 42 | vn 0.000000 -1.000000 -0.000000 43 | v -279.999908 0.000796 -165.000763 0.752941 0.752941 0.752941 44 | vn 0.000000 -1.000000 -0.000000 45 | v -279.999847 0.000583 394.999176 0.752941 0.752941 0.752941 46 | vn 0.000001 0.000000 -1.000000 47 | v 50.000397 0.000796 -165.000549 0.752941 0.752941 0.752941 48 | vn 0.000001 0.000000 -1.000000 49 | v -279.999908 0.000796 -165.000763 0.752941 0.752941 0.752941 50 | vn 0.000001 0.000000 -1.000000 51 | v 50.000397 410.000305 -165.000397 0.752941 0.752941 0.752941 52 | vn 0.000001 0.000000 -1.000000 53 | v -279.999908 410.000305 -165.000610 0.752941 0.752941 0.752941 54 | vn -0.000001 -0.000000 1.000000 55 | v 50.000427 0.000583 394.999420 0.752941 0.752941 0.752941 56 | vn -0.000001 -0.000000 1.000000 57 | v -279.999847 410.000092 394.999359 0.752941 0.752941 0.752941 58 | vn -0.000001 -0.000000 1.000000 59 | v 50.000427 410.000092 394.999603 0.752941 0.752941 0.752941 60 | vn -0.000001 -0.000000 1.000000 61 | v -279.999847 0.000583 394.999176 0.752941 0.752941 0.752941 62 | vn 0.000000 0.000000 -1.000000 63 | v -509.999573 -410.000061 -165.000549 0.752941 0.752941 0.752941 64 | vn 0.000000 0.000000 -1.000000 65 | v 50.000427 -410.000061 -165.000549 0.752941 0.752941 0.752941 66 | vn 0.000000 0.000000 -1.000000 67 | v -509.999573 -0.000554 -165.000488 0.752941 0.752941 0.752941 68 | vn 0.000000 0.000000 -1.000000 69 | v 50.000427 -0.000554 -165.000488 0.752941 0.752941 0.752941 70 | vn 0.000000 -0.000000 1.000000 71 | v -509.999573 -410.000122 164.999451 0.752941 0.752941 0.752941 72 | vn 0.000000 -0.000000 1.000000 73 | v 50.000427 -0.000608 164.999512 0.752941 0.752941 0.752941 74 | vn 0.000000 -0.000000 1.000000 75 | v -509.999573 -0.000608 164.999512 0.752941 0.752941 0.752941 76 | vn 0.000000 -0.000000 1.000000 77 | v 50.000427 -410.000122 164.999451 0.752941 0.752941 0.752941 78 | vn 0.000000 1.000000 0.000000 79 | v 50.000427 -0.000608 164.999512 0.752941 0.752941 0.752941 80 | vn 0.000000 1.000000 0.000000 81 | v -509.999573 -0.000554 -165.000488 0.752941 0.752941 0.752941 82 | vn 0.000000 1.000000 0.000000 83 | v -509.999573 -0.000608 164.999512 0.752941 0.752941 0.752941 84 | vn 0.000000 1.000000 0.000000 85 | v 50.000427 -0.000554 -165.000488 0.752941 0.752941 0.752941 86 | vn 0.000000 -1.000000 -0.000000 87 | v -509.999573 -410.000061 
-165.000549 0.752941 0.752941 0.752941 88 | vn 0.000000 -1.000000 -0.000000 89 | v 50.000427 -410.000061 -165.000549 0.752941 0.752941 0.752941 90 | vn 0.000000 -1.000000 -0.000000 91 | v -509.999573 -410.000122 164.999451 0.752941 0.752941 0.752941 92 | vn 0.000000 -1.000000 -0.000000 93 | v 50.000427 -410.000122 164.999451 0.752941 0.752941 0.752941 94 | vn 1.000000 0.000000 0.000000 95 | v 50.000427 -410.000061 -165.000549 0.752941 0.752941 0.752941 96 | vn 1.000000 0.000000 0.000000 97 | v 50.000427 -0.000608 164.999512 0.752941 0.752941 0.752941 98 | vn 1.000000 0.000000 0.000000 99 | v 50.000427 -0.000554 -165.000488 0.752941 0.752941 0.752941 100 | vn 1.000000 0.000000 0.000000 101 | v 50.000427 -410.000122 164.999451 0.752941 0.752941 0.752941 102 | vn -1.000000 0.000000 0.000000 103 | v -509.999573 -410.000061 -165.000549 0.752941 0.752941 0.752941 104 | vn -1.000000 0.000000 0.000000 105 | v -509.999573 -410.000122 164.999451 0.752941 0.752941 0.752941 106 | vn -1.000000 0.000000 0.000000 107 | v -509.999573 -0.000554 -165.000488 0.752941 0.752941 0.752941 108 | vn -1.000000 0.000000 0.000000 109 | v -509.999573 -0.000608 164.999512 0.752941 0.752941 0.752941 110 | vn 0.000000 0.000000 -1.000000 111 | v -279.999969 410.000336 -165.000427 0.752941 0.752941 0.752941 112 | vn 0.000000 0.000000 -1.000000 113 | v 280.000031 410.000336 -165.000427 0.752941 0.752941 0.752941 114 | vn 0.000000 0.000000 -1.000000 115 | v -279.999969 819.999817 -165.000351 0.752941 0.752941 0.752941 116 | vn 0.000000 0.000000 -1.000000 117 | v 280.000031 819.999817 -165.000351 0.752941 0.752941 0.752941 118 | vn 0.000000 -1.000000 -0.000000 119 | v -279.999969 410.000336 -165.000427 0.752941 0.752941 0.752941 120 | vn 0.000000 -1.000000 -0.000000 121 | v 280.000031 410.000336 -165.000427 0.752941 0.752941 0.752941 122 | vn 0.000000 -1.000000 -0.000000 123 | v -279.999969 410.000275 164.999573 0.752941 0.752941 0.752941 124 | vn 0.000000 -1.000000 -0.000000 125 | v 280.000031 410.000275 164.999573 0.752941 0.752941 0.752941 126 | vn -1.000000 0.000000 0.000000 127 | v -279.999969 410.000336 -165.000427 0.752941 0.752941 0.752941 128 | vn -1.000000 0.000000 0.000000 129 | v -279.999969 410.000275 164.999573 0.752941 0.752941 0.752941 130 | vn -1.000000 0.000000 0.000000 131 | v -279.999969 819.999817 -165.000351 0.752941 0.752941 0.752941 132 | vn -1.000000 0.000000 0.000000 133 | v -279.999969 819.999817 164.999649 0.752941 0.752941 0.752941 134 | vn 1.000000 0.000000 0.000000 135 | v 280.000031 410.000336 -165.000427 0.752941 0.752941 0.752941 136 | vn 1.000000 0.000000 0.000000 137 | v 280.000031 819.999817 164.999649 0.752941 0.752941 0.752941 138 | vn 1.000000 0.000000 0.000000 139 | v 280.000031 819.999817 -165.000351 0.752941 0.752941 0.752941 140 | vn 1.000000 0.000000 0.000000 141 | v 280.000031 410.000275 164.999573 0.752941 0.752941 0.752941 142 | vn 0.000000 1.000000 0.000000 143 | v 280.000031 819.999817 164.999649 0.752941 0.752941 0.752941 144 | vn 0.000000 1.000000 0.000000 145 | v -279.999969 819.999817 -165.000351 0.752941 0.752941 0.752941 146 | vn 0.000000 1.000000 0.000000 147 | v -279.999969 819.999817 164.999649 0.752941 0.752941 0.752941 148 | vn 0.000000 1.000000 0.000000 149 | v 280.000031 819.999817 -165.000351 0.752941 0.752941 0.752941 150 | vn 0.000000 -0.000000 1.000000 151 | v -279.999969 410.000275 164.999573 0.752941 0.752941 0.752941 152 | vn 0.000000 -0.000000 1.000000 153 | v 280.000031 819.999817 164.999649 0.752941 0.752941 0.752941 154 | vn 0.000000 -0.000000 1.000000 
155 | v -279.999969 819.999817 164.999649 0.752941 0.752941 0.752941 156 | vn 0.000000 -0.000000 1.000000 157 | v 280.000031 410.000275 164.999573 0.752941 0.752941 0.752941 158 | vn 1.000000 0.000000 -0.000000 159 | v 50.000427 -819.999573 -394.999451 0.752941 0.752941 0.752941 160 | vn 1.000000 0.000000 -0.000000 161 | v 50.000458 -819.999756 165.000504 0.752941 0.752941 0.752941 162 | vn 1.000000 0.000000 -0.000000 163 | v 50.000427 -410.000092 -394.999298 0.752941 0.752941 0.752941 164 | vn 1.000000 0.000000 -0.000000 165 | v 50.000458 -410.000305 165.000702 0.752941 0.752941 0.752941 166 | vn -0.000000 1.000000 0.000000 167 | v -279.999817 -410.000305 165.000458 0.752941 0.752941 0.752941 168 | vn -0.000000 1.000000 0.000000 169 | v 50.000427 -410.000092 -394.999298 0.752941 0.752941 0.752941 170 | vn -0.000000 1.000000 0.000000 171 | v -279.999878 -410.000092 -394.999512 0.752941 0.752941 0.752941 172 | vn -0.000000 1.000000 0.000000 173 | v 50.000458 -410.000305 165.000702 0.752941 0.752941 0.752941 174 | vn 0.000001 0.000000 -1.000000 175 | v 50.000427 -819.999573 -394.999451 0.752941 0.752941 0.752941 176 | vn 0.000001 0.000000 -1.000000 177 | v -279.999878 -819.999573 -394.999664 0.752941 0.752941 0.752941 178 | vn 0.000001 0.000000 -1.000000 179 | v 50.000427 -410.000092 -394.999298 0.752941 0.752941 0.752941 180 | vn 0.000001 0.000000 -1.000000 181 | v -279.999878 -410.000092 -394.999512 0.752941 0.752941 0.752941 182 | vn 0.000000 -1.000000 -0.000000 183 | v 50.000427 -819.999573 -394.999451 0.752941 0.752941 0.752941 184 | vn 0.000000 -1.000000 -0.000000 185 | v 50.000458 -819.999756 165.000504 0.752941 0.752941 0.752941 186 | vn 0.000000 -1.000000 -0.000000 187 | v -279.999878 -819.999573 -394.999664 0.752941 0.752941 0.752941 188 | vn 0.000000 -1.000000 -0.000000 189 | v -279.999817 -819.999756 165.000259 0.752941 0.752941 0.752941 190 | vn -1.000000 -0.000000 0.000000 191 | v -279.999878 -819.999573 -394.999664 0.752941 0.752941 0.752941 192 | vn -1.000000 -0.000000 0.000000 193 | v -279.999817 -410.000305 165.000458 0.752941 0.752941 0.752941 194 | vn -1.000000 -0.000000 0.000000 195 | v -279.999878 -410.000092 -394.999512 0.752941 0.752941 0.752941 196 | vn -1.000000 -0.000000 0.000000 197 | v -279.999817 -819.999756 165.000259 0.752941 0.752941 0.752941 198 | vn -0.000001 -0.000000 1.000000 199 | v 50.000458 -819.999756 165.000504 0.752941 0.752941 0.752941 200 | vn -0.000001 -0.000000 1.000000 201 | v -279.999817 -410.000305 165.000458 0.752941 0.752941 0.752941 202 | vn -0.000001 -0.000000 1.000000 203 | v 50.000458 -410.000305 165.000702 0.752941 0.752941 0.752941 204 | vn -0.000001 -0.000000 1.000000 205 | v -279.999817 -819.999756 165.000259 0.752941 0.752941 0.752941 206 | # 96 vertices, 0 vertices normals 207 | 208 | 209 | usemtl material_0 210 | f 3//3 4//4 2//2 211 | f 2//2 1//1 3//3 212 | f 7//7 5//5 8//8 213 | f 8//8 6//6 7//7 214 | f 9//9 10//10 11//11 215 | f 10//10 9//9 12//12 216 | f 13//13 14//14 15//15 217 | f 16//16 15//15 14//14 218 | f 19//19 18//18 20//20 219 | f 19//19 17//17 18//18 220 | f 24//24 21//21 23//23 221 | f 23//23 22//22 24//24 222 | f 27//27 28//28 26//26 223 | f 26//26 25//25 27//27 224 | f 31//31 29//29 32//32 225 | f 32//32 30//30 31//31 226 | f 33//33 34//34 35//35 227 | f 34//34 33//33 36//36 228 | f 37//37 38//38 39//39 229 | f 40//40 39//39 38//38 230 | f 44//44 41//41 43//43 231 | f 43//43 42//42 44//44 232 | f 47//47 46//46 48//48 233 | f 47//47 45//45 46//46 234 | f 51//51 52//52 50//50 235 | f 50//50 49//49 51//51 236 | f 
53//53 54//54 55//55 237 | f 56//56 55//55 54//54 238 | f 59//59 58//58 60//60 239 | f 59//59 57//57 58//58 240 | f 64//64 61//61 63//63 241 | f 63//63 62//62 64//64 242 | f 65//65 66//66 67//67 243 | f 66//66 65//65 68//68 244 | f 71//71 69//69 72//72 245 | f 72//72 70//70 71//71 246 | f 75//75 76//76 74//74 247 | f 74//74 73//73 75//75 248 | f 77//77 78//78 79//79 249 | f 78//78 77//77 80//80 250 | f 83//83 82//82 84//84 251 | f 83//83 81//81 82//82 252 | f 85//85 86//86 87//87 253 | f 88//88 87//87 86//86 254 | f 91//91 89//89 92//92 255 | f 92//92 90//90 91//91 256 | f 96//96 93//93 95//95 257 | f 95//95 94//94 96//96 258 | # 48 faces, 0 coords texture 259 | 260 | # End of File 261 | -------------------------------------------------------------------------------- /data/asymmetric_box_rotated.obj.mtl: -------------------------------------------------------------------------------- 1 | # 2 | # Wavefront material file 3 | # Converted by Meshlab Group 4 | # 5 | 6 | newmtl material_0 7 | Ka 0.200000 0.200000 0.200000 8 | Kd 0.752941 0.752941 0.752941 9 | Ks 1.000000 1.000000 1.000000 10 | Tr 1.000000 11 | illum 2 12 | Ns 0.000000 13 | 14 | -------------------------------------------------------------------------------- /data/snapshot00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/data/snapshot00.png -------------------------------------------------------------------------------- /data/snapshot01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/data/snapshot01.png -------------------------------------------------------------------------------- /data/snapshot02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/data/snapshot02.png -------------------------------------------------------------------------------- /data/snapshot03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/data/snapshot03.png -------------------------------------------------------------------------------- /example/input_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/example/input_visualization.png -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import datetime 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | import src.models as models 10 | import src.dataset.box_pose_dataset_factory as dataset_factory 11 | import src.dataset.samplers.pose.pose_sampler as pose_sampler 12 | import src.dataset.samplers.intrinsics_generator as intrinsics_generator 13 | import src.dataset.depthmap_dataset as depthmap_dataset 14 | from src.utils import geometric 15 | from src.io import plywrite, box_model_loader 16 | #from src.io import multidimentional_imsave 17 | from src.utils.image_utils import colorize_label_map, get_color_map_nclasses_17, get_color_map_nclasses_25, get_color_map_nclasses_21 18 | 
19 | import cv2 20 | import numpy as np 21 | import random 22 | import subprocess 23 | from tqdm import tqdm 24 | 25 | from src.utils import projections 26 | from src.dataset.rendering.box_renderer import BoxRenderFlags 27 | from src.utils.geometric import compute_soft_correspondences 28 | 29 | def str2bool(v): 30 | if isinstance(v, bool): 31 | return v 32 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 33 | return True 34 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 35 | return False 36 | else: 37 | raise argparse.ArgumentTypeError('Boolean value expected.') 38 | 39 | def parse_arguments(args): 40 | usage_text = ( 41 | "StructureNet inference." 42 | "Usage: python inference.py [options]," 43 | " with [options]:" 44 | ) 45 | parser = argparse.ArgumentParser(description=usage_text) 46 | # evaluation 47 | parser.add_argument('--confidence_threshold', type = float, default = 0.0, help ='confidence probability threshold to reject uncofident predictions') 48 | parser.add_argument('--scale', type = float, default = 0.001, help = 'Factor that converts input to meters') 49 | # gpu 50 | parser.add_argument('--batch_size', type = int, default = 24, help = 'Batch size for inference') 51 | # paths 52 | parser.add_argument('--input_path', type = str, help = "Path to the input depth maps to test against") 53 | parser.add_argument('--output_path', type = str, help = "Path to output directory") 54 | 55 | #model 56 | parser.add_argument('--saved_params_path', default = "default", type=str, help = 'Path to model params file') 57 | parser.add_argument('--nclasses', default = 25, type=int, help = 'Number of classes of the model, if not defined inside the checkpoint file') 58 | parser.add_argument('--ndf', default=8, type = int,help = 'Ndf of model') 59 | 60 | # hardware 61 | parser.add_argument('-g','--gpu', type=str, default='0', help='The ids of the GPU(s) that will be utilized. (e.g. 0 or 0,1, or 0,2). Use -1 for CPU.') 62 | # debug 63 | parser.add_argument('--debug', type=int, default=0, help ="debug output. 
1 true, 0 false") 64 | 65 | return parser.parse_known_args(args) 66 | 67 | 68 | 69 | 70 | def inference(args,device): 71 | 72 | #create model parameters 73 | model_params = { 74 | 'width': 320, 75 | 'height': 180, 76 | 'ndf': 32, 77 | 'upsample_type': "nearest", 78 | } 79 | 80 | #random setup 81 | rnd_seed = 1234 82 | random.seed(rnd_seed) # this will generate fixed seeds of subcomponents that create the datasets (factory uses random.random() to initialize seeds) 83 | torch.random.manual_seed(rnd_seed) 84 | 85 | print("Loading previously saved model from {}".format(args.saved_params_path)) 86 | checkpoint = torch.load(args.saved_params_path) 87 | 88 | color_func = { 89 | 17 : get_color_map_nclasses_17, 90 | 21 : get_color_map_nclasses_21, 91 | 25 : get_color_map_nclasses_25 92 | } 93 | 94 | model_name = checkpoint['model_name'] 95 | if 'nclasses' in checkpoint: 96 | nclasses = checkpoint['nclasses'] 97 | else: 98 | nclasses = args.nclasses 99 | 100 | if 'ndf' in checkpoint: 101 | model_params['ndf'] = checkpoint['ndf'] 102 | 103 | model_params['nclasses'] = nclasses 104 | model = models.get_UNet_model(model_name, model_params) 105 | model.load_state_dict(checkpoint['state_dict']) 106 | model.to(device) 107 | model.eval() 108 | 109 | print('Loaded model name: {}'.format(model_name)) 110 | 111 | datasetparams = depthmap_dataset.DepthmapDatasetParams(args.input_path, args.scale) # scale millimeters to meters 112 | dsiterator = depthmap_dataset.DepthmapDataset(datasetparams) 113 | 114 | dataset = torch.utils.data.DataLoader(dsiterator,\ 115 | batch_size = args.batch_size, shuffle=False,\ 116 | num_workers = 0, pin_memory=False) 117 | 118 | confidence_threshold = args.confidence_threshold 119 | frame_index = 0 120 | 121 | pbar = tqdm(total=dataset.__len__()) 122 | 123 | 124 | for batch_id, batch in enumerate(dataset): 125 | 126 | #resize input 127 | _,_,h,w = batch.shape 128 | batch_d = nn.functional.interpolate(batch, size=[180, 320], mode='nearest').to(device) 129 | 130 | #inference 131 | 132 | pred = model(batch_d) 133 | if (len(pred) == 2): 134 | activs, out = pred 135 | elif (len(pred) == 3): 136 | activs, heat_pred, out = pred 137 | elif (len(pred) == 4): 138 | activs, heat_pred, out, normals = pred 139 | else: 140 | print("unexpected model return value. 
expected tuple of length 2, 3 or 4.") 141 | break 142 | 143 | batch_size = batch.shape[0] 144 | for index in range(batch_size): 145 | 146 | fpath_label_pred = args.output_path + "\\" + str(frame_index) + '_label_pred.png' 147 | 148 | confidence_t, labels_pred_t = out[index].max(0) 149 | confidence_t = torch.exp(confidence_t) # convert log probability to probability 150 | labels_pred_t [confidence_t < confidence_threshold] = nclasses # uncertain classs 151 | 152 | 153 | labels_pred_t = nn.functional.interpolate(labels_pred_t.unsqueeze(0).unsqueeze(0).float(), size=[h, w], mode='nearest').to(device).squeeze().long() 154 | labels_pred = labels_pred_t.cpu().data.numpy() 155 | 156 | labels_pred_n = colorize_label_map(labels_pred, color_func[nclasses]()) 157 | 158 | 159 | cv2.imwrite(fpath_label_pred,labels_pred_n) 160 | fpath_normals_gt = args.output_path + "\\" + str(frame_index) + '_normals_gt.png' 161 | 162 | 163 | 164 | 165 | frame_index += 1 166 | pbar.update() 167 | 168 | 169 | 170 | if __name__ == "__main__": 171 | args, uknown = parse_arguments(sys.argv) 172 | gpus = [int(id) for id in args.gpu.split(',') if int(id) >= 0] 173 | device = torch.device("cuda:{}" .format(gpus[0]) if torch.cuda.is_available() and len(gpus) > 0 and gpus[0] >= 0 else "cpu") 174 | 175 | inference(args, device) -------------------------------------------------------------------------------- /pyrender_patch/camera.diff: -------------------------------------------------------------------------------- 1 | --- E:\PyEnvs\env\Lib\site-packages\pyrender\camera_original.py Wed Jun 26 16:18:23 2019 2 | +++ E:\PyEnvs\env\Lib\site-packages\pyrender\camera.py Wed Jun 26 16:21:35 2019 3 | @@ -332,6 +332,8 @@ 4 | If not specified, defaults to 100.0. 5 | name : str, optional 6 | The user-defined name of this object. 7 | + 8 | + aldoum note: fx,fy not in meters but in pixels. since get camera matrix width/height are in pixels ! verified mathematically about its correctness! 9 | """ 10 | 11 | def __init__(self, 12 | -------------------------------------------------------------------------------- /pyrender_patch/constants.diff: -------------------------------------------------------------------------------- 1 | --- constants_original.py Mon Jun 24 16:48:40 2019 2 | +++ constants.py Mon Jun 24 16:32:24 2019 3 | @@ -49,7 +49,8 @@ 4 | """Do not cull back faces.""" 5 | RGBA = 2048 6 | """Render the color buffer with the alpha channel enabled.""" 7 | - 8 | + DISABLE_MULTISAMPLING = 4096 9 | + """Disable multi sampling.""" 10 | 11 | class TextAlign: 12 | """Text alignment options for captions. 
13 | -------------------------------------------------------------------------------- /pyrender_patch/renderer.diff: -------------------------------------------------------------------------------- 1 | --- renderer_original.py Mon Jun 24 16:48:40 2019 2 | +++ renderer.py Mon Jun 24 16:40:27 2019 3 | @@ -324,7 +324,8 @@ 4 | # Clear it 5 | glClearColor(*scene.bg_color) 6 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) 7 | - glEnable(GL_MULTISAMPLE) 8 | + if not (flags & RenderFlags.DISABLE_MULTISAMPLING): 9 | + glEnable(GL_MULTISAMPLE) 10 | 11 | # Set up camera matrices 12 | V, P = self._get_camera_matrices(scene) 13 | @@ -972,7 +973,7 @@ 14 | 15 | # If using offscreen render, bind main framebuffer 16 | if flags & RenderFlags.OFFSCREEN: 17 | - self._configure_main_framebuffer() 18 | + self._configure_main_framebuffer(flags) 19 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, self._main_fb_ms) 20 | else: 21 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0) 22 | @@ -1014,7 +1015,7 @@ 23 | if self._shadow_fb is not None: 24 | glDeleteFramebuffers(1, [self._shadow_fb]) 25 | 26 | - def _configure_main_framebuffer(self): 27 | + def _configure_main_framebuffer(self, flags): 28 | # If mismatch with prior framebuffer, delete it 29 | if (self._main_fb is not None and 30 | self.viewport_width != self._main_fb_dims[0] or 31 | @@ -1052,10 +1053,14 @@ 32 | # Generate multisample buffer 33 | self._main_cb_ms, self._main_db_ms = glGenRenderbuffers(2) 34 | glBindRenderbuffer(GL_RENDERBUFFER, self._main_cb_ms) 35 | - glRenderbufferStorageMultisample( 36 | - GL_RENDERBUFFER, 4, GL_RGBA, 37 | - self.viewport_width, self.viewport_height 38 | - ) 39 | + if not (flags & RenderFlags.DISABLE_MULTISAMPLING): 40 | + glRenderbufferStorageMultisample( 41 | + GL_RENDERBUFFER, 4, GL_RGBA, 42 | + self.viewport_width, self.viewport_height 43 | + ) 44 | + else: 45 | + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, self.viewport_width, self.viewport_height) 46 | + 47 | glBindRenderbuffer(GL_RENDERBUFFER, self._main_db_ms) 48 | glRenderbufferStorageMultisample( 49 | GL_RENDERBUFFER, 4, GL_DEPTH_COMPONENT24, 50 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.35.0 2 | visdom==0.1.8.8 3 | opencv-python==4.2.0.32 4 | trimesh==2.38.40 5 | pyrender==0.1.28 6 | mgen==1.2.0 7 | shapely -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/src/__init__.py -------------------------------------------------------------------------------- /src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/dataset/box_pose_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.utils.data.dataset import Dataset 3 | from .rendering.box_renderer import * 4 | from .samplers.random_sampler import RandomSampler as rs 5 | from .samplers.pose.pose_sampler import * 6 | from .samplers.intrinsics_generator import * 7 | from ..utils.image_utils import * 8 | from .noise.noise_adder import HoleNoiseAdder 9 | class BoxPoseDatasetParams: 10 | 11 | def __init__(self, box_renderer : BoxRenderer, 
pose_sampler : PoseSampler, 12 | intrinsics_generator : IntrinsicsGenerator, 13 | renderer_params: BoxRendererParams, 14 | background_samplers : list, 15 | background_samplers_probabilities : list, # list of floats 16 | noise_adders : list, 17 | noise_adders_probabilities : list, # list of floats 18 | border_noise_adders : list, 19 | border_noise_adders_probabilities : list, # list of floats 20 | hole_adder : HoleNoiseAdder, 21 | output_resolution : tuple = None, # default output resolution is determined by intrinsics generator. if specified, the output will be interpolated to new resolution (width x height) 22 | background_sampler_seed = 1111, 23 | noise_sampler_seed = 3333, 24 | border_noise_adder_seed = 3456): 25 | 26 | self.box_renderer = box_renderer 27 | self.pose_sampler = pose_sampler 28 | self.intrinsics_generator = intrinsics_generator 29 | self.renderer_params = renderer_params 30 | self.background_samplers = background_samplers 31 | self.background_samplers_probabilities = background_samplers_probabilities 32 | self.noise_adders = noise_adders 33 | self.noise_adders_probabilities = noise_adders_probabilities 34 | self.hole_adder = hole_adder 35 | self.border_noise_adders = border_noise_adders 36 | self.border_noise_adders_probabilities = border_noise_adders_probabilities 37 | self.border_noise_adder_seed = border_noise_adder_seed 38 | self.output_resolution = output_resolution 39 | self.noise_sampler_seed = noise_sampler_seed 40 | self.background_sampler_seed = background_sampler_seed 41 | 42 | class BoxPoseDataset(Dataset): 43 | 44 | def __init__(self, params : BoxPoseDatasetParams): 45 | super().__init__() 46 | self._params = params 47 | self.background_sampler = rs(params.background_samplers,params.background_samplers_probabilities,rnd_seed = params.background_sampler_seed) 48 | self.noise_adder_sampler = rs(params.noise_adders,params.noise_adders_probabilities, rnd_seed = params.noise_sampler_seed) 49 | self.border_noise_adder_sampler = rs(params.border_noise_adders, params.border_noise_adders_probabilities, rnd_seed = params.border_noise_adder_seed) 50 | 51 | def _create_intrinsics_matrix( 52 | self, 53 | camera_intrinsics : list, 54 | aspect_ratios : tuple, 55 | ) -> torch.tensor: 56 | fx = camera_intrinsics[0] / aspect_ratios[0] #fx 57 | cx = camera_intrinsics[2] / aspect_ratios[0] #cx 58 | 59 | fy = camera_intrinsics[1] / aspect_ratios[1] #fy 60 | cy = camera_intrinsics[3] / aspect_ratios[1] #cy 61 | 62 | r = torch.eye(3) 63 | r[0,0] = fx 64 | r[1,1] = fy 65 | r[0,2] = cx 66 | r[1,2] = cy 67 | 68 | return r 69 | 70 | def __len__(self): 71 | return self._params.pose_sampler.transformations.shape[0] 72 | 73 | 74 | def __getitem__(self,idx): 75 | camera_pose = self._params.pose_sampler.transformations[idx] 76 | self._params.box_renderer.canvas_width = self._params.intrinsics_generator.width 77 | self._params.box_renderer.canvas_height = self._params.intrinsics_generator.height 78 | camera_intrinsics = self._params.intrinsics_generator.sample() 79 | 80 | color, depth, normals , labels = self._params.box_renderer.render(camera_pose = camera_pose, camera_intrinsics = camera_intrinsics, 81 | znear = self._params.renderer_params.depth_zmin, zfar = self._params.renderer_params.depth_zmax, 82 | render_flags = self._params.renderer_params.render_flags) 83 | 84 | bg = np.float32(self.background_sampler.sample().sample()) 85 | bg = random_crop_and_scale_to_fit_target(bg,self._params.intrinsics_generator.width,self._params.intrinsics_generator.height) 86 | 87 | hole_depth = 
self._params.hole_adder.add_noise(depth) 88 | nd = self.noise_adder_sampler.sample().add_noise(hole_depth) 89 | mask1 = nd == 0 90 | 91 | bnd = self.border_noise_adder_sampler.sample().add_noise(nd) 92 | mask2 = bnd == 0 93 | 94 | fmask = mask2 * (~mask1) 95 | 96 | final_depth = bg 97 | #final_depth[nd > 0.0] = nd[nd > 0.0] 98 | final_depth[depth > 0.0] = nd[depth > 0.0] 99 | #final_depth = self.border_noise_adder_sampler.sample().add_noise(final_depth) 100 | final_depth[fmask] = bnd[fmask] 101 | 102 | normals [final_depth == 0] = 0 103 | labels = np.ascontiguousarray(labels) 104 | labels [final_depth == 0] = 0 105 | color = np.ascontiguousarray(color) 106 | color [final_depth == 0] = 0 107 | 108 | fdepth = torch.from_numpy(final_depth).unsqueeze(0) 109 | fnormals = torch.from_numpy(normals).permute(2,0,1) 110 | flabels = torch.from_numpy(labels).unsqueeze(0) 111 | fcolor = torch.from_numpy(color).permute(2,0,1) 112 | 113 | aspect_ratio = (1.0,1.0) 114 | if self._params.output_resolution != None: 115 | out_width, out_height = self._params.output_resolution 116 | fdepth = nn.functional.interpolate(fdepth.unsqueeze(0), size=[out_height, out_width], mode='nearest').squeeze(0) 117 | fnormals = nn.functional.interpolate(fnormals.unsqueeze(0), size=[out_height, out_width], mode='nearest').squeeze(0) 118 | flabels = nn.functional.interpolate(flabels.unsqueeze(0).float(), size=[out_height, out_width], mode='nearest').squeeze(0).to(torch.uint8) 119 | fcolor = nn.functional.interpolate(fcolor.unsqueeze(0).float(), size=[out_height, out_width], mode='nearest').squeeze(0).to(torch.uint8) 120 | aspect_ratio = (self._params.box_renderer.canvas_width/out_width, 121 | self._params.box_renderer.canvas_height/out_height) 122 | 123 | intrinsics = self._create_intrinsics_matrix(camera_intrinsics,aspect_ratio) 124 | 125 | r = { 126 | "depth" : fdepth.float(), 127 | "normals" : fnormals.float(), 128 | "labels" : flabels, 129 | "color" : fcolor, 130 | "intrinsics_original" : torch.from_numpy(np.ascontiguousarray(camera_intrinsics)).float(), # maybe used to deprojet depth 131 | "intrinsics" : intrinsics.float(), 132 | "camera_resolution" : (self._params.intrinsics_generator.width, self._params.intrinsics_generator.height), # original resolution before transform to use to deproject with intrinsics after resizing image 133 | "camera_pose" : torch.from_numpy(camera_pose).float(), 134 | "type" : "synthetic" 135 | } 136 | return r 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /src/dataset/calibration_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import json 3 | import os 4 | import cv2 5 | import numpy 6 | 7 | from torch.utils.data.dataset import Dataset 8 | 9 | def correct_labels(labels, number_of_classes): 10 | if labels is None: 11 | return None 12 | 13 | if number_of_classes == 17: 14 | #map from 25 to 17 class labels 15 | #labels[labels == 1] = 1 16 | #labels[labels == 2] = 2 17 | labels[labels == 3] = 0 18 | labels[labels == 4] = 0 19 | labels[labels == 5] = 3 20 | labels[labels == 6] = 4 21 | 22 | 23 | labels[labels == 7] = 5 24 | labels[labels == 8] = 6 25 | labels[labels == 9] = 0 26 | labels[labels == 10] = 0 27 | labels[labels == 11] = 7 28 | labels[labels == 12] = 8 29 | 30 | labels[labels == 13] = 9 31 | labels[labels == 14] = 0 32 | labels[labels == 15] = 10 33 | labels[labels == 16] = 11 34 | labels[labels == 17] = 0 35 | labels[labels == 18] = 12 36 | 37 | labels[labels == 19] 
= 13 38 | labels[labels == 20] = 0 39 | labels[labels == 21] = 14 40 | labels[labels == 22] = 0 41 | labels[labels == 23] = 15 42 | labels[labels == 24] = 16 43 | elif number_of_classes == 21: 44 | #map from 25 to 21 class labels 45 | #aka bot as background 46 | #labels[labels == 1] = 1 47 | #labels[labels == 2] = 2 48 | labels[labels == 3] = 0 49 | labels[labels == 4] = 3 50 | labels[labels == 5] = 4 51 | labels[labels == 6] = 5 52 | 53 | 54 | labels[labels == 7] = 6 55 | labels[labels == 8] = 7 56 | labels[labels == 9] = 0 57 | labels[labels == 10] = 8 58 | labels[labels == 11] = 9 59 | labels[labels == 12] = 10 60 | 61 | labels[labels == 13] = 11 62 | labels[labels == 14] = 12 63 | labels[labels == 15] = 13 64 | labels[labels == 16] = 14 65 | labels[labels == 17] = 0 66 | labels[labels == 18] = 15 67 | 68 | labels[labels == 19] = 16 69 | labels[labels == 20] = 0 70 | labels[labels == 21] = 17 71 | labels[labels == 22] = 18 72 | labels[labels == 23] = 19 73 | labels[labels == 24] = 20 74 | return labels 75 | 76 | def load_intrinsics_repository(filename): 77 | #global intrinsics_dict 78 | with open(filename, 'r') as json_file: 79 | intrinsics_repository = json.load(json_file) 80 | intrinsics_dict = dict((intrinsics['Device'], \ 81 | intrinsics['Depth Intrinsics'][0]['1280x720'])\ 82 | for intrinsics in intrinsics_repository) 83 | return intrinsics_dict 84 | 85 | def get_intrinsics(name, intrinsics_dict, scale=1, data_type=torch.float32): 86 | #global intrinsics_dict 87 | if intrinsics_dict is not None: 88 | intrinsics_data = numpy.array(intrinsics_dict[name]) 89 | intrinsics = torch.tensor(intrinsics_data).reshape(3, 3).type(data_type) 90 | intrinsics[0, 0] = intrinsics[0, 0] / scale 91 | intrinsics[0, 2] = intrinsics[0, 2] / scale 92 | intrinsics[1, 1] = intrinsics[1, 1] / scale 93 | intrinsics[1, 2] = intrinsics[1, 2] / scale 94 | intrinsics_inv = intrinsics.inverse() 95 | return intrinsics, intrinsics_inv 96 | raise ValueError("Intrinsics repository is empty") 97 | 98 | class DepthmapDatasetCalibrationParams: 99 | def __init__( 100 | self, 101 | path_to_dataset : str, 102 | path_to_device_repo : str, 103 | scale : float, 104 | name_pos : int, 105 | extension : str, 106 | decimation_scale : int, 107 | nclasses : int, 108 | duplicate_devices = False 109 | ): 110 | ''' 111 | scale: value to scale loaded dataset (to adjust for metersmillimeters) 112 | name_pos: where in filename device name appears (for xxx_yyy_DEVICENAME_zzz.pgm name_pos = 2) 113 | ''' 114 | self.path_to_dataset = path_to_dataset 115 | self.path_to_device_repo = path_to_device_repo 116 | self.scale = scale 117 | self.name_pos = name_pos 118 | self.extension = extension 119 | self.decimation_scale = decimation_scale 120 | self.duplicate_devices = duplicate_devices 121 | self.nclasses = nclasses 122 | 123 | 124 | 125 | 126 | class DepthmapDatasetCalibration(Dataset): 127 | 128 | def __init__(self, params : DepthmapDatasetCalibrationParams): 129 | super().__init__() 130 | self._params = params 131 | self._get_filelist() 132 | self.intrinsics_dict = load_intrinsics_repository(params.path_to_device_repo) 133 | 134 | def __len__(self): 135 | return len(self._filepaths) 136 | 137 | def _get_filelist(self): 138 | self._filepaths = [os.path.join(self._params.path_to_dataset, x) for x in os.listdir(self._params.path_to_dataset) if os.path.isfile(os.path.join(self._params.path_to_dataset,x)) and x.split(".")[1] == self._params.extension] 139 | self._labels = [os.path.join(self._params.path_to_dataset, x) for x in 
os.listdir(self._params.path_to_dataset) if os.path.isfile(os.path.join(self._params.path_to_dataset,x)) and (x.split(".")[1] == 'png') and ("label" in x)] 140 | 141 | self._filepaths.sort() 142 | self._labels.sort() 143 | 144 | def __getitem__(self,idx): 145 | 146 | fname = self._filepaths[idx] 147 | img = cv2.imread(fname,cv2.IMREAD_ANYDEPTH).astype(numpy.float32) * self._params.scale 148 | 149 | if self._labels: 150 | img_labels = torch.from_numpy(cv2.imread(self._labels[idx],cv2.IMREAD_ANYDEPTH).astype(numpy.float32)).unsqueeze(0) 151 | else: 152 | img_labels = torch.tensor([]) 153 | 154 | device_name = os.path.basename(fname).split("_")[self._params.name_pos] 155 | 156 | intrinsics, intrinsics_inv = get_intrinsics(device_name[:-1] if self._params.duplicate_devices else device_name, self.intrinsics_dict, self._params.decimation_scale) 157 | 158 | 159 | return { 160 | "depth" :torch.from_numpy(img).unsqueeze(0), 161 | "filename" :fname, 162 | "device" :device_name, 163 | "intrinsics" :intrinsics, 164 | "intrinsics_inv" :intrinsics_inv, 165 | "labels" :correct_labels(img_labels, self._params.nclasses), 166 | "has_labels" :True if self._labels else False 167 | } 168 | 169 | if __name__ == "__main__": 170 | params = DepthmapDatasetCalibrationParams( 171 | "D:\\VCL\\Users\\vlad\\Datasets\\SMPL_playground_data\\new_perfcap_recs\\akiz\\Dump\\Dataset\\root\\Data", 172 | "D:\\Projects\\vs\\RealSenz\\immerzion\\vs\\immerzion\\x64\\Release\\device_repository.json", 173 | 0.001, 174 | 1, 175 | "pgm", 176 | 4) 177 | d = DepthmapDatasetCalibration(params) 178 | dataset = torch.utils.data.DataLoader(d,\ 179 | batch_size = 1, shuffle=False,\ 180 | num_workers = 0, pin_memory=False) 181 | 182 | for batch_id, batch in enumerate(d): 183 | bbb = batch 184 | bp = True -------------------------------------------------------------------------------- /src/dataset/depthmap_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import torch 5 | from torch.utils.data import Dataset 6 | 7 | class DepthmapDatasetParams: 8 | def __init__(self, path_to_dataset : str, scale : float, max_len : int = None): 9 | ''' 10 | scale: value to scale loaded dataset (to adjust for metersmillimeters) 11 | max_len: use this to trim dataset to first max_len elements 12 | ''' 13 | self.path_to_dataset = path_to_dataset 14 | self.scale = scale 15 | self.max_len = max_len 16 | 17 | class DepthmapDataset(Dataset): 18 | 19 | def __init__(self, params : DepthmapDatasetParams): 20 | super().__init__() 21 | self._params = params 22 | self._get_filelist() 23 | 24 | def __len__(self): 25 | return len(self._filepaths) if (self._params.max_len == None) or (len(self._filepaths) < self._params.max_len) else self._params.max_len 26 | 27 | def _get_filelist(self): 28 | self._filepaths = list(map(lambda x: os.path.join(self._params.path_to_dataset,x),os.listdir(self._params.path_to_dataset))) 29 | self._filepaths = [x for x in self._filepaths if x.endswith(".pgm")] 30 | self._filepaths.sort(key = lambda x: int(x.split("\\")[-1].split(".")[0].split("_")[2])) 31 | 32 | def __getitem__(self,idx): 33 | 34 | fname = self._filepaths[idx] 35 | img = cv2.imread(fname,cv2.IMREAD_ANYDEPTH).astype(np.float32) * self._params.scale 36 | 37 | return torch.from_numpy(img).unsqueeze(0) 38 | 39 | -------------------------------------------------------------------------------- /src/dataset/depthmap_val_dataset.py: 
-------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import torch 5 | from torch.utils.data import Dataset 6 | 7 | def get_modality(x): 8 | curr_split = x.split(".") 9 | if curr_split[-1] == "pgm": 10 | return 0 11 | else: 12 | label_split = x.split("_")[-2] 13 | if label_split == "id": 14 | return 1 15 | else: 16 | return 2 17 | 18 | 19 | 20 | 21 | 22 | class DepthmapDatasetParams: 23 | def __init__(self, path_to_dataset : str, scale : float, max_len : int = None, number_of_classes = 17, valset = True): 24 | ''' 25 | scale: value to scale loaded dataset (to adjust for metersmillimeters) 26 | max_len: use this to trim dataset to first max_len elements 27 | ''' 28 | self.path_to_dataset = path_to_dataset 29 | self.scale = scale 30 | self.max_len = max_len 31 | self.number_of_classes = number_of_classes 32 | self.valset = valset 33 | 34 | class DepthmapDataset(Dataset): 35 | 36 | def __init__(self, params : DepthmapDatasetParams): 37 | super().__init__() 38 | self._params = params 39 | self._get_filelist() 40 | 41 | def __len__(self): 42 | return len(self._filepaths_depth) if (self._params.max_len == None) or (len(self._filepaths_depth) < self._params.max_len) else self._params.max_len 43 | 44 | def _get_filelist(self): 45 | self._filepaths = list(map(lambda x: os.path.join(self._params.path_to_dataset,x),os.listdir(self._params.path_to_dataset))) 46 | self._names = list(os.listdir(self._params.path_to_dataset)) 47 | self._filepaths_depth = [] 48 | self._filepaths_labels = [] 49 | 50 | # for file in self._filepaths: 51 | # mode = get_modality(file) 52 | # if mode == 0: 53 | # self._filepaths_depth.append(file) 54 | # elif mode == 1: 55 | # self._filepaths_labels.append(file) 56 | # else: 57 | # continue 58 | 59 | if not self._params.valset: 60 | self._filepaths_depth = \ 61 | [os.path.join(self._params.path_to_dataset,x) \ 62 | for x in sorted(self._names, key=lambda x: int(x.split('.')[0].split("_")[2]), reverse=False) if "pgm" in x] 63 | 64 | self._filepaths_labels = \ 65 | [os.path.join(self._params.path_to_dataset,x) \ 66 | for x in sorted(self._names, key=lambda x: int(x.split('.')[0].split("_")[2]), reverse=False) if "id" in x] 67 | 68 | self._names = \ 69 | [x \ 70 | for x in sorted(self._names, key=lambda x: int(x.split('.')[0].split("_")[2]), reverse=False) if "id" in x] 71 | else: 72 | self._filepaths_depth = \ 73 | [os.path.join(self._params.path_to_dataset,x) \ 74 | for x in sorted(self._names, key=lambda x: x.split('.')[0].split("_")[0], reverse=False) if "pgm" in x] 75 | 76 | self._filepaths_labels = \ 77 | [os.path.join(self._params.path_to_dataset,x) \ 78 | for x in sorted(self._names, key=lambda x: x.split('.')[0].split("_")[0], reverse=False) if "id" in x] 79 | 80 | self._names = \ 81 | [x \ 82 | for x in sorted(self._names, key=lambda x: x.split('.')[0].split("_")[0], reverse=False) if "id" in x] 83 | 84 | 85 | 86 | def __getitem__(self,idx): 87 | 88 | frame = {} 89 | frame["depth"] = {} 90 | frame["labels"] = {} 91 | dname = self._filepaths_depth[idx] 92 | lname = self._filepaths_labels[idx] 93 | frame["name"] = self._names[idx].split(".")[0] 94 | img = cv2.imread(dname,cv2.IMREAD_ANYDEPTH).astype(np.float32) * self._params.scale 95 | img_labels = cv2.imread(lname,cv2.IMREAD_ANYDEPTH).astype(np.float32) 96 | 97 | frame["depth"] = torch.from_numpy(img).unsqueeze(0) 98 | 99 | labels = torch.from_numpy(img_labels).unsqueeze(0) 100 | 101 | if self._params.number_of_classes == 17: 102 | #map 
from 25 to 17 class labels 103 | #labels[labels == 1] = 1 104 | #labels[labels == 2] = 2 105 | labels[labels == 3] = 0 106 | labels[labels == 4] = 0 107 | labels[labels == 5] = 3 108 | labels[labels == 6] = 4 109 | 110 | 111 | labels[labels == 7] = 5 112 | labels[labels == 8] = 6 113 | labels[labels == 9] = 0 114 | labels[labels == 10] = 0 115 | labels[labels == 11] = 7 116 | labels[labels == 12] = 8 117 | 118 | labels[labels == 13] = 9 119 | labels[labels == 14] = 0 120 | labels[labels == 15] = 10 121 | labels[labels == 16] = 11 122 | labels[labels == 17] = 0 123 | labels[labels == 18] = 12 124 | 125 | labels[labels == 19] = 13 126 | labels[labels == 20] = 0 127 | labels[labels == 21] = 14 128 | labels[labels == 22] = 0 129 | labels[labels == 23] = 15 130 | labels[labels == 24] = 16 131 | elif self._params.number_of_classes == 21: 132 | #map from 25 to 21 class labels 133 | #aka bot as background 134 | #labels[labels == 1] = 1 135 | #labels[labels == 2] = 2 136 | labels[labels == 3] = 0 137 | labels[labels == 4] = 3 138 | labels[labels == 5] = 4 139 | labels[labels == 6] = 5 140 | 141 | 142 | labels[labels == 7] = 6 143 | labels[labels == 8] = 7 144 | labels[labels == 9] = 0 145 | labels[labels == 10] = 8 146 | labels[labels == 11] = 9 147 | labels[labels == 12] = 10 148 | 149 | labels[labels == 13] = 11 150 | labels[labels == 14] = 12 151 | labels[labels == 15] = 13 152 | labels[labels == 16] = 14 153 | labels[labels == 17] = 0 154 | labels[labels == 18] = 15 155 | 156 | labels[labels == 19] = 16 157 | labels[labels == 20] = 0 158 | labels[labels == 21] = 17 159 | labels[labels == 22] = 18 160 | labels[labels == 23] = 19 161 | labels[labels == 24] = 20 162 | 163 | 164 | 165 | 166 | frame["labels"] = labels 167 | 168 | return frame 169 | 170 | 171 | # #set background as class 17 172 | # labels[labels == 16] = 99 173 | # labels[labels == 0] = 16 174 | # labels[labels == 1] = 0 175 | # labels[labels == 2] = 1 176 | # labels[labels == 3] = 2 177 | # labels[labels == 4] = 3 178 | # labels[labels == 5] = 4 179 | # labels[labels == 6] = 5 180 | # labels[labels == 7] = 6 181 | # labels[labels == 8] = 7 182 | # labels[labels == 9] = 8 183 | # labels[labels == 10] = 9 184 | # labels[labels == 11] = 10 185 | # labels[labels == 12] = 11 186 | # labels[labels == 13] = 12 187 | # labels[labels == 14] = 13 188 | # labels[labels == 15] = 14 189 | # labels[labels == 99] = 15 190 | 191 | # labels += 1 -------------------------------------------------------------------------------- /src/dataset/distance_unit.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class DistanceUnit(Enum): 4 | Millimeters = 1 5 | Meters = 2 6 | -------------------------------------------------------------------------------- /src/dataset/noise/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/src/dataset/noise/__init__.py -------------------------------------------------------------------------------- /src/dataset/noise/noise.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | 4 | def create_image_domain_grid(width, height, data_type=torch.float32): 5 | v_range = ( 6 | torch.arange(0, height) # [0 - h] 7 | .view(1, height, 1) # [1, [0 - h], 1] 8 | .expand(1, height, width) # [1, [0 - h], W] 9 | .type(data_type) # [1, H, W] 10 | ) 11 | u_range = ( 12 | 
torch.arange(0, width) # [0 - w] 13 | .view(1, 1, width) # [1, 1, [0 - w]] 14 | .expand(1, height, width) # [1, H, [0 - w]] 15 | .type(data_type) # [1, H, W] 16 | ) 17 | return torch.stack((u_range, v_range), dim=1) # [1, 3, H, W] 18 | 19 | def disparity_noise(depth, sigma_depth=(1.0/6.0), sigma_space=(1.0/2.0), mean_space=0.5): 20 | b, c, h, w = depth.size() 21 | uvgrid = create_image_domain_grid(w, h) 22 | spatial_distribution = torch.randn_like(uvgrid) * sigma_space + mean_space 23 | offseted = (uvgrid + spatial_distribution).type(torch.int64) 24 | offseted[:, 0, :, :] = torch.clamp(offseted[:, 0, :, :], min=0, max=w-1) 25 | offseted[:, 1, :, :] = torch.clamp(offseted[:, 1, :, :], min=0, max=h-1) 26 | offsets = offseted[:, 1, :, :] * w + offseted[:, 0, :, :] 27 | linear_offsets = offsets.reshape(h*w) 28 | resampled_depth = torch.index_select(depth.reshape(h*w), 0, linear_offsets).reshape(b, c, h, w) 29 | depth_distribution = torch.randn_like(depth) * sigma_depth * sigma_depth 30 | baseline = torch.tensor(35130.0, dtype=torch.float32) 31 | denom = torch.round(baseline / (resampled_depth * 100.0) + depth_distribution + 0.5) 32 | noisy_depth = baseline / denom / 100.0 33 | return noisy_depth, resampled_depth 34 | 35 | def tof_noise(depth, sigma_fraction=0.1): 36 | rand = torch.rand_like(depth) 37 | sign = torch.ones_like(depth) 38 | sign[rand < 0.5] = -1.0 39 | sigma = sigma_fraction * depth 40 | magnitude = sigma * (1.0 - torch.exp(-0.5 * rand * rand)) 41 | noisy_depth = depth + sign * magnitude 42 | return noisy_depth 43 | 44 | -------------------------------------------------------------------------------- /src/dataset/noise/noise_adder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . 
import noise 3 | import numpy as np 4 | from abc import ABC, abstractclassmethod 5 | import cv2 6 | 7 | class BaseNoiseAdder(ABC): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | @abstractclassmethod 12 | def add_noise(self, depth): 13 | pass 14 | 15 | class DisparityNoiseParams: 16 | 17 | def __init__(self, depth_pre_scale_factor : float = 1.0, sigma_depth : float = (1.0/6.0), sigma_space : float = (1.0/2.0), mean_space : float = 0.5): 18 | ''' 19 | depth_pre_scale_factor: scale to multiply depth with, in order to make it in meters 20 | sigma_depth, sigma_space, mean_space: set these values for meter units 21 | ''' 22 | self.depth_pre_scale_factor = depth_pre_scale_factor 23 | self.sigma_depth = sigma_depth 24 | self.sigma_space = sigma_space 25 | self.mean_space = mean_space 26 | 27 | class TofNoiseParams: 28 | def __init__(self, sigma_fraction : float = 0.1): 29 | self.sigma_fraction = sigma_fraction 30 | 31 | class DisparityNoiseAdder(BaseNoiseAdder): 32 | def __init__(self, params : DisparityNoiseParams): 33 | self._params = params 34 | def add_noise(self, depth : np.array) -> np.array: 35 | torch_depth = torch.from_numpy(depth).unsqueeze(0).unsqueeze(0) 36 | #scale = 1.0 if self._params.disparity_noise_params.distance_unit == DistanceUnit.Meters else 0.001 # disparity noise model requires input in meters 37 | scale = self._params.depth_pre_scale_factor 38 | noisy_depth_torch , _ = noise.disparity_noise(scale * torch_depth,self._params.sigma_depth, \ 39 | self._params.sigma_space, self._params.mean_space) 40 | noisy_depth = torch.squeeze(1/scale * noisy_depth_torch).data.numpy() 41 | return noisy_depth 42 | 43 | class TofNoiseAdder(BaseNoiseAdder): 44 | def __init__(self, params : TofNoiseParams): 45 | self._params = params 46 | def add_noise(self,depth : np.array) -> np.array: 47 | torch_depth = torch.from_numpy(depth).unsqueeze(0).unsqueeze(0) 48 | noisy_depth = torch.squeeze(noise.tof_noise(torch_depth,self._params.sigma_fraction)).data.numpy() 49 | return noisy_depth 50 | 51 | 52 | class HoleNoiseParams: 53 | def __init__(self, min_radius : int, max_radius: int, min_hole_count : int , max_hole_count : int, rnd_seed : int = 4567): 54 | ''' 55 | min_radius: minimum radius in pixels 56 | max_radius: max radius in pixels 57 | min_hole_count : min number of holes to create 58 | max_hole_count : max number of holes to create 59 | ''' 60 | self.min_radius = min_radius 61 | self.max_radius = max_radius 62 | self.min_hole_count = min_hole_count 63 | self.max_hole_count = max_hole_count 64 | self.rnd_seed = rnd_seed 65 | 66 | 67 | class HoleNoiseAdder(BaseNoiseAdder): 68 | def __init__(self, params : HoleNoiseParams) : 69 | self._params = params 70 | self._rnd_gen = np.random.RandomState(self._params.rnd_seed) 71 | 72 | def add_noise(self, depth : np.array) -> np.array: 73 | 74 | height = depth.shape[0] 75 | width = depth.shape[1] 76 | 77 | mask = depth != 0 78 | #loc = [(y,x) for x in range(width) for y in range(height) if mask[y,x] != 0] 79 | loc = np.concatenate((np.expand_dims(np.nonzero(mask)[0], axis = 1),np.expand_dims(np.nonzero(mask)[1], axis = 1)),axis = 1).tolist() 80 | 81 | noisy_depth = depth.copy() 82 | 83 | if(len(loc) == 0): 84 | return noisy_depth 85 | 86 | hcount = self._rnd_gen.randint(low = self._params.min_hole_count, high = self._params.max_hole_count) 87 | 88 | hole_center_ind = self._rnd_gen.randint(low = 0, high = len(loc), size = hcount) 89 | 90 | for idx in range(len(hole_center_ind)): 91 | y, x = loc[hole_center_ind[idx]] 92 | radius = 
self._rnd_gen.randint(low = self._params.min_radius, high = self._params.max_radius) 93 | 94 | noisy_depth = cv2.circle(noisy_depth , tuple(loc[hole_center_ind[idx]]), radius, color = 0.0 , thickness=cv2.FILLED,lineType=8) 95 | 96 | return noisy_depth 97 | 98 | class BorderNoiseParams: 99 | 100 | def __init__(self, border_width : int = 3, iterations: int = 1): 101 | self.border_width = border_width 102 | self.iterations = iterations 103 | 104 | class BorderErodeNoiseAdder(BaseNoiseAdder): 105 | 106 | def __init__(self, params : BorderNoiseParams): 107 | self._params = params 108 | 109 | def add_noise(self, depth : np.array) -> np.array: 110 | 111 | # Taking a matrix of a kernel 112 | kernel = np.ones((self._params.border_width,self._params.border_width), np.float32) 113 | 114 | mask = np.float32(depth != 0.0) 115 | 116 | eroded_mask = cv2.erode(mask,kernel,iterations = self._params.iterations) 117 | 118 | noisy_depth = depth.copy() 119 | noisy_depth[mask != eroded_mask] = 0.0 120 | 121 | return noisy_depth 122 | 123 | class BorderDilateNoiseAdder(BaseNoiseAdder): 124 | 125 | def __init__(self, params : BorderNoiseParams): 126 | self._params = params 127 | 128 | def add_noise(self, depth : np.array) -> np.array: 129 | # Taking a matrix of a kernel 130 | kernel = np.ones((self._params.border_width,self._params.border_width), np.float32) 131 | 132 | mask = np.float32(depth != 0.0) 133 | 134 | dilated_mask = cv2.dilate(mask,kernel,iterations = self._params.iterations) 135 | 136 | noisy_depth = depth.copy() 137 | noisy_depth[mask != dilated_mask] = 0.0 138 | 139 | return noisy_depth 140 | -------------------------------------------------------------------------------- /src/dataset/real_dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy 5 | from torch.utils.data.dataset import Dataset 6 | from torch.utils.data import DataLoader 7 | import warnings 8 | import json 9 | import cv2 10 | #testing 11 | # sys.path.append('E:\\Projects\\vsc\\deep_depth_denoising\\denoise') 12 | # import importers 13 | 14 | def load_intrinsics_repository(filename): 15 | #global intrinsics_dict 16 | with open(filename, 'r') as json_file: 17 | intrinsics_repository = json.load(json_file) 18 | intrinsics_dict = dict((intrinsics['Device'], \ 19 | intrinsics['Depth Intrinsics'][0]['1280x720'])\ 20 | for intrinsics in intrinsics_repository) 21 | return intrinsics_dict 22 | 23 | def get_intrinsics(name, intrinsics_dict, scale=1, data_type=torch.float32): 24 | #global intrinsics_dict 25 | if intrinsics_dict is not None: 26 | intrinsics_data = numpy.array(intrinsics_dict[name]) 27 | intrinsics = torch.tensor(intrinsics_data).reshape(3, 3).type(data_type) 28 | intrinsics[0, 0] = intrinsics[0, 0] / scale 29 | intrinsics[0, 2] = intrinsics[0, 2] / scale 30 | intrinsics[1, 1] = intrinsics[1, 1] / scale 31 | intrinsics[1, 2] = intrinsics[1, 2] / scale 32 | intrinsics_inv = intrinsics.inverse() 33 | return intrinsics, intrinsics_inv 34 | raise ValueError("Intrinsics repository is empty") 35 | 36 | 37 | ''' 38 | Dataset importer. We assume that data follows the below structure. 39 | root_path 40 | device_repository.json 41 | | 42 | |-----recording_i 43 | | |-----Data 44 | | |-----Calibration 45 | | 46 | |-----recording_i+1 47 | | |-----Data 48 | | |-----Calibration 49 | | 50 | ''' 51 | 52 | class DataLoaderParams: 53 | def __init__(self 54 | ,root_path 55 | ,device_list 56 | ,decimation_scale = 2 57 | ,device_repository_path = "." 
58 | ,depth_scale = 0.001 59 | ,depth_threshold = 5): 60 | self.root_path = root_path 61 | self.device_list = device_list 62 | self.device_repository_path = device_repository_path 63 | self.depth_scale = depth_scale 64 | self.decimation_scale = decimation_scale 65 | self.depth_threshold = depth_threshold 66 | 67 | class DataLoad(Dataset): 68 | def __init__(self, params): 69 | super(DataLoad,self).__init__() 70 | self.params = params 71 | 72 | device_repo_path = os.path.join(self.params.device_repository_path,"device_repository.json") 73 | if not os.path.exists(device_repo_path): 74 | raise ValueError("{} does not exist, exiting.".format(device_repo_path)) 75 | self.device_repository = load_intrinsics_repository(device_repo_path) 76 | 77 | root_path = self.params.root_path 78 | 79 | 80 | if not os.path.exists(root_path): 81 | #TODO maybe log? 82 | raise ValueError("{} does not exist, exiting.".format(root_path)) 83 | 84 | self.data = {} 85 | 86 | #Iterate over each recorded folder 87 | for recording in os.listdir(root_path): 88 | abs_recording_path = os.path.join(root_path,recording) 89 | if not os.path.isdir(abs_recording_path): 90 | continue 91 | #Path where data supposed to be stored 92 | data_path = os.path.join(abs_recording_path,"Data") 93 | 94 | # if not os.path.exists(data_path): 95 | # warnings.warn("Folder {} does not containt \"Data\" folder".format(abs_recording_path)) 96 | # continue 97 | 98 | #Path to the calibration of that particular recording 99 | # calibration_path = os.path.join(abs_recording_path,"Calibration") 100 | # if not os.path.exists(calibration_path): 101 | # warnings.warn("Folder {} does not containt \"Calibration\" folder".format(calibration_path)) 102 | # continue 103 | 104 | #Data iteration 105 | 106 | for file in os.listdir(data_path): 107 | full_filename = os.path.join(data_path,file) 108 | 109 | _, ext = os.path.splitext(full_filename) 110 | if ext != ".png" and ext != ".pgm": 111 | continue 112 | 113 | _id,_name,_type,_ = file.split("_") 114 | unique_name = recording + "-" + str(_id) 115 | 116 | #skip names that we do not want to load 117 | if _name not in self.params.device_list: 118 | continue 119 | 120 | if unique_name not in self.data: 121 | self.data[unique_name] = {} 122 | #self.data[unique_name]["calibration"] = calibration_path 123 | 124 | if _name not in self.data[unique_name]: 125 | self.data[unique_name][_name] = {} 126 | 127 | self.data[unique_name][_name][_type] = full_filename 128 | print("Data loading completed.") 129 | 130 | 131 | def __len__(self): 132 | return len(self.data) 133 | 134 | def __getitem__(self, idx): 135 | #get an entry 136 | key = list(self.data.keys())[idx] 137 | datum = self.data[key] 138 | 139 | datum_out = {} 140 | for device in self.params.device_list: 141 | _, depth_ext = os.path.splitext(datum[device]["depth"]) 142 | depth_scale = 0.001 if depth_ext == ".png" else 0.0001 143 | #color_img = importers.image.load_image(datum[device]["color"]) 144 | depth = torch.from_numpy(numpy.array(cv2.imread(datum[device]["depth"], cv2.IMREAD_ANYDEPTH)).astype(numpy.float32)).unsqueeze(0).unsqueeze(0) * depth_scale 145 | depth_range_mask = (depth < self.params.depth_threshold).float() 146 | #depth_img = importers.image.load_depth(datum[device]["depth"], scale=depth_scale) * depth_range_mask 147 | depth_img = depth * depth_range_mask 148 | intrinsics, intrinsics_inv = get_intrinsics(\ 149 | device, self.device_repository, self.params.decimation_scale) 150 | # extrinsics, extrinsics_inv = importers.extrinsics.load_extrinsics(\ 151 | 
# os.path.join(datum["calibration"], device + ".extrinsics")) 152 | 153 | datum_out.update({ 154 | #"color" : color_img.squeeze(0), 155 | "depth" : depth_img.squeeze(0), 156 | "intrinsics" : intrinsics.float(), 157 | "intrinsics_original" : torch.zeros((4)), 158 | "normals" : torch.zeros((3,depth.shape[2],depth.shape[3])).float(), 159 | "labels" : torch.zeros_like(depth.squeeze(0)).type(torch.uint8), 160 | "color" : torch.zeros((4,depth.shape[2],depth.shape[3])).type(torch.uint8), 161 | "camera_resolution" : (-1.0,-1.0), 162 | "camera_pose" : torch.zeros((4,4)).float(), 163 | #"intrinsics_inv" : intrinsics_inv, 164 | #"extrinsics" : extrinsics, 165 | #"extrinsics_inv" : extrinsics_inv, 166 | "type": "real" 167 | }) 168 | 169 | return datum_out 170 | 171 | def get_data(self): 172 | return self.data -------------------------------------------------------------------------------- /src/dataset/rendering/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/src/dataset/rendering/__init__.py -------------------------------------------------------------------------------- /src/dataset/rendering/box_renderer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import trimesh 3 | import pyrender 4 | from mgen import rotation_around_axis 5 | from ...io import box_model_loader 6 | from .transformations import * 7 | from enum import Flag, auto 8 | 9 | class BoxRenderFlags (Flag): 10 | LABEL_UP_AS_BACKGROUND = auto() 11 | LABEL_DOWN_AS_BACKGROUND = auto() 12 | LABEL_TOP_AND_BOTTOM_AS_BACKGROUND = LABEL_UP_AS_BACKGROUND | LABEL_DOWN_AS_BACKGROUND 13 | 14 | class BoxRendererParams: 15 | 16 | def __init__(self, depth_zmin : float = 0.2, depth_zmax : float = 10.0, render_flags : BoxRenderFlags = None): 17 | 18 | self.depth_zmin = depth_zmin 19 | self.depth_zmax = depth_zmax 20 | self.render_flags = render_flags 21 | 22 | class BoxRenderer(object): 23 | 24 | def __init__(self, box_model_obj_path : str = './data/asymmetric_box.obj', box_scale : float = 1.0, camera_transform : np.array = np.array([ \ 25 | [1.0, 0.0, 0.0, 0.0], 26 | [0.0, -1.0, 0.0, 0.0], 27 | [0.0, 0.0,-1.0, 0.0], 28 | [0.0, 0.0, 0.0, 1.0] 29 | ]), 30 | box_load_flags : box_model_loader.BoxLoadFlags = box_model_loader.BoxLoadFlags.LOAD_ALL): 31 | """The units of the model are in millimeters. 
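(As an illustration only: since the mesh is authored in millimeters, a box_scale of 0.001 would rescale it to meters; the exact value should match whatever units the rest of the pipeline expects.)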
Use scale to adjust 32 | camera_transform : numpy 4x4 transformation matrix to apply inorder to align opengl camera space to camera space of the given camera poses 33 | normally for typical camera models where y is down and origin at top left this camera matrix should be 34 | camera_transform = np.array([1.0,0.0,0.0,0.0], 35 | [0.0,-1.0,0.0,0.0], 36 | [0.0,0.0,-1.0,0.0], 37 | [0.0,0.0,0.0,1.0]) 38 | """ 39 | 40 | self._camera_transform = camera_transform 41 | self._box_model_obj_path = box_model_obj_path 42 | self._box_load_flags = box_load_flags 43 | self._background_color = [0.5,0.5,0.5,0.0] 44 | self._canvas_width = 320 45 | self._canvas_height = 180 46 | self._box_scale = box_scale 47 | self._initialize_offscreen_renderer = True 48 | self._create() 49 | 50 | def _generate_mesh(self, camera_pose, render_flags : BoxRenderFlags = None): 51 | 52 | positions, indices = self._box_geometry 53 | colors = BoxRenderer._generate_labels_normals_colormap(box_model = self._box_model, camera_pose = camera_pose, 54 | camera_transform = self._camera_transform, box_pose = self._box_model_pose, render_flags = render_flags) 55 | 56 | prim = pyrender.Primitive(positions = positions, indices = indices, mode = 4, color_0 = colors) 57 | mesh = pyrender.Mesh([prim]) 58 | self._box_mesh = mesh 59 | 60 | @staticmethod 61 | def _generate_geometry(box_model): 62 | positions = np.reshape(box_model["vertices"],(-1,3)) 63 | indices = np.reshape(box_model["indices"],(-1,3)) 64 | return positions, indices 65 | 66 | @staticmethod 67 | def _generate_labels_normals_colormap(box_model, camera_pose : np.array, camera_transform : np.array, box_pose : np.array, render_flags : BoxRenderFlags): 68 | """ 69 | box_model: box_model object created by box_model_loader 70 | camera_pose: the camera pose 4x4 numpy array. 
71 | box_pose: box object pose in global space 4x4 numpy array 72 | This function will generate colors with r,g,b color coding the normal in camera coordinate system (not global) 73 | and alpha containing the label id of the box's side 74 | """ 75 | 76 | def _label_as_background(side_name : str) -> bool: 77 | if (render_flags == None): 78 | return False 79 | elif (render_flags & BoxRenderFlags.LABEL_DOWN_AS_BACKGROUND) and ("_down_" in side_name): 80 | return True 81 | elif (render_flags & BoxRenderFlags.LABEL_UP_AS_BACKGROUND) and ("_up_" in side_name): 82 | return True 83 | 84 | return False 85 | 86 | normals = np.reshape(box_model["normals"],(-1,3)) 87 | colors = np.zeros((int(len(box_model["vertices"])/3),4)) 88 | label_count = len(box_model["side_names"]) 89 | 90 | final_camera_pose = camera_pose @ camera_transform 91 | 92 | inv_final_camera_pose = np.linalg.inv(final_camera_pose) 93 | next_label_id = 1 94 | background_label = 0 95 | for i in range(label_count): 96 | if not _label_as_background(box_model["side_names"][i]): 97 | label_id = next_label_id / 255.0 98 | next_label_id += 1 99 | else: 100 | label_id = background_label / 255.0 101 | 102 | for j in range(4): # for the 4 vertices of each side 103 | normal_obj = np.concatenate((normals[i*4+j,:],0.0),axis=None) 104 | normal_global = np.dot(box_pose,normal_obj) 105 | eye_norm = np.dot(inv_final_camera_pose,normal_global) 106 | eye_norm = eye_norm / np.linalg.norm(eye_norm) 107 | eye_norm = (eye_norm + 1.0) / 2.0 # normals from (-1.0 1.0) -> (0.0, 1.0) 108 | eye_norm = eye_norm ** 2.2 # undo gamma correction 109 | semantic = np.concatenate((eye_norm[:3],label_id),axis=None) 110 | colors[i*4+j,:] = semantic 111 | 112 | return colors 113 | 114 | def _generate_scene(self): 115 | scene = pyrender.Scene() 116 | scene.ambient_light = [1.0,1.0,1.0] 117 | self._scene = scene 118 | 119 | def _generate_model_pose(self): 120 | 121 | box_model_pose = np.zeros((4,4)) 122 | box_model_pose[:3,:3] = rotation_around_axis([1,0,0],-np.pi/2) 123 | box_model_pose[3,3] = 1.0 124 | 125 | box_model_pose = np.dot(box_model_pose,scale_matrix(self._box_scale)) 126 | self._box_model_pose = box_model_pose 127 | 128 | def _add_box_mesh_to_scene(self): 129 | 130 | if(len(self._scene.mesh_nodes)>0): 131 | self._scene.remove_node(next(iter(self._scene.mesh_nodes))) 132 | 133 | self._scene.add(self._box_mesh, pose = self._box_model_pose) 134 | 135 | def _create(self): 136 | self._box_model = box_model_loader.load_box_model(self._box_model_obj_path, flags = self._box_load_flags) 137 | self._box_geometry = BoxRenderer._generate_geometry(self._box_model) 138 | self._generate_scene() 139 | self._generate_model_pose() 140 | 141 | @property 142 | def canvas_width(self) -> int: 143 | return self._canvas_width 144 | 145 | @canvas_width.setter 146 | def canvas_width(self, value : int): 147 | value = int(value) 148 | if(self._canvas_width != value): 149 | self._canvas_width = value 150 | self._initialize_offscreen_renderer = True 151 | 152 | @property 153 | def canvas_height(self) -> int: 154 | return self._canvas_height 155 | 156 | @canvas_height.setter 157 | def canvas_height(self,value : int): 158 | value = int(value) 159 | if(self._canvas_height != value): 160 | self._canvas_height = value 161 | self._initialize_offscreen_renderer = True 162 | 163 | @property 164 | def background_color(self): 165 | return self._background_color 166 | 167 | @background_color.setter 168 | def background_color(self, value): 169 | assert(len(value) >= 3 and len(value)<=4) 170 | 
self._background_color = value 171 | 172 | def render(self, camera_pose : np.array, camera_intrinsics : np.array , znear : float = 1.0, zfar : float= 100.0, render_flags : BoxRenderFlags = None): 173 | ''' 174 | camera_pose: numpy 4x4 array of camera pose in global coordinate system 175 | camera_intrinsics: [fx, fy, cx, cy]: list of 4 floating point values for camera intrinsics (fx,fy,cx,cy in pixels) 176 | znear: near clipping plane - not relevant to intrinsics - z near defines the clipping of the depth values 177 | zfar: far clipping plane - not relevant to intrinsics - z far defines the clipping of the depth values 178 | ''' 179 | 180 | if(self._initialize_offscreen_renderer): 181 | self._renderer = pyrender.OffscreenRenderer(self._canvas_width, self._canvas_height) 182 | self._initialize_offscreen_renderer = False 183 | 184 | if(len(self._scene.camera_nodes)>0): 185 | self._scene.remove_node(next(iter(self._scene.camera_nodes))) 186 | 187 | camera = pyrender.IntrinsicsCamera(fx = camera_intrinsics[0], fy = camera_intrinsics[1], cx = camera_intrinsics[2], cy = camera_intrinsics[3], \ 188 | znear = znear, zfar = zfar) 189 | 190 | final_camera_pose = np.dot(camera_pose,self._camera_transform) 191 | self._scene.bg_color = self._background_color 192 | self._scene.add(camera, pose = final_camera_pose) 193 | self._generate_mesh(camera_pose, render_flags) 194 | self._add_box_mesh_to_scene() 195 | color, depth = self._renderer.render(self._scene, flags = pyrender.RenderFlags.DISABLE_MULTISAMPLING | pyrender.RenderFlags.RGBA) 196 | 197 | # undo normal color encoding 198 | normals = (2.0*color[:,:,:3])/255.0 - 1 199 | labels = color[:,:,3] 200 | 201 | # convert normals to camera coordinate system 202 | inv_camera_transform = np.linalg.inv(self._camera_transform) 203 | inv_camera_rot = inv_camera_transform[:3,:3] 204 | trans_normals = np.dot(inv_camera_rot, normals.reshape((-1,3)).T) 205 | normals_reshaped = np.reshape(trans_normals.T,normals.shape) 206 | return color, depth, normals_reshaped, labels 207 | -------------------------------------------------------------------------------- /src/dataset/round_robin_multidataset.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import Dataset 2 | import numpy as np 3 | 4 | class RoundRobinMultiDataset(Dataset): 5 | 6 | def __init__(self, dataset_collection : list): 7 | self._dataset_list = dataset_collection[:] # slicing creates shallow copy 8 | 9 | dataset_lengths = list(map(lambda x: len(x), self._dataset_list)) 10 | self._total_dataset_length = sum(dataset_lengths, 0) 11 | 12 | dataset_count = len(dataset_collection) 13 | self._indexmap = [] # indexmap -> tuple (dataset_id, index_id) 14 | dataset_indices = np.zeros(len(self._dataset_list), dtype = int) 15 | next_dataset_id = 0 16 | for _ in range(self._total_dataset_length): 17 | assigned = False 18 | while not assigned: 19 | dsindex = dataset_indices[next_dataset_id] 20 | if(dsindex < dataset_lengths[next_dataset_id]): 21 | self._indexmap.append((next_dataset_id, dsindex)) 22 | dataset_indices[next_dataset_id] += 1 23 | assigned = True 24 | next_dataset_id += 1 25 | next_dataset_id %= dataset_count 26 | 27 | def __len__(self): 28 | return self._total_dataset_length 29 | 30 | def __getitem__(self,idx): 31 | index = self._indexmap[idx] 32 | return self._dataset_list[index[0]][index[1]] 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/dataset/samplers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCL3D/StructureNet/d23e13d24fb18861d84371c651a510f8a83d78a0/src/dataset/samplers/__init__.py -------------------------------------------------------------------------------- /src/dataset/samplers/background/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_background_sampler import * 2 | from .noisy_background_generator import * -------------------------------------------------------------------------------- /src/dataset/samplers/background/image_background_sampler.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | from ..base_sampler import BaseSampler 5 | 6 | class ImageBackgroundSamplerParams: 7 | def __init__(self, path_to_dataset : str, scale : float, rnd_seed : int = 6677): 8 | ''' 9 | scale: value to scale loaded background (to adjust for metersmillimeters) 10 | ''' 11 | self.path_to_dataset = path_to_dataset 12 | self.rnd_seed = rnd_seed 13 | self.scale = scale 14 | 15 | class ImageBackgroundSampler(BaseSampler): 16 | 17 | def __init__(self, params : ImageBackgroundSamplerParams): 18 | super().__init__() 19 | self._params = params 20 | self._rnd_gen = np.random.RandomState(self._params.rnd_seed) 21 | self._get_filelist() 22 | 23 | 24 | def _get_filelist(self): 25 | self._filepaths = list(map(lambda x: os.path.join(self._params.path_to_dataset,x),os.listdir(self._params.path_to_dataset))) 26 | 27 | def sample(self): 28 | 29 | bgcount = len(self._filepaths) 30 | index = self._rnd_gen.randint(0,bgcount) 31 | 32 | fname = self._filepaths[index] 33 | img = cv2.imread(fname,cv2.IMREAD_ANYDEPTH).astype(np.float32) * self._params.scale 34 | return img 35 | -------------------------------------------------------------------------------- /src/dataset/samplers/background/noisy_background_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..base_sampler import BaseSampler 3 | 4 | class UniformNoisyBackgroundGeneratorParams: 5 | def __init__(self, width : int, height : int, depth_min : float = 0.5, depth_max : float = 8.0, rnd_seed : int = 1234): 6 | self.width = width 7 | self.height = height 8 | self.depth_min = depth_min 9 | self.depth_max = depth_max 10 | self.rnd_seed = rnd_seed 11 | 12 | class UniformNoisyBackgroundGenerator(BaseSampler): 13 | def __init__(self,params : UniformNoisyBackgroundGeneratorParams): 14 | super().__init__() 15 | self._params = params 16 | self._rng = np.random.RandomState(self._params.rnd_seed) 17 | 18 | def sample(self): 19 | noisy_bg = self._rng.uniform(self._params.depth_min, self._params.depth_max,(self._params.height, self._params.width)) 20 | return noisy_bg 21 | 22 | 23 | class GaussianNoisyBackgroundGeneratorParams: 24 | def __init__(self, width : int, height : int, depth_mean : float = 3.5, depth_std : float = 1.5, rnd_seed = 1234): 25 | self.width = width 26 | self.height = height 27 | self.depth_mean = depth_mean 28 | self.depth_std = depth_std 29 | self.rnd_seed = rnd_seed 30 | 31 | class GaussianNoisyBackgroundGenerator(BaseSampler): 32 | def __init__(self, params : GaussianNoisyBackgroundGeneratorParams): 33 | super().__init__() 34 | self._params = params 35 | self._rng = np.random.RandomState(self._params.rnd_seed) 36 | 37 | def sample(self): 38 | noisy_bg = self._params.depth_std * 
self._rng.randn(self._params.height,self._params.width) + self._params.depth_mean 39 | np.clip(noisy_bg, 0.0, np.Inf, out = noisy_bg) 40 | return noisy_bg 41 | -------------------------------------------------------------------------------- /src/dataset/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractclassmethod 2 | 3 | class BaseSampler(ABC): 4 | 5 | def __init__(self): 6 | super().__init__() 7 | 8 | @abstractclassmethod 9 | def sample(self): 10 | pass 11 | -------------------------------------------------------------------------------- /src/dataset/samplers/intrinsics_generator.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import json 3 | from .base_sampler import BaseSampler 4 | 5 | class IntrinsicsGeneratorParams(object): 6 | 7 | def __init__(self, width : int, height : int, rnd_seed : int): 8 | self.width = width 9 | self.height = height 10 | self.rnd_seed = rnd_seed 11 | 12 | class IntrinsicsGenerator(BaseSampler): 13 | 14 | def __init__(self, params : IntrinsicsGeneratorParams): 15 | super().__init__() 16 | self._params = params 17 | self._rnd_gen = numpy.random.RandomState(self._params.rnd_seed) 18 | 19 | # tuple((width,height)): [[fx fy cx cy], [fx fy cx cy], ...] 20 | self._device_intrinsics = { 21 | (1280,720) :[ 22 | [943.5726318359375, 943.5726318359375, 636.60302734375, 352.9541015625], # RS2 Intrinsics 23 | [939.235107421875, 939.235107421875, 639.2382202148438, 350.4108581542969] # RS2 Intrinsics 24 | ], 25 | (640,480) :[ 26 | [629.0484008789063,629.0484008789063,317.7353515625,235.302734375], # RS2 Intrinsics 27 | [626.15673828125,626.15673828125, 319.4921569824219, 233.60723876953126], # RS2 Intrinsics 28 | [582.023188, 585.883685, 314.722819, 224.157081], # K1 Intrinsics 29 | [581.810914, 580.285359, 314.055143, 231.700159], # K1 Intrinsics 30 | [592.417057, 576.458251, 326.514575, 243.213944], # K1 Intrinsics 31 | [578.750057, 584.497763, 325.442541, 237.415025], # K1 Intrinsics 32 | [580.203486, 585.823696, 330.492915, 221.879735], # K1 Intrinsics 33 | [596.025924, 592.786521, 333.317519, 248.456780], # K1 Intrinsics 34 | [587.712380, 581.384658, 328.100841, 231.595926] # K1 Intrinsics 35 | ], 36 | (512,424) : [ 37 | [366.7136, 366.7136, 256.5948, 207.1343], #K2 Intrinsics 38 | [367.4368, 367.4368, 260.8115, 205.1943], #K2 Intrinsics 39 | [364.3239, 364.3239, 258.5376, 203.6222], #K2 Intrinsics 40 | [365.2731, 365.2731, 255.1621, 208.3562] #K2 Intrinsics 41 | ], 42 | (320,288) : [ 43 | [252.3858, 252.4081, 163.4171, 165.4822], 44 | [252.3462, 252.3647, 157.0585, 166.1083], 45 | [252.2103, 252.2250, 167.5221, 170.2071], 46 | [251.9636, 251.9164, 163.3273, 166.7225], 47 | [251.8373, 251.7830, 166.1493, 171.8638], 48 | [252.5108, 252.5648, 163.9615, 170.0882] 49 | ] 50 | } 51 | 52 | width = self._params.width 53 | height = self._params.height 54 | 55 | if not (width, height) in self._device_intrinsics: 56 | new_intrinsics_list = [] 57 | for reso in self._device_intrinsics.keys(): 58 | 59 | if (reso[0] % width) == 0 and (reso[1] % height) == 0: 60 | downscale_factor_x = reso[0] / width 61 | downscale_factor_y = reso[1] / height 62 | 63 | for intr in self._device_intrinsics[reso]: 64 | new_intrinsics = [intr[0] / downscale_factor_x, intr[1] / downscale_factor_y, intr[2] / downscale_factor_x, intr[3]/downscale_factor_y] 65 | new_intrinsics_list.append(new_intrinsics) 66 | 67 | if len(new_intrinsics_list) > 0: 68 | 
self._device_intrinsics[(width,height)] = new_intrinsics_list 69 | else: 70 | raise Exception('invalid intrinsics request. no suitable device intrinsics are available for this resolution') 71 | 72 | 73 | def sample(self) -> list: 74 | 75 | reso = (self._params.width, self._params.height) 76 | intr_list = self._device_intrinsics[reso] 77 | 78 | index = self._rnd_gen.randint(0,len(intr_list)) 79 | 80 | intrinsics = intr_list [index] 81 | return intrinsics 82 | 83 | @property 84 | def width(self) -> int: 85 | return self._params.width 86 | 87 | @property 88 | def height(self) -> int: 89 | return self._params.height 90 | -------------------------------------------------------------------------------- /src/dataset/samplers/pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .pose_sampler import * -------------------------------------------------------------------------------- /src/dataset/samplers/pose/pose_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from .pose_sampler_utils import * 3 | from enum import Enum 4 | 5 | class PoseType(Enum): 6 | 7 | HORIZONAL = 1, 8 | VERTICAL_1 = 2, 9 | VERTICAL_2 = 3 10 | 11 | #TODO: rename to PoseSamplerParamsRandom or something like this 12 | class PoseSamplerParams(object): 13 | ''' 14 | Parameters for @PoseSampler class. 15 | Parameters define a camera position and a camera "look-at". 16 | Camera position is represented in cylidrical coordinates, with 17 | @param r in [rmin,rmax] : distance from the center 18 | @param z in [zmin,zmax] : height from z = 0 plane 19 | @param ef in [efmin, efmax] : polar angle (usually is in [0,2*pi)) 20 | @param look_at_radius : if p (x,y,z) is a camera look at,then |x| < r ##TODO: this must be handled more carefully, not in a sphere 21 | ''' 22 | def __init__( self, 23 | num_positions : int, 24 | rmin : float, 25 | rmax : float, 26 | zmin : float, 27 | zmax : float, 28 | look_at_radius : float, 29 | up_vector_variance = 5.0, #degrees 30 | phimin = 0.0, 31 | phimax = 2 * numpy.pi, 32 | pose_type : PoseType = PoseType.HORIZONAL, 33 | random_seed = None 34 | ): 35 | self.num_positions = num_positions 36 | self.rmin = rmin 37 | self.rmax = rmax 38 | self.heightmin = zmin 39 | self.heightmax = zmax 40 | self.phimin = phimin 41 | self.phimax = phimax 42 | self.look_at_radius = look_at_radius 43 | self.up_vector_variance = up_vector_variance 44 | self.pose_type = pose_type 45 | self.random_seed = random_seed 46 | 47 | class PoseSamplerParamsGrid(object): 48 | ''' 49 | Parameters for @PoseSampler class, for grid sampling. 50 | Parameters define a camera position and a camera "look-at". 
51 | Camera position is represented in cylidrical coordinates, with 52 | @param r in [rmin,rmax] : distance from the center 53 | @param z in [zmin,zmax] : height from z = 0 plane 54 | @param ef in [efmin, efmax] : polar angle (usually is in [0,2*pi)) 55 | @param look_at_radius : if p (x,y,z) is a camera look at,then |x| < r ##TODO: this must be handled more carefully, not in a sphere 56 | ''' 57 | def __init__( self, 58 | rmin :float, 59 | rmax :float, 60 | dr :float, 61 | zmin :float, 62 | zmax :float, 63 | dz :float, 64 | look_at_radius :float, 65 | dphi :float, 66 | up_vector_variance = 5.0, #degrees 67 | phimin = 0.0, 68 | phimax = 2 * numpy.pi, 69 | pose_type : PoseType = PoseType.HORIZONAL, 70 | random_seed = None, 71 | ): 72 | self.rmin = rmin 73 | self.rmax = rmax 74 | self.dr = dr 75 | self.heightmin = zmin 76 | self.heightmax = zmax 77 | self.dz = dz 78 | self.phimin = phimin 79 | self.phimax = phimax 80 | self.dphi = dphi 81 | self.look_at_radius = look_at_radius 82 | self.up_vector_variance = up_vector_variance 83 | self.pose_type = pose_type 84 | self.random_seed = random_seed 85 | 86 | class PoseSampler(object): 87 | ''' 88 | @PoseSampler class that containts the samples 89 | ''' 90 | def __init__( self, 91 | params : PoseSamplerParams, 92 | ): 93 | self.rng = numpy.random.RandomState(seed = params.random_seed) # fix random seed in order to get data generation consistency 94 | 95 | self.params = params 96 | number_look_at_aug = 1 97 | 98 | up_direction = numpy.array([0.0, 1.0, 0.0]) 99 | 100 | 101 | ### First generate camera positions (or camera centers) 102 | if isinstance(self.params, PoseSamplerParams): 103 | number_of_samples = params.num_positions 104 | _R = self.rng.uniform(params.rmin, params.rmax, number_of_samples) #radius 105 | _phi = self.rng.uniform(params.phimin, params.phimax, number_of_samples) #phi 106 | _y = self.rng.uniform(params.heightmin, params.heightmax, number_of_samples) #z 107 | elif isinstance(self.params, PoseSamplerParamsGrid): #GRID 108 | _R = numpy.arange(params.rmin, params.rmax, params.dr) 109 | _phi = numpy.arange(params.phimin, params.phimax, params.dphi) 110 | _y = numpy.arange(params.heightmin, params.heightmax, params.dz) 111 | 112 | Nr = len(_R) 113 | Nphi = len(_phi) 114 | Ny = len(_y) 115 | 116 | 117 | _phi = numpy.repeat(_phi, Nr * Ny) 118 | _R = numpy.tile(numpy.repeat(_R, Ny), Nphi) 119 | _y = numpy.tile(_y, Nr * Nphi) 120 | number_of_samples = len(_y) 121 | 122 | 123 | _x = numpy.multiply(_R, numpy.cos(_phi)) 124 | _z = numpy.multiply(_R, numpy.sin(_phi)) 125 | 126 | ########## Clean up 127 | del _R 128 | del _phi 129 | 130 | # Create camera positions in global space 131 | # N x 3 132 | _positions = numpy.concatenate( (numpy.expand_dims(_x,axis = 1), 133 | numpy.expand_dims(_y,axis = 1), 134 | numpy.expand_dims(_z,axis = 1)), 135 | axis = 1) ### camera positions 136 | 137 | ######### Clean up 138 | del _x 139 | del _y 140 | del _z 141 | 142 | # Create "look at" targets for every camera position 143 | # number_look_at_aug * N x 3 144 | _look_at_augmentations = \ 145 | generatePointsInCircle( number_look_at_aug, 146 | params.look_at_radius, 147 | _positions, 148 | rng_generator = self.rng) 149 | 150 | _positions = numpy.repeat(_positions,number_look_at_aug,axis=0) 151 | #_augmented_look_ats = _look_at_augmentations - _positions 152 | _augmented_look_ats = _positions - _look_at_augmentations 153 | 154 | 155 | 156 | # Create right handed camera coordinate system 157 | _right_vectors, _up_vectors, _augmented_look_ats = \ 158 | 
createRightHandCartesian(_augmented_look_ats, up_direction) 159 | 160 | # Random angles for augmentation 161 | thetas = self.rng.uniform( 162 | -numpy.radians(params.up_vector_variance) / 2, 163 | numpy.radians(params.up_vector_variance) / 2, 164 | _up_vectors.shape[0]) 165 | 166 | # Rotate every camera coordinate system along "look at" vector 167 | _up_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats,thetas,_up_vectors) 168 | _right_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats,thetas,_right_vectors) 169 | 170 | ########### Clean up 171 | del thetas 172 | 173 | #rotate about x axis 174 | _up_vectors, _ = rotateVectorAboutAxis(_right_vectors, numpy.asarray([numpy.pi]), _up_vectors) 175 | _augmented_look_ats, _ = rotateVectorAboutAxis(_right_vectors, numpy.asarray([numpy.pi]), _augmented_look_ats) 176 | 177 | if self.params.pose_type == PoseType.VERTICAL_1: 178 | _up_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats, numpy.asarray([-numpy.pi/2]), _up_vectors) 179 | _right_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats, numpy.asarray([-numpy.pi/2]), _right_vectors) 180 | elif self.params.pose_type == PoseType.VERTICAL_2: 181 | _up_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats, numpy.asarray([numpy.pi/2]), _up_vectors) 182 | _right_vectors, _ = rotateVectorAboutAxis(_augmented_look_ats, numpy.asarray([numpy.pi/2]), _right_vectors) 183 | 184 | 185 | _rotation_matrices = numpy.concatenate((numpy.expand_dims(_right_vectors,2), 186 | numpy.expand_dims(_up_vectors,2), 187 | numpy.expand_dims(_augmented_look_ats,2)),axis=2) 188 | 189 | self.transformations = numpy.zeros((number_of_samples,4,4)) 190 | self.transformations[:,:3,:3] = _rotation_matrices 191 | self.transformations[:,:3,3] = _positions 192 | self.transformations[:,3,:] = numpy.repeat(numpy.expand_dims(numpy.array([0.0,0.0,0.0,1.0]),0),number_of_samples ,axis = 0) 193 | -------------------------------------------------------------------------------- /src/dataset/samplers/pose/pose_sampler_utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import torch ### in order to make batched operations 3 | 4 | 5 | ''' 6 | Computes camera transformation for a point 7 | @param camera_look_at : look at positions of a camera Nx3 8 | @param camera_up_vector : camera up vector Nx3 9 | @param camera_position : position of the camera Nx3 10 | returns: rotation matrices Nx3x3 11 | http://ksimek.github.io/2012/08/22/extrinsic/ "Look at camera" 12 | ''' 13 | def computeRotation( 14 | camera_look_at_position : numpy.array, ### N x 3 15 | camera_up_vector : numpy.array, ### N x 3 16 | camera_position : numpy.array ### N x 3 17 | ): 18 | return_matrices = numpy.zeros((camera_position.shape[0],3,3)) 19 | L = torch.from_numpy(camera_look_at_position) - torch.from_numpy(camera_position) 20 | L = torch.nn.functional.normalize(L) 21 | 22 | s = torch.cross(L, torch.from_numpy(camera_up_vector), dim = 1) 23 | s = torch.nn.functional.normalize(s) 24 | 25 | udot = torch.cross(s,L, dim = 1) 26 | 27 | return_matrices[:,0,:] = s 28 | return_matrices[:,1,:] = udot 29 | return_matrices[:,2,:] = -L 30 | 31 | return return_matrices 32 | 33 | ''' 34 | Computed the perpendicular plane for a vector as v = b - a, 35 | which passes through point a. 
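A small worked example of the returned coefficients (values chosen purely for illustration): for a = (1, 0, 0) and b = (2, 0, 0) the normal is v = b - a = (1, 0, 0) and d = -v . a = -1, so the returned row is [1, 0, 0, -1], i.e. the plane x - 1 = 0, which indeed passes through a.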
36 | @param a : point one Nx3 37 | @param b : point two Nx3 38 | returns: perpendicular planes Nx4 39 | planes are defined as ax + by + cz + d = 0 40 | ''' 41 | def computePerpendicularPlane( 42 | pointA : numpy.array, ### Nx3 43 | pointB : numpy.array ### Nx3 44 | ): 45 | torchA = torch.from_numpy(pointA) 46 | vectors = torch.from_numpy(pointB) - torchA #vectors abc's 47 | d = torch.bmm(vectors.unsqueeze(-1).view(-1,1,3), 48 | - torchA.unsqueeze(-1).view(-1,3,1)).squeeze(-1) 49 | return torch.cat([vectors,d], dim = 1).numpy() 50 | 51 | ''' 52 | Given a vector, find 2 random perpendicular vectors. 53 | @param vector : input vector 54 | returns x,y : perpendicular vectors 55 | ''' 56 | def computePerpendicularVectors( 57 | vector : numpy.array ### Nx3 58 | ): 59 | vectorTorch = torch.nn.functional.normalize(torch.from_numpy(vector)).float() 60 | x = torch.randn((vectorTorch.shape)) # Same shape as input 61 | x -= torch.bmm(x.unsqueeze(-1).view(-1,1,3), 62 | vectorTorch.unsqueeze(-1).view(-1,3,1)).squeeze(-1) * vectorTorch 63 | 64 | y = torch.cross(vectorTorch, x , dim = 1) 65 | 66 | return torch.nn.functional.normalize(x).numpy(),torch.nn.functional.normalize(y).numpy() 67 | 68 | ''' 69 | Given a random vector v, produce N points that lay in 70 | a cirle of radius R perpendicular to v 71 | @param N : number of points to produce 72 | @param R : radius of circle 73 | @param V : perpendicular vector as discribed 74 | returns : a set of N*M x 3 points 75 | ''' 76 | def generatePointsInCircle( 77 | N : int, 78 | R : float, 79 | vector : numpy.array, ### Mx3 80 | rng_generator = numpy.random 81 | ): 82 | R = float(R) 83 | M = vector.shape[0] 84 | pX, pY = computePerpendicularVectors(vector) 85 | points = numpy.zeros((M*N,3), dtype = numpy.float) 86 | random_factor = rng_generator.uniform(0,1,(M*N,2)) 87 | random_angle = rng_generator.uniform(0,2*numpy.pi,(M*N)) 88 | for i in range(3): 89 | points[:,i] = \ 90 | R*random_factor[:,0]*numpy.cos(random_angle)*numpy.repeat(pX[:,i],N) + \ 91 | R*random_factor[:,1]*numpy.sin(random_angle)*numpy.repeat(pY[:,i],N) 92 | return points 93 | 94 | 95 | 96 | ''' 97 | Rotate vectors along axis by angle. 98 | ''' 99 | def rotateVectorAboutAxisXYZ( 100 | angle : float, 101 | axis : int, #x,y,z 102 | vectors : numpy.array # N x 3 103 | ): 104 | from numpy import sin, cos 105 | vectorsTorch = torch.from_numpy(vectors).unsqueeze(1) 106 | b, _ , _ = vectorsTorch.shape 107 | if axis == 0: 108 | R = torch.tensor([ 109 | [1,0,0], 110 | [0, cos(angle), -sin(angle)], 111 | [0, sin(angle), cos(angle)] 112 | ]).double() 113 | elif axis == 1: 114 | R = torch.tensor([ 115 | [cos(angle),0,sin(angle)], 116 | [0, 1, 0], 117 | [-sin(angle), 0, cos(angle)] 118 | ]).double() 119 | elif axis == 2: 120 | R = torch.tensor([ 121 | [cos(angle),-sin(angle),0], 122 | [sin(angle), cos(angle), 0], 123 | [0, 0, 1] 124 | ]).double() 125 | 126 | if vectors is not None: 127 | rotated = torch.bmm(vectorsTorch, R.unsqueeze(0).expand(b,3,3)) 128 | return rotated.numpy(), R 129 | else: 130 | return None , R 131 | 132 | def rotateVectorAboutAxis( 133 | axii : numpy.array, 134 | thetas : numpy.array, 135 | vectors : numpy.array # N x 3 136 | ): 137 | #import math 138 | """ 139 | Return the rotation matrix associated with counterclockwise rotation about 140 | the given axis by theta radians. 
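This is the Euler-Rodrigues (quaternion) form: a = cos(theta/2) and (b, c, d) = -axis * sin(theta/2), with the 3x3 matrix below assembled element-wise for all N axis/angle pairs at once. As a sanity check, axis = (0, 0, 1) and theta = pi/2 give R = [[0, -1, 0], [1, 0, 0], [0, 0, 1]], which rotates (1, 0, 0) onto (0, 1, 0).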
141 | """ 142 | #axis = np.asarray(axis) 143 | #axis = axis / math.sqrt(np.dot(axis, axis)) 144 | torchaxii = torch.from_numpy(axii) 145 | if len(torchaxii.shape) == 1: 146 | torchaxii = torchaxii.unsqueeze(0) 147 | axii = torch.nn.functional.normalize(torchaxii).numpy() #Nx3 148 | a = numpy.cos(thetas / 2.0) 149 | T = -axii * numpy.repeat(numpy.expand_dims(numpy.sin(thetas / 2.0),1),3,axis=1) 150 | b = T[:,0] 151 | c = T[:,1] 152 | d = T[:,2] 153 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 154 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 155 | R = numpy.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 156 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 157 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 158 | 159 | if vectors is not None: 160 | if len(vectors.shape) == 1: 161 | vectors_torch = torch.from_numpy(vectors).unsqueeze(0) 162 | else: 163 | vectors_torch = torch.from_numpy(vectors) 164 | return torch.bmm( 165 | torch.from_numpy(R).permute(2,0,1), 166 | vectors_torch.unsqueeze(-1).type_as(torch.from_numpy(R))).squeeze(-1).numpy(), R 167 | else: 168 | return R 169 | 170 | def createRightHandCartesian( 171 | look_at : numpy.array, # N x 3 172 | up_direction : numpy.array # 3 173 | ): 174 | look_at_n = torch.nn.functional.normalize(torch.from_numpy(look_at)) 175 | n, _ = look_at.shape 176 | right_vector = torch.cross(torch.from_numpy(up_direction).unsqueeze(0).expand(n,3), 177 | look_at_n, dim = 1) 178 | right_vector = torch.nn.functional.normalize(right_vector) 179 | return \ 180 | right_vector.numpy(),\ 181 | torch.nn.functional.normalize(torch.cross(look_at_n, right_vector, dim = 1)).numpy(),\ 182 | look_at_n.numpy() 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /src/dataset/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .base_sampler import BaseSampler 3 | 4 | class RandomSampler(BaseSampler): 5 | def __init__(self, data : list, weights : list, rnd_seed = 1234): 6 | super().__init__() 7 | assert(len(data) == len(weights)) 8 | 9 | self._data = data 10 | self._probabilities = np.cumsum(weights) / np.sum(weights) 11 | self._rng = np.random.RandomState(rnd_seed) 12 | 13 | def sample(self): 14 | p = self._rng.uniform() 15 | index = np.min([np.searchsorted(self._probabilities,p,side='right'), len(self._probabilities)-1]) 16 | return self._data[index] 17 | -------------------------------------------------------------------------------- /src/io/__init__.py: -------------------------------------------------------------------------------- 1 | from .box_model_loader import * 2 | from .plywrite import * -------------------------------------------------------------------------------- /src/io/box_model_loader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tinyobjloader 3 | from enum import Flag, auto 4 | 5 | class BoxLoadFlags(Flag) : 6 | 7 | LOAD_SIDES = auto() 8 | LOAD_UP = auto() 9 | LOAD_DOWN = auto() 10 | LOAD_ALL = LOAD_SIDES | LOAD_UP | LOAD_DOWN 11 | 12 | ''' 13 | obj loader to load box model 14 | returns dictionary 15 | { 16 | shape_names : [string list] 17 | vertices : 3 coordinates per vertex , 4 vertices per side, 24 sides total, vertices of side i at i*4*3 .. (i*4+3)*3 18 | normals: 4 normals per side, one for each side vertex, normals of side i at i*4*3 .. 
(i*4+3)*3 19 | indices: 6 indices perside (2 triangles), indices of side i at: 6*i ... 6*i+5 20 | } 21 | the units of the model are in millimeters (mm) 22 | ''' 23 | def load_box_model(path_to_model_obj : str, flags : BoxLoadFlags = BoxLoadFlags.LOAD_ALL): 24 | # Create reader. 25 | reader = tinyobjloader.ObjReader() 26 | 27 | # Load .obj(and .mtl) using default configuration 28 | ret = reader.ParseFromFile(path_to_model_obj) 29 | 30 | if ret == False: 31 | print("Warn:", reader.Warning()) 32 | print("Err:", reader.Error()) 33 | print("Failed to load : ", path_to_model_obj) 34 | 35 | sys.exit(-1) 36 | 37 | if reader.Warning(): 38 | print("Warn:", reader.Warning()) 39 | 40 | attrib = reader.GetAttrib() 41 | shapes = reader.GetShapes() 42 | 43 | def _filter_shape(shape_name : str): 44 | if (flags & BoxLoadFlags.LOAD_DOWN) and ("_down_" in shape_name): 45 | return True 46 | elif (flags & BoxLoadFlags.LOAD_UP) and ("_up_" in shape_name): 47 | return True 48 | elif (flags & BoxLoadFlags.LOAD_SIDES) and (("_front_" in shape_name) or ("_back_" in shape_name) or ("_left_" in shape_name) or ("_right_" in shape_name)): 49 | return True 50 | 51 | return False 52 | 53 | 54 | filtered_shapes = list(filter(lambda x: _filter_shape(x.name),shapes)) 55 | 56 | side_names = [] 57 | vertices = [] 58 | normals = [] 59 | indices = [] 60 | 61 | index_map = dict() 62 | 63 | for shape in filtered_shapes: 64 | side_names.append(shape.name) 65 | for idx in shape.mesh.indices: 66 | 67 | if(idx.vertex_index in index_map): 68 | index = index_map[idx.vertex_index] 69 | else: 70 | newidx = len(index_map) 71 | index_map[idx.vertex_index] = newidx 72 | index = newidx 73 | 74 | # for all coordinates (x,y,z) 75 | for j in range(3): 76 | vertices.append(attrib.vertices[idx.vertex_index*3+j]) 77 | normals.append(attrib.normals[idx.normal_index*3+j]) 78 | 79 | indices.append(index) 80 | 81 | box_model = { 82 | "side_names" : side_names, 83 | "vertices" : vertices, 84 | "normals" : normals, 85 | "indices" : indices, 86 | "index_map" : index_map 87 | } 88 | 89 | return box_model 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/io/calibration_result.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch 3 | import numpy 4 | import datetime 5 | 6 | class CalibrationResult: 7 | def __init__( 8 | self, 9 | method = "structureNet" 10 | ): 11 | self.data = {} 12 | self.data["Metadata"] = { 13 | "Method" : method, 14 | "Date" : str(datetime.datetime.now()) 15 | } 16 | self.data["Viewpoints"] = [] 17 | 18 | def update( 19 | self, 20 | name :str, 21 | extrinsics :torch.tensor, 22 | intrinsics :torch.tensor, 23 | correspondences :torch.tensor 24 | ): 25 | self.data["Viewpoints"].append({ 26 | "name" : name, 27 | "extrinsics" : extrinsics.squeeze().cpu().numpy().flatten().tolist(), 28 | "intrinsics" : intrinsics.squeeze().cpu().numpy().flatten().tolist() 29 | }) 30 | 31 | def write( 32 | self, 33 | filename : str 34 | ): 35 | if ".json" not in filename: 36 | filename += ".json" 37 | with open(filename, 'w') as outfile: 38 | json.dump(self.data, outfile, indent = 4) 39 | outfile.close() 40 | 41 | def read(self, filename : str) : 42 | 43 | with open(filename,'r') as infile: 44 | self.data = json.load(infile) 45 | -------------------------------------------------------------------------------- /src/io/multidimentional_imsave.py: -------------------------------------------------------------------------------- 1 | import OpenEXR 2 | 
import Imath 3 | import torch 4 | ######### https://github.com/gabrieleilertsen/hdrcnn/issues/1 5 | 6 | 7 | ''' 8 | Function that saves multidimentional tensor as an EXR image. 9 | dimensions : CxHxW 10 | ''' 11 | def saveTensorEXR( 12 | tensor : torch.tensor, 13 | filename : str 14 | ): 15 | assert len(tensor.shape) == 3, "Tensor should be CxHxW" 16 | 17 | c,h,w = tensor.shape 18 | assert c < 99, "More than 99 channels are not supported" 19 | 20 | header = OpenEXR.Header(w,h) 21 | header['channels'] = dict() 22 | data = dict() 23 | 24 | for i in range(c): 25 | _id = ("0" if (i<10) else "") + str(i) 26 | 27 | header['channels'][_id] = Imath.Channel(Imath.PixelType(OpenEXR.FLOAT)) 28 | data[_id] = tensor[i].detach().cpu().numpy().tobytes() 29 | 30 | 31 | file = OpenEXR.OutputFile(filename, header) 32 | file.writePixels(data) 33 | file.close() 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/io/plywrite.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | def save_ply(filename, tensor, scale, color='black' , normals = None): 5 | b, _, h, w = tensor.size() 6 | for n in range(b): 7 | coords = tensor[n, :, :, :].detach().cpu().numpy() 8 | x_coords = coords[0, :] * scale 9 | y_coords = coords[1, :] * scale 10 | z_coords = coords[2, :] * scale 11 | if normals is not None: 12 | norms = normals[n, : , : , :].detach().cpu().numpy() 13 | nx_coords = norms[0, :] 14 | ny_coords = norms[1, :] 15 | nz_coords = norms[2, :] 16 | with open(filename.replace("#", str(n)), "w") as ply_file: 17 | ply_file.write("ply\n") 18 | ply_file.write("format ascii 1.0\n") 19 | ply_file.write("element vertex {}\n".format(w * h)) 20 | ply_file.write("property float x\n") 21 | ply_file.write("property float y\n") 22 | ply_file.write("property float z\n") 23 | if normals is not None: 24 | ply_file.write('property float nx\n') 25 | ply_file.write('property float ny\n') 26 | ply_file.write('property float nz\n') 27 | ply_file.write("property uchar red\n") 28 | ply_file.write("property uchar green\n") 29 | ply_file.write("property uchar blue\n") 30 | ply_file.write("end_header\n") 31 | 32 | if normals is None: 33 | for x in torch.arange(w): 34 | for y in torch.arange(h): 35 | ply_file.write("{} {} {} {} {} {}\n".format(\ 36 | x_coords[y, x], y_coords[y, x], z_coords[y, x],\ 37 | "255" if color=='red' else "0", 38 | "255" if color=='green' else "0", 39 | "255" if color=='blue' else "0")) 40 | else: 41 | for x in torch.arange(w): 42 | for y in torch.arange(h): 43 | ply_file.write("{} {} {} {} {} {} {} {} {}\n".format(\ 44 | x_coords[y, x], y_coords[y, x], z_coords[y, x],\ 45 | nx_coords[y, x], ny_coords[y, x], nz_coords[y, x],\ 46 | "255" if color=='red' else "0", 47 | "255" if color=='green' else "0", 48 | "255" if color=='blue' else "0")) 49 | 50 | 51 | def save_ply_custom_color(filename, tensor, scale, color=[0,0,0] , normals = None): 52 | b, _, h, w = tensor.size() 53 | for n in range(b): 54 | coords = tensor[n, :, :, :].detach().cpu().numpy() 55 | x_coords = coords[0, :] * scale 56 | y_coords = coords[1, :] * scale 57 | z_coords = coords[2, :] * scale 58 | if normals is not None: 59 | norms = normals[n, : , : , :].detach().cpu().numpy() 60 | nx_coords = norms[0, :] 61 | ny_coords = norms[1, :] 62 | nz_coords = norms[2, :] 63 | with open(filename.replace("#", str(n)), "w") as ply_file: 64 | ply_file.write("ply\n") 65 | ply_file.write("format ascii 1.0\n") 66 | ply_file.write("element vertex 
{}\n".format(w * h)) 67 | ply_file.write("property float x\n") 68 | ply_file.write("property float y\n") 69 | ply_file.write("property float z\n") 70 | if normals is not None: 71 | ply_file.write('property float nx\n') 72 | ply_file.write('property float ny\n') 73 | ply_file.write('property float nz\n') 74 | ply_file.write("property uchar red\n") 75 | ply_file.write("property uchar green\n") 76 | ply_file.write("property uchar blue\n") 77 | ply_file.write("end_header\n") 78 | 79 | if normals is None: 80 | for x in torch.arange(w): 81 | for y in torch.arange(h): 82 | ply_file.write("{} {} {} {} {} {}\n".format(\ 83 | x_coords[y, x], y_coords[y, x], z_coords[y, x],\ 84 | color[0], 85 | color[1], 86 | color[2])) 87 | else: 88 | for x in torch.arange(w): 89 | for y in torch.arange(h): 90 | ply_file.write("{} {} {} {} {} {} {} {} {}\n".format(\ 91 | x_coords[y, x], y_coords[y, x], z_coords[y, x],\ 92 | nx_coords[y, x], ny_coords[y, x], nz_coords[y, x],\ 93 | color[0], 94 | color[1], 95 | color[2])) -------------------------------------------------------------------------------- /src/models/UNet_mask_max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from .blocks import * 5 | 6 | 7 | class Interpolate(nn.Module): 8 | def __init__(self, size, mode): 9 | super(Interpolate, self).__init__() 10 | """ 11 | Args: 12 | size: expected size after interpolation 13 | mode: interpolation type (e.g. bilinear, nearest) 14 | """ 15 | self.interp = nn.functional.interpolate 16 | self.size = size 17 | self.mode = mode 18 | 19 | def forward(self, x): 20 | out = self.interp(x, size=self.size, mode=self.mode) #, align_corners=False 21 | 22 | return out 23 | 24 | 25 | class Encoder(nn.Module): 26 | def __init__(self, ndf): 27 | super(Encoder, self).__init__() 28 | 29 | self.ndf = ndf 30 | 31 | self.encoder_conv1_1 = get_conv_relu(1, self.ndf, kernel_size=7, stride=1, padding=3) #PartialConv2dBlock(1, ndf, bn=False, activ='elu', sample='none-7') 32 | self.encoder_conv1_2 = get_conv_relu(self.ndf, self.ndf * 2, kernel_size=5, stride=1, padding=2) #PartialConv2dBlock(ndf, ndf * 2, bn=False, activ='elu', sample='none-5') 33 | 34 | self.encoder_pool_1 = get_max_pool(kernel_size=2, stride=2) 35 | 36 | self.encoder_conv2_1 = get_conv_relu(self.ndf * 2, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 2, ndf * 4, bn=False, activ='elu', sample='down-3') 37 | self.encoder_conv2_2 = get_conv_relu(self.ndf * 4, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 4, bn=False, activ='elu') 38 | self.encoder_conv2_3 = get_conv_relu(self.ndf * 4, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 4, bn=False, activ='elu') 39 | 40 | self.encoder_pool_2 = get_max_pool(kernel_size=2, stride=2) 41 | 42 | self.encoder_conv3_1 = get_conv_relu(self.ndf * 4, self.ndf * 8, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 8, bn=False, activ='elu', sample='down-3') 43 | self.encoder_conv3_2 = get_conv_relu(self.ndf * 8, self.ndf * 8, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 8, ndf * 8, bn=False, activ='elu') 44 | self.encoder_conv3_3 = get_conv_relu(self.ndf * 8, self.ndf * 8, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 8, ndf * 8, bn=False, activ='elu') 45 | 46 | self.encoder_pool_3 = get_max_pool(kernel_size=2, stride=2) 47 | 48 | self.encoder_conv4_1 = get_conv_relu(self.ndf * 8, self.ndf 
* 16, kernel_size=3, stride=1, padding=1) 49 | self.encoder_conv4_2 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 50 | self.encoder_conv4_3 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 51 | 52 | self.encoder_pool_4 = get_max_pool(kernel_size=2, stride=2) 53 | 54 | self.encoder_conv5_1 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 55 | self.encoder_conv5_2 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 56 | self.encoder_conv5_3 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 57 | 58 | def forward(self, x): 59 | out = self.encoder_conv1_1(x) 60 | out_pre_ds_1 = self.encoder_conv1_2(out) 61 | out = self.encoder_pool_1(out_pre_ds_1) 62 | out = self.encoder_conv2_1(out) 63 | out = self.encoder_conv2_2(out) 64 | out_pre_ds_2 = self.encoder_conv2_3(out) 65 | out = self.encoder_pool_2(out_pre_ds_2) 66 | out = self.encoder_conv3_1(out) 67 | out = self.encoder_conv3_2(out) 68 | out_pre_ds_3 = self.encoder_conv3_3(out) 69 | out = self.encoder_pool_3(out_pre_ds_3) 70 | out = self.encoder_conv4_1(out) 71 | out = self.encoder_conv4_2(out) 72 | out_pre_ds_4 = self.encoder_conv4_3(out) 73 | out = self.encoder_pool_4(out_pre_ds_4) 74 | out = self.encoder_conv5_1(out) 75 | out = self.encoder_conv5_2(out) 76 | out = self.encoder_conv5_3(out) 77 | 78 | return out_pre_ds_1, out_pre_ds_2, out_pre_ds_3, out_pre_ds_4, out 79 | 80 | 81 | class Latent(nn.Module): 82 | def __init__(self, ndf): 83 | super(Latent, self).__init__() 84 | 85 | self.ndf = ndf 86 | 87 | self.encoder_resblock1 = ResConv(self.ndf * 16) 88 | self.encoder_resblock2 = ResConv(self.ndf * 16) 89 | self.encoder_resblock3 = ResConv(self.ndf * 16) 90 | self.encoder_resblock3 = ResConv(self.ndf * 16) 91 | self.conv_1x1 = get_conv(self.ndf * 16, 1, kernel_size=1, stride=1, padding=0) 92 | 93 | def forward(self, x): 94 | out = self.encoder_resblock1(x) 95 | out = self.encoder_resblock2(out) 96 | out = self.encoder_resblock3(out) 97 | attention = self.conv_1x1(out) 98 | attention = nn.functional.sigmoid(attention) 99 | out = attention*out 100 | 101 | return out, attention 102 | 103 | 104 | class Decoder(nn.Module): 105 | def __init__(self, width, height, ndf, upsample, nclasses): 106 | super(Decoder, self).__init__() 107 | 108 | self.h = height 109 | self.w = width 110 | self.ndf = ndf 111 | self.upsample = upsample 112 | self.nclasses = nclasses 113 | 114 | #self.decoder_upsample4 = Interpolate((22, self.w // 8), mode=self.upsample) 115 | self.decoder_upsample4 = Interpolate((self.h // 8, self.w // 8), mode=self.upsample) 116 | self.decoder_deconv5_3 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 117 | self.decoder_deconv5_2 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 118 | self.decoder_deconv5_1 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 119 | self.decoder_conv_id_4 = get_conv_relu(2 * self.ndf * 16, self.ndf * 16, kernel_size=1, stride=1, padding=0) 120 | self.decoder_deconv4_3 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 121 | self.decoder_deconv4_2 = get_conv_relu(self.ndf * 16, self.ndf * 16, kernel_size=3, stride=1, padding=1) 122 | self.decoder_upsample3 = Interpolate((self.h // 4, self.w // 4), mode=self.upsample) 123 | self.decoder_deconv4_1 = get_conv_relu(self.ndf * 16, self.ndf * 8, kernel_size=3, stride=1, padding=1) 
#PartialConv2dBlock(ndf * 8, ndf * 8, bn=False, activ='elu') 124 | self.decoder_conv_id_3 = get_conv_relu(2 * self.ndf * 8, self.ndf * 8, kernel_size=1, stride=1, padding=0) #conv_1x1(2 * ndf * 8, ndf * 8, n_type=self.type) 125 | self.decoder_deconv3_3 = get_conv_relu(self.ndf * 8, self.ndf * 8, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 8, ndf * 8, bn=False, activ='elu') 126 | self.decoder_deconv3_2 = get_conv_relu(self.ndf * 8, self.ndf * 8, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 8, ndf * 8, bn=False, activ='elu') 127 | self.decoder_upsample2 = Interpolate((self.h // 2, self.w // 2), mode=self.upsample) 128 | self.decoder_deconv3_1 = get_conv_relu(self.ndf * 8, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 8, ndf * 4, bn=False, activ='elu') 129 | self.decoder_conv_id_2 = get_conv_relu(2 * self.ndf * 4, self.ndf * 4, kernel_size=1, stride=1, padding=0) #conv_1x1(2 * ndf * 4, ndf * 4, n_type=self.type) 130 | self.decoder_deconv2_3 = get_conv_relu(self.ndf * 4, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 4, bn=False, activ='elu') 131 | self.decoder_deconv2_2 = get_conv_relu(self.ndf * 4, self.ndf * 4, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 4, bn=False, activ='elu') 132 | self.decoder_upsample1 = Interpolate((self.h, self.w), mode=self.upsample) 133 | self.decoder_deconv2_1 = get_conv_relu(self.ndf * 4, self.ndf * 2, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 4, ndf * 2, bn=False, activ='elu') 134 | self.decoder_conv_id_1 = get_conv_relu(2 * self.ndf * 2, self.ndf * 2, kernel_size=1, stride=1, padding=0) #conv_1x1(2 * ndf * 2, ndf * 2, n_type=self.type) 135 | self.decoder_deconv1_2 = get_conv_relu(self.ndf * 2, self.ndf, kernel_size=3, stride=1, padding=1) #PartialConv2dBlock(ndf * 2, ndf, bn=False, activ='elu') 136 | self.decoder_deconv1_1 = get_conv(self.ndf, self.nclasses, kernel_size=1, stride=1, padding=0) #PartialConv2dBlock(ndf, 1, bn=False, activ='no_acitv') 137 | 138 | def forward(self, x): 139 | out = self.decoder_deconv5_3(x[4]) 140 | out = self.decoder_deconv5_2(out) 141 | out = self.decoder_upsample4(out) 142 | mask4 = self.decoder_upsample4(x[5]) 143 | out = self.decoder_deconv5_1(out) 144 | out_post_up_4 = torch.cat((out, x[3]), 1) 145 | out = self.decoder_conv_id_4(out_post_up_4) 146 | out = out*mask4 147 | out = self.decoder_deconv4_3(out) 148 | out = self.decoder_deconv4_2(out) 149 | out = self.decoder_upsample3(out) 150 | mask3 = self.decoder_upsample3(x[5]) 151 | out = self.decoder_deconv4_1(out) 152 | out_post_up_3 = torch.cat((out, x[2]), 1) 153 | out = self.decoder_conv_id_3(out_post_up_3) 154 | out = out*mask3 155 | out = self.decoder_deconv3_3(out) 156 | out = self.decoder_deconv3_2(out) 157 | out = self.decoder_upsample2(out) 158 | mask2 = self.decoder_upsample2(x[5]) 159 | out = self.decoder_deconv3_1(out) 160 | out_post_up_2 = torch.cat((out, x[1]), 1) 161 | out = self.decoder_conv_id_2(out_post_up_2) 162 | out = out*mask2 163 | out = self.decoder_deconv2_3(out) 164 | out = self.decoder_deconv2_2(out) 165 | out = self.decoder_upsample1(out) 166 | mask1 = self.decoder_upsample1(x[5]) 167 | out = self.decoder_deconv2_1(out) 168 | out_post_up_1 = torch.cat((out, x[0]), 1) 169 | out = self.decoder_conv_id_1(out_post_up_1) 170 | out = out*mask1 171 | out_for_vis = self.decoder_deconv1_2(out) 172 | out = self.decoder_deconv1_1(out_for_vis) 173 | #pred = nn.functional.log_softmax(out, dim=1)#changed only to be 
compatible with ONNX exporter 174 | pred = nn.functional.softmax(out,dim = 1).log() 175 | 176 | return out_for_vis, pred 177 | 178 | 179 | class UNet_mask_max(nn.Module): 180 | def __init__(self, width, height, ndf, upsample, nclasses): 181 | super(UNet_mask_max, self).__init__() 182 | """ 183 | Args: 184 | width: input width 185 | height: input height 186 | ndf: constant number from channels 187 | upsample: upsampling type (nearest | bilateral) 188 | nclasses: number of semantice segmentation classes 189 | """ 190 | self.h = height 191 | self.w = width 192 | self.ndf = ndf 193 | self.upsample = upsample 194 | self.nclasses = nclasses 195 | 196 | self.encoder = Encoder(self.ndf) 197 | self.latent = Latent(self.ndf) 198 | self.decoder = Decoder(self.w, self.h, ndf, self.upsample, self.nclasses) 199 | 200 | def forward(self, x): 201 | out_list = [] 202 | out_pre_ds_1, out_pre_ds_2, out_pre_ds_3, out_pre_ds_4, out = self.encoder(x) 203 | 204 | # out_list.append(out_pre_ds_1) 205 | # out_list.append(out_pre_ds_2) 206 | # out_list.append(out_pre_ds_3) 207 | # out_list.append(out_pre_ds_4) 208 | 209 | out, attention = self.latent(out) 210 | # out_list.append(out) 211 | # out_list.append(attention) 212 | 213 | out_list = (out_pre_ds_1,out_pre_ds_2,out_pre_ds_3,out_pre_ds_4,out,attention) 214 | 215 | activs, seg_out = self.decoder(out_list) 216 | 217 | return activs, seg_out -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .UNet_mask_max import * 2 | 3 | import sys 4 | 5 | def get_model(model, *kwargs): 6 | if str.lower(model) == "gcn": 7 | return GCN(kwargs[0]) 8 | 9 | def get_UNet_model(name, params): 10 | if name == 'default' or name == 'baseline': 11 | return UNet_base(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 12 | elif name == 'full_mask_max': 13 | return UNet_mask_max(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 14 | elif name == 'full_max': 15 | return UNet_max(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 16 | elif name == 'heatmap': 17 | return UNet_heat(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 18 | elif name == 'full_mask': 19 | return UNet_mask(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 20 | elif name == 'with_normals': 21 | return UNet_normals_base(params['width'], params['height'], params['ndf'], params['upsample_type'], params['nclasses']) 22 | else: 23 | print("Could not find the requested model ({})".format(name), file=sys.stderr) -------------------------------------------------------------------------------- /src/models/blocks.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | 4 | def get_conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0): 5 | return nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding) 6 | 7 | def get_conv_elu(in_channels, out_channels, alpha=1, kernel_size=3, stride=1, padding=0): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding), 10 | nn.ELU(alpha= alpha, inplace=True) 11 | ) 12 | 13 | def get_conv_relu(in_channels, out_channels, kernel_size=3, stride=1, padding=0): 14 | return 
nn.Sequential( 15 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding), 16 | nn.ReLU(inplace=True) 17 | ) 18 | 19 | def get_conv_lrelu(in_channels, out_channels, slope=1e-2, kernel_size=3, stride=1, padding=0): 20 | return nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding), 22 | nn.LeakyReLU(negative_slope=slope, inplace=True) 23 | ) 24 | 25 | def get_elu(alpha=1): 26 | return nn.ELU(alpha=alpha, inplace=True) 27 | 28 | def get_conv_preactivation(in_channels, out_channels, kernel_size=1, stride=1, padding=1): 29 | return nn.Sequential( 30 | nn.ELU(inplace=False), 31 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding) 32 | ) 33 | 34 | def get_conv_preactivation_relu(in_channels, out_channels, kernel_size=1, stride=1, padding=1): 35 | return nn.Sequential( 36 | nn.ReLU(inplace=False), 37 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding) 38 | ) 39 | 40 | def get_max_pool(kernel_size, stride): 41 | return nn.Sequential( 42 | nn.MaxPool2d(kernel_size, stride) 43 | ) 44 | 45 | class ResConv(nn.Module): 46 | def __init__(self, ndf): 47 | super(ResConv, self).__init__() 48 | """ 49 | Args: 50 | ndf: constant number from channels 51 | dil: dilation value - parameter for convolutional layers 52 | norma_type: normalization type (elu | batch norm) 53 | """ 54 | self.ndf = ndf 55 | self.conv1 = get_conv_preactivation_relu(self.ndf, self.ndf, kernel_size=3, stride=1, padding=1) 56 | self.conv2 = get_conv_preactivation_relu(self.ndf, self.ndf, kernel_size=3, stride=1, padding=1) 57 | 58 | def forward(self, x): 59 | residual = x 60 | out = self.conv1(x) 61 | out = self.conv2(out) 62 | out += residual 63 | 64 | return out 65 | 66 | #old one 67 | class ResidualBlock(nn.Module): 68 | def __init__(self, in_channels, out_channels, stride=1, padding=0): 69 | super(ResidualBlock, self).__init__() 70 | self.conv1 = get_conv_elu(in_channels, out_channels, stride, padding) 71 | self.conv2 = get_conv(out_channels, out_channels) 72 | self.elu = get_elu() 73 | 74 | def forward(self, x): 75 | out = self.conv1(x) 76 | out = self.conv2(out) 77 | out += x 78 | out = self.elu(out) 79 | return out 80 | 81 | def get_residual(in_channels, out_channels, stride=1, padding=0): 82 | return ResidualBlock(in_channels, out_channels, stride, padding) -------------------------------------------------------------------------------- /src/other/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | from .opt import * -------------------------------------------------------------------------------- /src/other/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | import os 5 | import sys 6 | 7 | def initialize_weights(model, init = "xavier"): 8 | if init == "xavier": 9 | init_func = nn.init.xavier_normal 10 | elif init == "kaiming": 11 | init_func = nn.init.kaiming_normal 12 | elif init == "gaussian" or init == "normal": 13 | init_func = nn.init.normal 14 | else: 15 | init_func = None 16 | if init_func is not None: 17 | #TODO: logging /w print or lib 18 | for module in model.modules(): 19 | if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear) \ 20 | or isinstance(module, nn.ConvTranspose2d): 21 | init_func(module.weight) 22 | if module.bias is not None: 23 | module.bias.data.zero_() 24 | elif isinstance(module, nn.BatchNorm2d): 25 | module.weight.data.fill_(1) 26 | module.bias.data.zero_() 
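    # If `init` is not one of the scheme names above but is a path to an existing file,
    # the branch below treats it as a saved checkpoint: torch.load(init) is expected to
    # return a dict whose "state_dict" entry is loaded into the model.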
27 | elif os.path.exists(init): 28 | #TODO: logging /w print or lib 29 | weights = torch.load(init) 30 | model.load_state_dict(weights["state_dict"]) 31 | else: 32 | print("Error when initializing model's weights, {} either doesn't exist or is not a valid initialization function.".format(init), \ 33 | file=sys.stderr) 34 | 35 | -------------------------------------------------------------------------------- /src/other/opt.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | import torch 3 | import sys 4 | 5 | class OptimizerParameters(object): 6 | def __init__(self, learning_rate=0.001, momentum=0.9, momentum2=0.999,\ 7 | epsilon=1e-8, weight_decay=0.0005, damp=0): 8 | super(OptimizerParameters, self).__init__() 9 | self.learning_rate = learning_rate 10 | self.momentum = momentum 11 | self.momentum2 = momentum2 12 | self.epsilon = epsilon 13 | self.damp = damp 14 | self.weight_decay = weight_decay 15 | 16 | def get_learning_rate(self): 17 | return self.learning_rate 18 | 19 | def get_momentum(self): 20 | return self.momentum 21 | 22 | def get_momentum2(self): 23 | return self.momentum2 24 | 25 | def get_epsilon(self): 26 | return self.epsilon 27 | 28 | def get_weight_decay(self): 29 | return self.weight_decay 30 | 31 | def get_damp(self): 32 | return self.damp 33 | 34 | def get_optimizer(opt_type, model_params, opt_params): 35 | if opt_type == "adam": 36 | return optim.Adam(model_params, \ 37 | lr=opt_params.get_learning_rate(), \ 38 | betas=(opt_params.get_momentum(), opt_params.get_momentum2()), \ 39 | eps=opt_params.get_epsilon(), 40 | weight_decay = opt_params.get_weight_decay() \ 41 | ) 42 | elif opt_type == "sgd": 43 | return optim.SGD(model_params, \ 44 | lr=opt_params.get_learning_rate(), \ 45 | momentum=opt_params.get_momentum(), \ 46 | weight_decay=opt_params.get_weight_decay(), \ 47 | dampening=opt_params.get_damp() \ 48 | ) 49 | else: 50 | print("Error when initializing optimizer, {} is not a valid optimizer type.".format(opt_type), \ 51 | file=sys.stderr) 52 | return None 53 | 54 | #not used for now 55 | def get_one_hot_mask(labels, nclasses, batch_size): 56 | one_hot = torch.zeros(batch_size, nclasses, labels.shape[2], labels.shape[3]) 57 | one_hot.scatter_(1, labels.long(), 1) 58 | 59 | return one_hot.long() 60 | 61 | # Computes and stores the average and current value 62 | class AverageMeter(object): 63 | def __init__(self): 64 | self.reset() 65 | 66 | def reset(self): 67 | self.val = torch.tensor(0.0) 68 | self.avg = torch.tensor(0.0) 69 | self.sum = torch.tensor(0.0) 70 | self.count = torch.tensor(0.0) 71 | 72 | def update(self, val, n=1): 73 | self.val = val.cpu().detach() 74 | self.sum += val.cpu().detach() * n 75 | self.count += n 76 | self.avg = self.sum / self.count 77 | -------------------------------------------------------------------------------- /src/supervision/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * 2 | from .metrics import * -------------------------------------------------------------------------------- /src/supervision/losses.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy 3 | import torch 4 | from torch import nn 5 | 6 | from src.utils import projections 7 | 8 | def soft_dice_loss(y_true, y_pred, epsilon=1e-6): 9 | axes = tuple(range(1, len(y_pred.shape)-1)) 10 | numerator = 2. 
* torch.sum(y_pred * y_true, axes) 11 | denominator = torch.sum(torch.pow(y_pred, 2) + torch.pow(y_true, 2), axes) 12 | 13 | return 1 - torch.mean(numerator / (denominator + epsilon)) 14 | 15 | def cosine_loss(n_pred, n_true): 16 | npred = n_pred.clone() 17 | ntrue = n_true.clone() 18 | 19 | return torch.sum(1 - torch.sum(n_pred * ntrue, dim=1, keepdim=True)) / (n_true.shape[0] * n_true.shape[1] * n_true.shape[2] * n_true.shape[3]) 20 | 21 | def generate_gt_heatmap(target, kernel_size, sigma): 22 | #get binary mask of the gt 23 | mask = target.clone() 24 | mask[mask != 0] = 1 25 | 26 | x_coord = torch.arange(kernel_size) 27 | x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size) 28 | y_grid = x_grid.t() 29 | xy_grid = torch.stack([x_grid, y_grid], dim=-1).float() 30 | 31 | mean = (kernel_size - 1)/2. 32 | variance = sigma**2. 33 | 34 | # Calculate the 2-dimensional gaussian kernel which is 35 | # the product of two gaussian distributions for two different 36 | # variables (in this case called x and y) 37 | gaussian_kernel = (1./(2.*math.pi*variance)) *\ 38 | torch.exp( 39 | -torch.sum((xy_grid - mean)**2., dim=-1) /\ 40 | (2*variance) 41 | ) 42 | 43 | # Make sure sum of values in gaussian kernel equals 1. 44 | gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel) 45 | 46 | # Reshape to 2d depthwise convolutional weight 47 | gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size) 48 | 49 | #conv layer will be used for Gaussian blurring 50 | gconv = nn.Conv2d(1, 1, 3, 1, 1) 51 | 52 | #init kernels with Gaussian distribution 53 | gconv.weight.data = gaussian_kernel 54 | gconv.bias.data.fill_(0) 55 | gconv.weight.requires_grad = False 56 | 57 | heatmap = gconv(mask) 58 | #heatmap = heatmap / torch.sum(heatmap) 59 | 60 | return heatmap 61 | 62 | 63 | from src.utils.geometric import compute_soft_correspondences, computeNonRigidTransformation 64 | from time import perf_counter 65 | import src.utils.transformations as transformations 66 | 67 | def soft_correspondences_loss(out,batch, confidence, criterion, device, SVD = False): 68 | 69 | soft_cor_pred, soft_cor_gt, visibility_mask = compute_soft_correspondences( 70 | out, 71 | batch["depth"].to(device), 72 | batch["intrinsics"].inverse().to(device), 73 | batch["labels"].to(device), 74 | confidence 75 | ) 76 | extrinsics = torch.eye(4)\ 77 | .expand(soft_cor_pred.shape[0],4,4)\ 78 | .to(soft_cor_pred.device) 79 | 80 | if not SVD: 81 | loss = criterion(soft_cor_gt*visibility_mask, soft_cor_pred*visibility_mask) 82 | else: 83 | loss = 0.0 84 | try: 85 | R,t,scale = computeNonRigidTransformation(soft_cor_gt*visibility_mask, soft_cor_pred*visibility_mask) 86 | R,t,scale = R.float(),t.float(),scale.float() 87 | 88 | except: 89 | print("Couldnt compute SVD") 90 | return None 91 | 92 | loss = criterion(scale , torch.ones_like(scale).to(device)) + criterion(R, torch.eye(3).expand_as(R).to(device)) + criterion(t, torch.zeros_like(t).to(device)) 93 | # extrinsics[:,:3,:3] = R 94 | # extrinsics[:,:3, 3] = t.squeeze() 95 | 96 | 97 | # transformed = transformations.transform_points_homogeneous(soft_cor_pred.unsqueeze(-1), extrinsics).squeeze() 98 | 99 | # loss = criterion(transformed, soft_cor_gt) 100 | 101 | return loss.float() 102 | 103 | def get_color_map_nclasses_17() : 104 | colors = [ 105 | #0x12bcea, 106 | 0x000000, # background 107 | # blue 108 | 0x050c66, # mid bottom front 2f 109 | 0x0b1ae6, # mid bottom back 2b 110 | 0x4754ff, # mid bottom right 2r 111 | 0x0a15b8, # mid bottom left 2l 112 | # green 113 | 
0x3bff5b, # mid top right 3r 114 | 0x00b81e, # mid top left 3l 115 | 0x006611, # mid top front 3f 116 | 0x00e626, # mid top back 3b 117 | # yellow 118 | 0xffd640, # bottom right 1r 119 | 0xe6b505, # bottom back 1b 120 | 0x665002, # bottom front 1f 121 | 0xb89204, # bottom left 1l 122 | # red 123 | 0x660900, # top front 4f 124 | 0xff493a, # top right 4r 125 | 0xe61300, # top top back 4b 126 | 0xb30f00, # top left 4l 127 | 128 | 0x888888 # uncertain (max probability < threshold), class 25 129 | #0x000000 # uncertain (max probability < threshold), class 25 130 | #0xff0000 # uncertain (max probability < threshold), class 25 131 | ] 132 | return colors 133 | import numpy as np 134 | def colorize_label_map(lmap : np.array, colors : list) -> np.array: 135 | outlmap = np.zeros((lmap.shape[0], lmap.shape[1], 3), dtype = np.uint8) 136 | 137 | for y in range(lmap.shape[0]): 138 | for x in range(lmap.shape[1]): 139 | label =lmap[y,x] 140 | # open cv default is bgr 141 | outlmap [y,x,:] = [colors[label] & 0xFF, (colors[label] & 0xFF00) >> 8, (colors[label] & 0xFF0000) >> 16] 142 | return outlmap 143 | 144 | 145 | extrinsics_calculator = None 146 | box_renderer = None 147 | def soft_correspondences_loss_unlabeled( 148 | out, 149 | batch, 150 | confidence, 151 | confidence_number, #number of labeled pixels that make a prediction valid 152 | criterion, 153 | render_flags, 154 | box_path = './data/asymmetric_box.obj' #sorry 155 | ): 156 | from src.utils.geometric import ExtrinsicsCalculator, compute_soft_correspondences_unlabeled 157 | from src.utils.transformations import transform_points_homogeneous 158 | 159 | device = out.device 160 | global extrinsics_calculator 161 | global box_renderer 162 | 163 | if extrinsics_calculator is None: 164 | extrinsics_calculator = ExtrinsicsCalculator(box_path, device, render_flags) 165 | 166 | if box_renderer is None: 167 | import src.dataset.rendering.box_renderer as br 168 | box_renderer_params = br.BoxRendererParams(render_flags = render_flags) 169 | box_renderer = br.BoxRenderer(box_scale=0.001) 170 | 171 | predicted_sides, visible_sides = compute_soft_correspondences_unlabeled( 172 | out, 173 | batch["depth"].to(device), 174 | batch["intrinsics"].inverse().to(device), 175 | confidence, 176 | confidence_number) 177 | 178 | #loss = torch.tensor(0.0).to(device) 179 | loss = 0.0 180 | uvs = projections.project_points_to_uvs(predicted_sides.unsqueeze(-1), batch["intrinsics"].to(device)) 181 | out_argmax = (torch.argmax(out, dim = 1).float() * (torch.max(out,dim = 1)[0] > confidence).float()).float() 182 | 183 | backgrounds = torch.min(out,dim = 1)[0] 184 | try: 185 | extrinsics, scales = extrinsics_calculator.forward_pointcloud(predicted_sides, visible_sides) 186 | except: 187 | print("couldnt compute svd") 188 | return loss 189 | 190 | extrinsics_c = extrinsics.clone() 191 | extrinsics_c[:,:3,:3] = extrinsics[:,:3,:3] * scales 192 | 193 | for i in range(predicted_sides.shape[0]): 194 | points = predicted_sides[i,:,visible_sides[i].squeeze()].unsqueeze(0).unsqueeze(-1) 195 | transformed_points = transform_points_homogeneous(points,extrinsics[i].unsqueeze(0)) 196 | l2 = criterion( 197 | transformed_points.squeeze(), 198 | extrinsics_calculator.box_sides_center[:,visible_sides[i].squeeze()[1:]].squeeze()) 199 | loss = l2 + torch.mean(out[i,0,:]) 200 | 201 | return loss 202 | -------------------------------------------------------------------------------- /src/supervision/metrics.py: -------------------------------------------------------------------------------- 1 | 
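# Segmentation metrics. jaccard() below computes a per-class IoU over a batch:
# predictions and ground truth may be passed either as (B, C, H, W) one-hot tensors or
# as (B, 1, H, W) label maps (label maps are one-hot encoded internally). A class that
# is predicted but absent from the ground truth scores 0; a class absent from both, or
# missed but covering fewer than class_pixel_count_threshold ground-truth pixels,
# scores 1. Minimal usage sketch (tensor names and the class count are illustrative):
#   iou, mask = jaccard(pred_one_hot, gt_labels, nclasses=17)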
import math 2 | import numpy 3 | import torch 4 | from torch import nn 5 | 6 | def jaccard(pred, gt, nclasses, class_pixel_count_threshold : int = 0): 7 | y_true = gt.clone() 8 | y_pred = pred.clone() 9 | 10 | if len(y_pred.shape) == 3: 11 | y_pred.unsqueeze_(0) 12 | 13 | if len(y_pred.shape) != 4: 14 | raise Exception("Wrong numer of channels") 15 | bs,_,_,_ = y_pred.shape 16 | 17 | if len(y_true.shape) == 3: 18 | y_true.unsqueeze_(0) 19 | if len(y_true.shape) != 4: 20 | raise Exception("Wrong numer of channels") 21 | 22 | if pred.shape[1] == 1: 23 | y_pred = torch.zeros_like(y_pred)\ 24 | .repeat(1,nclasses,1,1)\ 25 | .scatter_(1,y_pred.long(),1) 26 | if gt.shape[1] == 1: 27 | y_true = torch.zeros_like(y_true)\ 28 | .repeat(1,nclasses,1,1)\ 29 | .scatter_(1,y_true.long(),1) 30 | 31 | 32 | a = torch.sum(y_true, dim = (-1, -2)) 33 | b = torch.sum(y_pred, dim = (-1, -2)) 34 | 35 | intersection = y_true.bool() & y_pred.bool() 36 | union = y_true.bool() | y_pred.bool() 37 | 38 | false_negative = (b == 0) & (a != 0) 39 | false_positive = (a == 0) & (b != 0) #prediction says class is visible but is wrong 40 | true_negative = ((a == 0) & (b == 0)) | (false_negative & (a < class_pixel_count_threshold)) # if false negative but ground truth has only a few pixels in this class, consider this as true negative 41 | 42 | iou = torch.where( 43 | false_positive,#condition 44 | torch.zeros_like(false_positive).float(),#if condition is true 45 | torch.where(#if condtiontion is false 46 | true_negative,#inner condition 47 | torch.ones_like(false_positive).float(),#if condition is true 48 | torch.sum(intersection.float(), dim = (-1,-2)) / torch.sum(union.float(), dim = (-1,-2))).float())#if condition is false 49 | 50 | mask = torch.ones_like(iou).bool() 51 | 52 | return iou, mask -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .projections import * 2 | from .transformations import * 3 | from .visualization import * 4 | from .image_utils import * 5 | from .geometric import * 6 | from .train_utils import * 7 | from .save_pointcloud import * -------------------------------------------------------------------------------- /src/utils/box_collider.py: -------------------------------------------------------------------------------- 1 | from ..io import box_model_loader as box_loader 2 | import torch 3 | 4 | class BoxCollider(object): 5 | 6 | def __init__(self, box_scale : float = 0.001, box_file_path : str = './data/asymmetric_box.obj', device : str = 'cuda'): 7 | self._box = box_loader.load_box_model(box_file_path) 8 | self._init_bboxes(device) 9 | self._box_scale = box_scale 10 | 11 | def _init_bboxes(self, device : str): 12 | 13 | box_names = ["mid_bottom","mid_top","bottom","top"] 14 | 15 | self._bboxes = torch.zeros((len(box_names), 3, 2)).to(device) # bboxes: bboxes[bbox_no,coordinate x,y,z, min/max] 16 | side_names = self._box["side_names"] 17 | side_idx_pack = list(zip(range(len(side_names)),side_names)) 18 | for i in range(len(box_names)): 19 | 20 | sides = list(filter(lambda x: x[1].startswith(box_names[i]),side_idx_pack)) 21 | # 4 vertices per side 22 | #verts = [ torch.Tensor(self._box["vertices"][i*4*3 + k*4*3:i*4*3 + (k+1)*4*3]).reshape((4,3)).unsqueeze(0).to(device) for k in range(len(sides)) ] 23 | verts = [ torch.Tensor(self._box["vertices"][sides[k][0]*4*3: (sides[k][0]+1)*4*3]).reshape((4,3)).unsqueeze(0).to(device) for k in range(len(sides)) ] 24 | 
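            # Stack the 4 vertices of every side belonging to this box part, flatten them
            # to a (num_vertices, 3) tensor, and keep the per-axis min / max as an
            # axis-aligned bounding box; is_inside_box() later tests points against these.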
verts_cat = torch.cat(verts,dim=0) # side_count x vertex_count x 3 25 | verts_p = verts_cat.view(-1,3) 26 | 27 | self._bboxes[i,:,0] = torch.min(verts_p,dim=0)[0] # torch min returns tuple (values,indices) 28 | self._bboxes[i,:,1] = torch.max(verts_p,dim=0)[0] # torch max returns tuple (values,indices) 29 | 30 | #vert = torch.Tensor(self._box["vertices"][i*4*3:(i+1)*4*3]).reshape((3,4)).to(device) 31 | 32 | def _is_inside_bbox(self, points : torch.Tensor, bbox : torch.Tensor): 33 | ''' points: batch x 3 x point_count 34 | bbox: 3 x 2 (bbox_min | bbox_max) 35 | 36 | returns mask batch x point_count -> [0,1] 37 | ''' 38 | 39 | points_trans = points.transpose(1,2) 40 | 41 | mask_max = points_trans <= bbox[:,1] # broadcast semantics 42 | mask_min = points_trans >= bbox[:,0] 43 | 44 | mask = torch.sum((mask_min & mask_max).to(torch.int), dim = 2) == 3 45 | return mask 46 | 47 | 48 | def is_inside_box(self, points : torch.Tensor, extrude_factor : float = 1.0) -> torch.Tensor: 49 | ''' points: batch x 3 x point_count 50 | returns mask batch x point_count -> [0,1] 51 | ''' 52 | 53 | masks = [self._is_inside_bbox(points,extrude_factor * self._box_scale * self._bboxes[i,:,:]) for i in range(self._bboxes.shape[0])] 54 | 55 | for i in range(len(masks)): 56 | if(i==0): 57 | fmask = masks[i] 58 | else: 59 | fmask |= masks[i] 60 | 61 | return fmask 62 | -------------------------------------------------------------------------------- /src/utils/geometric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from src.utils import projections 3 | from src.io import box_model_loader, plywrite 4 | from src.dataset.rendering.box_renderer import BoxRenderFlags 5 | from enum import Flag, auto 6 | #import .projections 7 | class BoxRenderFlags (Flag): 8 | LABEL_UP_AS_BACKGROUND = auto() 9 | LABEL_DOWN_AS_BACKGROUND = auto() 10 | LABEL_TOP_AND_BOTTOM_AS_BACKGROUND = LABEL_UP_AS_BACKGROUND | LABEL_DOWN_AS_BACKGROUND 11 | 12 | import os.path as osp 13 | ''' 14 | Given two sets (set1 and set2 with dimensions b,c,N [b:batch,c:channels,N:spatial]) 15 | computes the (non_rigid by default, rigid if scale is ignored) transformation 16 | from set1 to set2. 17 | This function uses SVD as described here(http://nghiaho.com/?page_id=671) 18 | ,naming is consistent wherever possible. 
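In outline: (1) subtract each set's centroid; (2) accumulate the 3x3 correlation matrix
H = sum_n source_n * target_n^T over the centered points; (3) take the SVD H = U S V^T and
set R = V Z U^T with Z = diag(1, 1, sign(det(V U^T))) so that R is a proper rotation;
(4) scale = trace(R H) / sum_n ||source_n - centroid_source||^2;
(5) t = centroid_target - R centroid_source.
Returns R (b x 3 x 3), t (b x 3 x 1) and scale (b x 1 x 1).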
19 | ''' 20 | def computeNonRigidTransformation( 21 | source :torch.tensor, 22 | target :torch.tensor 23 | ): 24 | if len(source.shape) == 2: 25 | source = source.unsqueeze(0) 26 | 27 | if len(target.shape) == 2: 28 | target = target.unsqueeze(0) 29 | 30 | b1, c1, N1 = source.shape 31 | b2, c2, N2 = target.shape 32 | 33 | assert b1==b2, "Batch sizes differ" #TODO: maybe change later so that it could support b1=K, b2=1 34 | assert c1==c2, "Inputs channels differ" 35 | assert N1==N2, "Number of samples differ" 36 | 37 | b, c, N = b1, c1, N1 38 | 39 | if source.dtype != torch.double: 40 | source = source.double() 41 | 42 | if target.dtype != torch.double: 43 | target = target.double() 44 | 45 | device = source.device 46 | 47 | centroid_source = torch.mean(source, dim = 2).unsqueeze(-1) #bxcx1 48 | centroid_target = torch.mean(target, dim = 2).unsqueeze(-1) #bxcx1 49 | 50 | H_source = source - centroid_source 51 | H_target = target - centroid_target 52 | 53 | variance_source = torch.sum(H_source**2) 54 | variance_source = torch.einsum('...bij->...b',H_source**2) 55 | 56 | H = torch.einsum('...in,...jn->...ijn',H_source,H_target) 57 | H = torch.sum(H, dim = -1) 58 | 59 | 60 | list_R, list_t, list_scale = [], [], [] 61 | 62 | # care https://github.com/pytorch/pytorch/issues/16076#issuecomment-477755364 63 | for _b in range(b): 64 | #assert torch.abs(torch.det(H[_b])).item() > 1.0e-15, "Seems that H matrix is singular" 65 | U,S,V = torch.svd(H[_b]) 66 | R = torch.matmul(V, U.t()) 67 | 68 | Z = torch.eye(R.shape[0]).double().to(device) 69 | Z[-1,-1] *= torch.sign(torch.det(R)) 70 | 71 | R = torch.mm(V,torch.mm(Z,U.t())) 72 | 73 | scale = torch.trace(torch.mm(R,H[_b])) / variance_source[_b] 74 | 75 | list_R.append(R.unsqueeze(0)) 76 | list_scale.append(scale.unsqueeze(0).unsqueeze(-1)) 77 | 78 | 79 | R = torch.cat(list_R, dim = 0) 80 | scale = torch.cat(list_scale, dim = 0).unsqueeze(-1) 81 | t = -torch.bmm(R,centroid_source) + centroid_target 82 | return R, t, scale 83 | 84 | ''' 85 | Function that estimates the median 3D position of all 86 | segments 87 | INPUTS: 88 | labels : b,c,H,W where c is # of labels 89 | depth : b,1,H,W 90 | intrinsics_inv : b,3,3 91 | OUTPUT 92 | points : b,3,c 93 | ''' 94 | def computeLabelsMedianPoint( 95 | labels : torch.tensor, 96 | depth : torch.tensor, 97 | intrinsics_inv : torch.tensor 98 | ): 99 | b, _, h, w = depth.shape 100 | c = labels.shape[1] 101 | device = depth.device 102 | grid = projections.create_image_domain_grid(width = w, height= h).to(device) 103 | 104 | pointcloud = projections.deproject_depth_to_points(depth,grid, intrinsics_inv) 105 | median_points = torch.zeros((b,3,c)).to(device) 106 | visible_sides = torch.zeros((b,c)).byte().to(device) 107 | 108 | for i in range(b): 109 | for j in range(c): 110 | z = torch.nonzero(labels[i,j,:,:]) 111 | if z.shape[0] > 200: 112 | visible_sides[i,j] = 1 113 | median_points[i,:,j] = torch.median(pointcloud[i,:,z[:,0],z[:,1]].view(3,-1),dim = -1)[0] 114 | 115 | return median_points, visible_sides, pointcloud 116 | 117 | 118 | def compute_center_of_visible_box(id, box): 119 | vertices = torch.tensor(box['vertices']).reshape(-1,3) 120 | box_width = (vertices[box['index_map'][0]] - vertices[box['index_map'][1]]).norm() 121 | box_height = (vertices[box['index_map'][0]] - vertices[box['index_map'][2]]).norm() 122 | box_depth = (vertices[box['index_map'][22]] - vertices[box['index_map'][21]]).norm() 123 | if id == 2: 124 | center = vertices[box['index_map'][3]] +\ 125 | vertices[box['index_map'][5]] +\ 126 | 
torch.tensor([ 127 | vertices[box['index_map'][2]][0], 128 | vertices[box['index_map'][2]][1] + box_depth, 129 | vertices[box['index_map'][2]][2]]) +\ 130 | torch.tensor([ 131 | vertices[box['index_map'][6]][0], 132 | vertices[box['index_map'][6]][1] + box_depth, 133 | vertices[box['index_map'][6]][2]]) 134 | 135 | return center / 4 136 | elif id == 3: 137 | center = vertices[box['index_map'][1]] +\ 138 | vertices[box['index_map'][7]] +\ 139 | torch.tensor([ 140 | vertices[box['index_map'][0]][0], 141 | vertices[box['index_map'][0]][1] + box_depth, 142 | vertices[box['index_map'][0]][2]]) +\ 143 | torch.tensor([ 144 | vertices[box['index_map'][17]][0], 145 | vertices[box['index_map'][17]][1] + box_depth, 146 | vertices[box['index_map'][17]][2]]) 147 | 148 | return center / 4 149 | elif id == 8: 150 | center = vertices[box['index_map'][30]] +\ 151 | vertices[box['index_map'][26]] +\ 152 | torch.tensor([ 153 | vertices[box['index_map'][29]][0] - box_depth, 154 | vertices[box['index_map'][29]][1], 155 | vertices[box['index_map'][29]][2]]) +\ 156 | torch.tensor([ 157 | vertices[box['index_map'][27]][0] - box_depth, 158 | vertices[box['index_map'][27]][1], 159 | vertices[box['index_map'][27]][2]]) 160 | 161 | return center / 4 162 | elif id == 9: 163 | center = vertices[box['index_map'][36]] +\ 164 | vertices[box['index_map'][38]] +\ 165 | torch.tensor([ 166 | vertices[box['index_map'][37]][0] - box_depth, 167 | vertices[box['index_map'][37]][1], 168 | vertices[box['index_map'][37]][2]]) +\ 169 | torch.tensor([ 170 | vertices[box['index_map'][31]][0] - box_depth, 171 | vertices[box['index_map'][31]][1], 172 | vertices[box['index_map'][31]][2]]) 173 | 174 | return center / 4 175 | elif id == 13: 176 | center = vertices[box['index_map'][53]] +\ 177 | vertices[box['index_map'][55]] +\ 178 | torch.tensor([ 179 | vertices[box['index_map'][54]][0] + box_depth, 180 | vertices[box['index_map'][54]][1], 181 | vertices[box['index_map'][54]][2]]) +\ 182 | torch.tensor([ 183 | vertices[box['index_map'][48]][0] + box_depth, 184 | vertices[box['index_map'][48]][1], 185 | vertices[box['index_map'][48]][2]]) 186 | 187 | return center / 4 188 | elif id == 19: 189 | center = vertices[box['index_map'][77]] +\ 190 | vertices[box['index_map'][78]] +\ 191 | torch.tensor([ 192 | vertices[box['index_map'][76]][0], 193 | vertices[box['index_map'][76]][1] - box_depth, 194 | vertices[box['index_map'][76]][2]]) +\ 195 | torch.tensor([ 196 | vertices[box['index_map'][75]][0], 197 | vertices[box['index_map'][75]][1] - box_depth, 198 | vertices[box['index_map'][75]][2]]) 199 | 200 | return center / 4 201 | 202 | return torch.tensor(box["vertices"])\ 203 | .reshape(-1,4,3).permute(2,1,0).mean(dim = 1)[:,id] 204 | 205 | ''' 206 | Module that is used to compute the (rough) extrinsics transformation. 207 | Base idea is, given a depth map and its camera intrinsics (therefore pointcloud) 208 | and the corresponding labels, compute the (non rigid in case of sigma) rigid 209 | transformation to the "BOX" or "global" coordinate system. 
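forward() takes a (b, 1, h, w) depth map, (b, c, h, w) one-hot labels and (b, 3, 3)
intrinsics, estimates the median 3D point of every visible label through
computeLabelsMedianPoint, matches those points against the known centers of the box
sides (self.box_sides_center, converted from millimeters to meters), and solves for
R, t and scale with computeNonRigidTransformation. It returns the (b, 4, 4) extrinsics,
the per-sample scales and the deprojected pointclouds.
Minimal usage sketch (tensor names are illustrative):
    calculator = ExtrinsicsCalculator('./data/asymmetric_box.obj', device, render_flags)
    extrinsics, scales, pointclouds = calculator(depth, one_hot_labels, intrinsics)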
210 | ''' 211 | class ExtrinsicsCalculator(torch.nn.Module): 212 | def __init__(self, box_path, device, render_flags): 213 | def _label_as_background(side_name : str) -> bool: 214 | if (render_flags == None): 215 | return False 216 | elif (render_flags & BoxRenderFlags.LABEL_DOWN_AS_BACKGROUND) and ("_down_" in side_name): 217 | return True 218 | elif (render_flags & BoxRenderFlags.LABEL_UP_AS_BACKGROUND) and ("_up_" in side_name): 219 | return True 220 | 221 | return False 222 | 223 | 224 | super(ExtrinsicsCalculator,self).__init__() 225 | self.device = device 226 | self.box = box_model_loader.load_box_model(box_path) 227 | 228 | vertices = torch.tensor(self.box["vertices"]).reshape(-1,3) 229 | 230 | 231 | valid_ids = [] 232 | for i in range(len(self.box["side_names"])): 233 | if not _label_as_background(self.box["side_names"][i]): 234 | valid_ids.append(i) 235 | 236 | # self.box_sides_center2 = torch.tensor(self.box["vertices"])\ 237 | # .reshape(-1,4,3).permute(2,1,0).mean(dim = 1).to(device)[:,valid_ids]/ 1000.0 #### 238 | self.box_sides_center = torch.cat([compute_center_of_visible_box(i, self.box).unsqueeze(-1) for i in valid_ids], dim = 1)/ 1000.0 239 | self.box_sides_center = self.box_sides_center.to(self.device) 240 | 241 | 242 | 243 | 244 | def forward( 245 | self, 246 | depthmap : torch.tensor,#b,1,h,w 247 | labels : torch.tensor,#b,c,h,w 248 | intrinsics : torch.tensor #b,3,3 249 | ): 250 | b,c,h,w = labels.shape 251 | 252 | extrinsics = torch.eye(4)\ 253 | .expand(b,4,4)\ 254 | .to(depthmap.device) 255 | scales = [] 256 | 257 | sides, visible_sides, pointclouds = computeLabelsMedianPoint( 258 | labels, 259 | depthmap, 260 | intrinsics.inverse() 261 | ) 262 | 263 | for i in range(b): 264 | R,t,scale = computeNonRigidTransformation( 265 | sides[i,:,visible_sides[i].bool()][:,1:], #ignore background registration 266 | self.box_sides_center[:,visible_sides[i,1:].bool()].to(depthmap.device) 267 | ) 268 | extrinsics[i,:3,:3] = R 269 | t = torch.zeros_like(t).to(t.device) if True in (t!=t) else t 270 | extrinsics[i,:3,3] = t.squeeze(-1) 271 | scales.append(scale) 272 | 273 | scales = torch.cat(scales,dim = 0).float() 274 | 275 | return extrinsics, scales, pointclouds 276 | 277 | 278 | def forward_pointcloud( 279 | self, 280 | sides : torch.tensor, #3D positions of every channel 281 | visible_sides : torch.tensor # visible sides of the boxes 282 | ): 283 | b = sides.shape[0] 284 | 285 | extrinsics = torch.eye(4)\ 286 | .expand(b,4,4)\ 287 | .to(sides.device) 288 | scales = [] 289 | 290 | for i in range(b): 291 | # if torch.sum(torch.sum(visible_sides[i])) < 3: 292 | # continue 293 | R,t,scale = computeNonRigidTransformation( 294 | sides[i,:,visible_sides[i].bool().squeeze()].unsqueeze(0), #ignore background registration 295 | self.box_sides_center[:,visible_sides[i].bool().squeeze()[1:]].unsqueeze(0) 296 | ) 297 | extrinsics[i,:3,:3] = R 298 | extrinsics[i,:3,3] = t.squeeze(-1) 299 | scales.append(scale) 300 | 301 | if scales: 302 | scales = torch.cat(scales,dim = 0).float() 303 | else: 304 | scales = 10.0 * torch.ones((b)) 305 | 306 | return extrinsics, scales 307 | 308 | def computeLoss( 309 | self, 310 | validity_threshold, #label occurences threshold 311 | labels, #one hot enc 312 | pclouds 313 | ): 314 | def isnan(x): 315 | return x!=x 316 | 317 | epsilon = 0.0005 318 | b,c,h,w = labels.shape 319 | 320 | valid = (labels.view(b,c,-1).sum(dim = -1) > validity_threshold)[:,1:] #exclude backghround 321 | if valid.sum() == 0: 322 | return None,None,None 323 | 324 | 325 | pred_center = 
torch.einsum("bthw, bchw -> btc", pclouds, labels.float()) \ 326 | / (labels.float().sum(dim = (-1,-2)).unsqueeze(1) + epsilon)# bx3xc 327 | 328 | 329 | residuals = (self.box_sides_center.unsqueeze(0) - pred_center[:,:,1:]) 330 | loss = (((residuals**2).sum(dim = 1) * valid.float()).sum(dim = -1) / valid.sum(dim = -1).float()).sqrt() 331 | #loss1 = torch.sqrt(torch.einsum("btc, bc -> b", residuals**2, valid.float())) / valid.sum(dim = -1).float() 332 | #loss = torch.sqrt(torch.einsum("btc, bc -> b", residuals**2, valid.float())) 333 | 334 | 335 | for i in range(b): 336 | print("Visible sides : {} , mean error {} meters.".format(valid[i].sum(), loss[i])) 337 | return loss, valid.sum(dim = -1), pred_center[:,:,1:].unsqueeze(-1) 338 | 339 | 340 | 341 | 342 | ''' 343 | Function that is used to compute soft 3D correspondences 344 | between network prediction and ground truth labels. 345 | ''' 346 | def compute_soft_correspondences( 347 | pred_labels : torch.tensor, #NOT log probability 348 | depth_maps : torch.tensor, 349 | inverse_intrinsics : torch.tensor, 350 | gt_labels : torch.tensor, 351 | confidence : float 352 | ): 353 | epsilon = 1.0e-05 354 | conf_threshold = 0.0 355 | b,c,h,w = pred_labels.shape 356 | device = gt_labels.device 357 | 358 | grid = projections.create_image_domain_grid(width = w, height = h).to(device) 359 | pointclouds = projections.deproject_depth_to_points(depth_maps, grid, inverse_intrinsics) 360 | 361 | soft_correspondences_pred = torch.zeros((b,c,3)).to(device) 362 | soft_correspondences_gt = torch.zeros((b,c,3)).to(device) 363 | visibility = torch.zeros((b,c)).float().to(device) 364 | 365 | mask_gt = torch.zeros_like(pred_labels) 366 | mask_gt.scatter_(1,gt_labels.long(),1) #b,c,h,w {0,1} 367 | 368 | mask_pred = (pred_labels > confidence).float() #b,c,h,w {0,1} 369 | 370 | pred_masked = pred_labels * mask_pred #b,c,h,w [0,1] 371 | 372 | weights = pred_labels * mask_pred #b,c,h,w [0,1] 373 | soft_correspondences_pred = torch.einsum("bthw, bchw -> bct", pointclouds, pred_masked) / (torch.sum(weights, dim = [-1, -2]) + epsilon).unsqueeze(-1) 374 | soft_correspondences_gt = torch.einsum("bthw, bchw -> bct", pointclouds, mask_gt) / (torch.sum(mask_gt, dim = [-1, -2]) + epsilon).unsqueeze(-1) 375 | 376 | 377 | visibility = (torch.sum(mask_gt, dim = [-1 , -2]) !=0 ).float() 378 | 379 | return soft_correspondences_pred.permute(0,2,1), soft_correspondences_gt.permute(0,2,1), visibility.unsqueeze(1) 380 | 381 | 382 | ''' 383 | Function that computes soft correspondences between network predictions 384 | and the ground truth labels from the box. 
385 | ''' 386 | def compute_soft_correspondences_unlabeled( 387 | pred_labels : torch.tensor, #NOT log probability 388 | depth_maps : torch.tensor, 389 | inverse_intrinsics : torch.tensor, 390 | confidence : float, 391 | confidence_number : int 392 | ): 393 | epsilon = 1.0e-05 394 | b,c,h,w = pred_labels.shape 395 | device = pred_labels.device 396 | 397 | grid = projections.create_image_domain_grid(width = w, height = h).to(device) 398 | pointclouds = projections.deproject_depth_to_points(depth_maps, grid, inverse_intrinsics) 399 | 400 | visibility = torch.zeros((b,c)).to(device).bool() 401 | soft_correspondences_pred = torch.zeros((b,c,3)).to(device) 402 | 403 | predicted_labels_1d = torch.argmax(pred_labels,dim = 1).float() 404 | 405 | ### find which labels are seen in the predicted tensor given the confidence threshold 406 | raise Exception("Make it with einsum as labeled") 407 | for i in range(1,c): #skip background 408 | mask_pred = (pred_labels[:,i,:,:] > confidence).float()#b,c,h,w 409 | #visibility[:,i] = (torch.sum(pred_labels[:,i,:,:].view(b,-1), dim = -1) >=confidence_number).float() 410 | visibility[:,i] = torch.sum((predicted_labels_1d * mask_pred).view(b,-1) == i, dim = -1) >= confidence_number 411 | 412 | weights = (pred_labels[:,i,:,:] * mask_pred).unsqueeze(1) 413 | pointclouds_masked_pred = pointclouds * weights 414 | mean_point_pred = torch.sum(pointclouds_masked_pred.view(b,3,-1), dim = -1) / \ 415 | (torch.sum(weights.view(b,1,-1), dim = -1) + epsilon) 416 | 417 | soft_correspondences_pred[:,i,:] = mean_point_pred 418 | 419 | 420 | return soft_correspondences_pred.permute(0,2,1), visibility.unsqueeze(1) 421 | 422 | 423 | -------------------------------------------------------------------------------- /src/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from enum import Enum 4 | 5 | def random_crop_and_scale_to_fit_target(src_img : np.array, target_width : int, target_height : int, rng : np.random.RandomState = np.random.RandomState()): 6 | 7 | # case 1: no cropping because size fits 8 | if src_img.shape[0] == target_height and src_img.shape[1] == target_width: 9 | return src_img 10 | 11 | crop_startY = 0 12 | crop_startX = 0 13 | img_height = src_img.shape[0] 14 | img_width = src_img.shape[1] 15 | 16 | 17 | s = np.min([float(img_width) / float(target_width), float(img_height) / float(target_height)]) 18 | 19 | cw = np.min([int(s * target_width), img_width]) 20 | ch = np.min([int(s * target_height), img_height]) 21 | 22 | crop_startX = rng.randint(0,img_width-cw+1) 23 | crop_startY = rng.randint(0,img_height-ch+1) 24 | 25 | cropped_img = src_img[crop_startY:crop_startY+ch,crop_startX:crop_startX+cw] 26 | resized_img = cv2.resize(cropped_img, (target_width, target_height),interpolation = cv2.INTER_LINEAR) 27 | return resized_img 28 | 29 | def get_color_map_nclasses_25() : 30 | 31 | colors = [ 32 | #0x12bcea, 33 | 0x000000, # background 34 | # blue 35 | 0x050c66, # mid bottom front 2f 36 | 0x0b1ae6, # mid bottom back 2b 37 | 0x000010, # mid bottom down 2d 38 | 0x313466, # mid bottom up 2u 39 | 0x4754ff, # mid bottom right 2r 40 | 0x0a15b8, # mid bottom left 2l 41 | # green 42 | 0x3bff5b, # mid top right 3r 43 | 0x00b81e, # mid top left 3l 44 | 0x001000, # mid top down 3d 45 | 0x2c6636, # mid top up 3u 46 | 0x006611, # mid top front 3f 47 | 0x00e626, # mid top back 3b 48 | # yellow 49 | 0xffd640, # bottom right 1r 50 | 0x665a2e, # bottom up 1u 51 | 0xe6b505, # bottom back 1b 52 
| 0x665002, # bottom front 1f 53 | 0x101000, # bottom down 1d 54 | 0xb89204, # bottom left 1l 55 | # red 56 | 0x660900, # top front 4f 57 | 0x100000, # top down 4d 58 | 0xff493a, # top right 4r 59 | 0x66312c, # top up 4u 60 | 0xe61300, # top top back 4b 61 | 0xb30f00, # top left 4l 62 | 63 | 0x888888 # uncertain (max probability < threshold), class 25 64 | #0x000000 # uncertain (max probability < threshold), class 25 65 | #0xff0000 # uncertain (max probability < threshold), class 25 66 | ] 67 | 68 | return colors 69 | 70 | def get_color_map_nclasses_21() : 71 | 72 | colors = [ 73 | #0x12bcea, 74 | 0x000000, # background 75 | # blue 76 | 0x050c66, # mid bottom front 2f 77 | 0x0b1ae6, # mid bottom back 2b 78 | #0x000010, # mid bottom down 2d 79 | 0x313466, # mid bottom up 2u 80 | 0x4754ff, # mid bottom right 2r 81 | 0x0a15b8, # mid bottom left 2l 82 | # green 83 | 0x3bff5b, # mid top right 3r 84 | 0x00b81e, # mid top left 3l 85 | #0x001000, # mid top down 3d 86 | 0x2c6636, # mid top up 3u 87 | 0x006611, # mid top front 3f 88 | 0x00e626, # mid top back 3b 89 | # yellow 90 | 0xffd640, # bottom right 1r 91 | 0x665a2e, # bottom up 1u 92 | 0xe6b505, # bottom back 1b 93 | 0x665002, # bottom front 1f 94 | #0x101000, # bottom down 1d 95 | 0xb89204, # bottom left 1l 96 | # red 97 | 0x660900, # top front 4f 98 | #0x100000, # top down 4d 99 | 0xff493a, # top right 4r 100 | 0x66312c, # top up 4u 101 | 0xe61300, # top top back 4b 102 | 0xb30f00, # top left 4l 103 | 104 | 0x888888 # uncertain (max probability < threshold), class 25 105 | #0x000000 # uncertain (max probability < threshold), class 25 106 | #0xff0000 # uncertain (max probability < threshold), class 25 107 | ] 108 | 109 | return colors 110 | 111 | def get_color_map_nclasses_17() : 112 | 113 | colors = [ 114 | #0x12bcea, 115 | 0x000000, # background 116 | # blue 117 | 0x050c66, # mid bottom front 2f 118 | 0x0b1ae6, # mid bottom back 2b 119 | 0x4754ff, # mid bottom right 2r 120 | 0x0a15b8, # mid bottom left 2l 121 | # green 122 | 0x3bff5b, # mid top right 3r 123 | 0x00b81e, # mid top left 3l 124 | 0x006611, # mid top front 3f 125 | 0x00e626, # mid top back 3b 126 | # yellow 127 | 0xffd640, # bottom right 1r 128 | 0xe6b505, # bottom back 1b 129 | 0x665002, # bottom front 1f 130 | 0xb89204, # bottom left 1l 131 | # red 132 | 0x660900, # top front 4f 133 | 0xff493a, # top right 4r 134 | 0xe61300, # top top back 4b 135 | 0xb30f00, # top left 4l 136 | 137 | 0x888888 # uncertain (max probability < threshold), class 25 138 | #0x000000 # uncertain (max probability < threshold), class 25 139 | #0xff0000 # uncertain (max probability < threshold), class 25 140 | ] 141 | 142 | return colors 143 | 144 | def colorize_label_map(lmap : np.array, colors: list) -> np.array: 145 | 146 | outlmap = np.zeros((lmap.shape[0], lmap.shape[1], 3), dtype = np.uint8) 147 | 148 | for y in range(lmap.shape[0]): 149 | for x in range(lmap.shape[1]): 150 | label =lmap[y,x] 151 | # open cv default is bgr 152 | outlmap [y,x,:] = [colors[label] & 0xFF, (colors[label] & 0xFF00) >> 8, (colors[label] & 0xFF0000) >> 16] 153 | 154 | return outlmap 155 | 156 | -------------------------------------------------------------------------------- /src/utils/projections.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def create_image_domain_grid(width, height, data_type=torch.float32): 4 | v_range = ( 5 | torch.arange(0, height) # [0 - h] 6 | .view(1, height, 1) # [1, [0 - h], 1] 7 | .expand(1, height, width) # [1, [0 - h], W] 8 | 
.type(data_type)           # [1, H, W]
9 |     )
10 |     u_range = (
11 |         torch.arange(0, width)     # [0 - w]
12 |         .view(1, 1, width)         # [1, 1, [0 - w]]
13 |         .expand(1, height, width)  # [1, H, [0 - w]]
14 |         .type(data_type)           # [1, H, W]
15 |     )
16 |     ones = (
17 |         torch.ones(1, height, width) # [1, H, W] := 1
18 |         .type(data_type)
19 |     )
20 |     return torch.stack((u_range, v_range, ones), dim=1) # [1, 3, H, W]
21 | 
22 | def project_points_to_uvs(points, intrinsics):
23 |     b, _, h, w = points.size() # [B, 3, H, W]
24 |     x_coordinate3d = points[:, 0] #TODO: check if adding small value makes sense to avoid zeros?
25 |     y_coordinate3d = points[:, 1]
26 |     z_coordinate3d = points[:, 2].clamp(min=1e-3)
27 |     x_homogeneous = x_coordinate3d / z_coordinate3d
28 |     y_homogeneous = y_coordinate3d / z_coordinate3d
29 |     ones = z_coordinate3d.new_ones(z_coordinate3d.size())
30 |     homogeneous_coordinates = ( # (x/z, y/z, 1.0)
31 |         torch.stack([x_homogeneous, y_homogeneous, ones], dim=1) # [B, 3, H, W]
32 |         .reshape(b, 3, -1) # [B, 3, H*W]
33 |     )
34 |     uv_coordinates = intrinsics @ homogeneous_coordinates # [B, 3, H*W]
35 |     return ( # image domain coordinates
36 |         uv_coordinates[:, :2, :] # [B, 2, H*W]
37 |         .reshape(b, 2, h, w) # [B, 2, H, W]
38 |     ) # [B, 2, H, W]
39 | 
40 | def normalize_uvs(uvs):
41 |     _, __, h, w = uvs.size()
42 |     normalized_u = 2 * uvs[:, 0, :, :] / (w - 1) - 1
43 |     normalized_v = 2 * uvs[:, 1, :, :] / (h - 1) - 1
44 |     return torch.stack([normalized_u, normalized_v], dim=1)\
45 |         .clamp(min=-1, max=1) #TODO: check clamping or masking /w 2s
46 | 
47 | def deproject_depth_to_points(depth, grid, intrinsics_inv):
48 |     b, _, h, w = depth.size()
49 |     # check https://pytorch.org/docs/stable/torch.html#torch.matmul
50 |     # need to return a one-dimensional tensor to use the matrix-vector product
51 |     # as a result we reshape to [B, 3, H*W] in order to multiply the intrinsics matrix
52 |     # with a 3x1 vector (u, v, 1)
53 |     current_pixel_coords = ( # convert grid to appropriate dims for matrix multiplication
54 |         grid # [1, 3, H, W] #grid[:,:,:h,:w]
55 |         .expand(b, 3, h, w) # [B, 3, H, W]
56 |         .reshape(b, 3, -1) # [B, 3, H*W] := [B, 3, UV1]
57 |     )
58 | 
59 |     p3d = ( # K_inv * [UV1] * depth
60 |         (intrinsics_inv @ current_pixel_coords) # [B, 3, 3] * [B, 3, UV1]
61 |         .reshape(b, 3, h, w) * # [B, 3, H, W]
62 |         depth
63 |         #.unsqueeze(1) # unsqueeze to tri-channel for element wise product
64 |     ) # [B, 3, H, W]
65 |     return p3d
66 | 
67 | 
68 | def calculate_normals(points , policy = "upright"):
69 |     if policy == "upright": # compare strings with '==' ('is' checks identity)
70 |         points_temp = torch.nn.functional.pad(points, (0, 1, 0, 0), mode="replicate")
71 |         dx = points_temp[:, :, :, :-1] - points_temp[:, :, :, 1:] # NCHW
72 |         points_temp = torch.nn.functional.pad(points, (0, 0, 0, 1), mode="replicate")
73 |         dy = points_temp[:, :, :-1, :] - points_temp[:, :, 1:, :] # NCHW
74 |         normals = torch.cross(dy,dx)
75 |         return torch.nn.functional.normalize(normals)
--------------------------------------------------------------------------------
/src/utils/save_pointcloud.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def save_ply(filename, tensor, scale, color=[0,0,0] , normals = None):
4 |     #w,h,c = tensor.shape
5 |     if len(tensor.shape) == 2:
6 |         tensor = np.expand_dims(tensor, 0)
7 | 
8 |     h,w,c = tensor.shape
9 |     x_coords = tensor[:, :, 0] * scale
10 |     y_coords = tensor[:, :, 1] * scale
11 |     z_coords = tensor[:, :, 2] * scale
12 |     if normals is not None:
13 |         if len(normals.shape) == 2:
14 |             normals = np.expand_dims(normals, 0)
15 | 
        nx_coords = normals[:, :, 0]
16 |         ny_coords = normals[:, :, 1]
17 |         nz_coords = normals[:, :, 2]
18 |     with open(filename, "w") as ply_file:
19 |         ply_file.write("ply\n")
20 |         ply_file.write("format ascii 1.0\n")
21 |         ply_file.write("element vertex {}\n".format(w * h))
22 |         ply_file.write("property float x\n")
23 |         ply_file.write("property float y\n")
24 |         ply_file.write("property float z\n")
25 |         if normals is not None:
26 |             ply_file.write('property float nx\n')
27 |             ply_file.write('property float ny\n')
28 |             ply_file.write('property float nz\n')
29 |         ply_file.write("property uchar red\n")
30 |         ply_file.write("property uchar green\n")
31 |         ply_file.write("property uchar blue\n")
32 |         ply_file.write("end_header\n")
33 | 
34 |         if normals is None:
35 |             for x in np.arange(h):
36 |                 for y in np.arange(w):
37 |                     ply_file.write("{} {} {} {} {} {}\n".format(\
38 |                         x_coords[x, y], y_coords[x, y], z_coords[x, y],\
39 |                         color[0],color[1],color[2]
40 |                     ))
41 |         else:
42 |             for x in np.arange(h):
43 |                 for y in np.arange(w):
44 |                     ply_file.write("{} {} {} {} {} {} {} {} {}\n".format(\
45 |                         x_coords[x, y], y_coords[x, y], z_coords[x, y],\
46 |                         nx_coords[x, y], ny_coords[x, y], nz_coords[x, y],\
47 |                         color[0],color[1],color[2]))
--------------------------------------------------------------------------------
/src/utils/train_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | '''
3 | Split a batch into real and synthetic sub-batches.
4 | Returns the real batch, the synthetic batch, and the indices of each within the input batch.
5 | '''
6 | def split_batch(
7 |     batch :dict
8 | ):
9 |     real_ids = [i for i,x in enumerate(batch["type"]) if x == "real"]
10 |     synth_ids = [i for i,x in enumerate(batch["type"]) if x == "synthetic"]
11 | 
12 |     if not real_ids:
13 |         return None, batch, None, synth_ids
14 |     if not synth_ids:
15 |         return batch, None, real_ids, None
16 | 
17 |     synth_batch = {
18 |         "depth" : batch["depth"][synth_ids],
19 |         "normals" : batch["normals"][synth_ids],
20 |         "labels" : batch["labels"][synth_ids],
21 |         "color" : batch["color"][synth_ids],
22 |         "intrinsics_original" : batch["intrinsics_original"][synth_ids],
23 |         "intrinsics" : batch["intrinsics"][synth_ids],
24 |         "camera_resolution" : batch["camera_resolution"],
25 |         "camera_pose" : batch["camera_pose"][synth_ids],
26 |         "type" : "synthetic"
27 |     }
28 | 
29 |     real_batch = {
30 |         "depth" : batch["depth"][real_ids],
31 |         "intrinsics" : batch["intrinsics"][real_ids],
32 |         "type" : "real"
33 |     }
34 | 
35 |     return real_batch, synth_batch, real_ids, synth_ids
--------------------------------------------------------------------------------
/src/utils/transformations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | def transform_points(points, rotation, translation):
4 |     b, _, h, w = points.size() # [B, 3, H, W]
5 |     points3d = points.reshape(b, 3, -1) # [B, 3, H*W]
6 |     return (
7 |         (rotation @ points3d) # [B, 3, 3] * [B, 3, H*W]
8 |         + translation # [B, 3, 1]
9 |     ).reshape(b, 3, h, w) # [B, 3, H, W]
10 | 
11 | def transform_points_homogeneous(points, transformation_matrix):
12 |     b, c, h, w = points.size() # [B, 3 or 4, H, W]
13 |     if c == 3: # pad with a unit homogeneous coordinate
14 |         points = torch.cat([points, torch.ones(b,1,h,w).type_as(points).to(points.device)], dim = 1)
15 |     points_homogeneous = points.reshape(b, 4, -1) # [B, 4, H*W]
16 |     return (
17 |         (transformation_matrix @ points_homogeneous) # [B, 4, 4] * [B, 4, H*W]
18 |     ).reshape(b,4,h,w)[:,:3,:,:] # [B, 3, H, W]
19 | 
20 | def extract_rotation_translation(pose):
21 |     b, _, _ = 
pose.shape 22 | return pose[:, :3, :3].clone(), pose[:,:3, 3].reshape(b, 3, 1).clone() # rotation, translation 23 | 24 | def transform_normals(oriented_points , rotation): 25 | b, _ , _ , _ = oriented_points.size() 26 | return transform_points(oriented_points , rotation , torch.zeros((b , 3 , 1)).type_as(oriented_points)) 27 | 28 | def rotatePointsAboutAxisXYZ( 29 | angle : float, 30 | axis : int, #x,y,z 31 | points : torch.tensor # N x 3 32 | ): 33 | from numpy import sin, cos 34 | b, _ , h, w = points.shape 35 | if axis == 0: 36 | R = torch.tensor([ 37 | [1,0,0], 38 | [0, cos(angle), -sin(angle)], 39 | [0, sin(angle), cos(angle)] 40 | ]).double() 41 | elif axis == 1: 42 | R = torch.tensor([ 43 | [cos(angle),0,sin(angle)], 44 | [0, 1, 0], 45 | [-sin(angle), 0, cos(angle)] 46 | ]).double() 47 | elif axis == 2: 48 | R = torch.tensor([ 49 | [cos(angle),-sin(angle),0], 50 | [sin(angle), cos(angle), 0], 51 | [0, 0, 1] 52 | ]).double() 53 | 54 | rotated = torch.bmm(R.unsqueeze(0).expand(b,3,3) , points.view(b,3,-1)).view(b,3,h,w) 55 | 56 | return rotated -------------------------------------------------------------------------------- /src/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import numpy 3 | import torch 4 | 5 | class NullVisualizer(object): 6 | def __init__(self): 7 | self.name = __name__ 8 | 9 | def append_loss(self, epoch, global_iteration, loss, mode='train'): 10 | pass 11 | 12 | def show_images(self, images, title): 13 | pass 14 | 15 | class VisdomVisualizer(object): 16 | def __init__(self, name, server="http://localhost", count=2): 17 | self.visualizer = visdom.Visdom(server=server, port=8097, env=name,\ 18 | use_incoming_socket=False) 19 | self.name = name 20 | self.first_train_value = True 21 | self.first_test_value = True 22 | self.count = count 23 | self.plots = {} 24 | 25 | def append_loss(self, epoch, global_iteration, loss, loss_name="total", mode='train'): 26 | plot_name = loss_name + '_train_loss' if mode == 'train' else 'test_loss' 27 | opts = ( 28 | { 29 | 'title': plot_name, 30 | #'legend': mode, 31 | 'xlabel': 'iterations', 32 | 'ylabel': loss_name 33 | }) 34 | if mode == 'val': 35 | loss_value = float(loss) 36 | else: 37 | loss_value = float(loss.detach().cpu().numpy()) 38 | 39 | if loss_name not in self.plots: 40 | self.plots[loss_name] = self.visualizer.line(X=numpy.array([global_iteration]), Y=numpy.array([loss_value]), opts=opts) 41 | else: 42 | self.visualizer.line(X=numpy.array([global_iteration]), Y=numpy.array([loss_value]), win=self.plots[loss_name], name=mode, update = 'append') 43 | 44 | def show_images(self, images, title): 45 | b, c, h, w = images.size() 46 | recon_images = images.detach().cpu()[:self.count, [2, 1, 0], :, :]\ 47 | if c == 3 else\ 48 | images.detach().cpu()[:self.count, :, :, :] 49 | opts = ( 50 | { 51 | 'title': title, 'width': self.count / 2 * 640, 52 | 'height': self.count / 4 * 360 53 | }) 54 | self.visualizer.images(recon_images, opts=opts,\ 55 | win=self.name + title + "_window") 56 | 57 | def show_image(self, image, title): 58 | b, c, h, w = image.size() 59 | recon_image = image.detach().cpu()[:self.count, [2, 1, 0], :, :]\ 60 | if c == 3 else\ 61 | image.detach().cpu()[:self.count, :, :, :] 62 | opts = ( 63 | { 64 | 'title': title, 'width': 1280, 65 | 'height': 720 66 | }) 67 | self.visualizer.images(recon_image, opts=opts,\ 68 | win=self.name + title + "_window") 69 | 70 | def show_activations(self, maps, title): 71 | c, h, w = maps.size() 72 | 
maps_cpu = maps.detach().cpu()[:, :, :] 73 | #maps_cpu = maps_cpu.squeeze(0) 74 | for i in range(c): #c 75 | opts = ( 76 | { 77 | 'title': title + str(i), 'colormap': 'Viridis' 78 | }) 79 | heatmap = maps_cpu[i, :, :] 80 | heatmap_flipped = torch.flip(heatmap, [0]) 81 | self.visualizer.heatmap(heatmap_flipped,\ 82 | opts=opts, win=self.name + title + "_window_" + str(i)) 83 | 84 | def show_seg_map(self, in_map, title, iter=0): 85 | maps = in_map.squeeze(0).detach() 86 | h, w = maps.size() 87 | maps_cpu = maps.cpu()[:, :] 88 | opts = ( 89 | { 90 | 'title': title + "_" + str(iter), 'colormap': 'Viridis' 91 | }) 92 | heatmap_flipped = torch.flip(maps_cpu, [0]) 93 | self.visualizer.heatmap(heatmap_flipped,\ 94 | opts=opts, win=self.name + title + "_window_") 95 | 96 | def show_kernels(self, maps, title): 97 | b, c, h, w = maps.size() 98 | maps_cpu = maps.detach().cpu()[:, :, :, :] 99 | maps_cpu = maps_cpu.squeeze(0) 100 | count, _, _ = maps_cpu.size() 101 | for i in range(count): 102 | opts = ( 103 | { 104 | 'title': title + str(i), 'colormap': 'Viridis' 105 | }) 106 | heatmap = maps_cpu[i, :, :] 107 | self.visualizer.heatmap(heatmap,\ 108 | opts=opts, win=self.name + title + "_window_" + str(i)) 109 | 110 | def show_map(self, maps, title): 111 | b, c, h, w = maps.size() 112 | maps_cpu = maps.detach().cpu()[:self.count, :, :, :] 113 | for i in range(self.count): 114 | opts = ( 115 | { 116 | 'title': title + str(i), 'colormap': 'Viridis' 117 | }) 118 | heatmap = maps_cpu[i, :, :, :].squeeze(0) 119 | self.visualizer.heatmap(heatmap,\ 120 | opts=opts, win=self.name + title + "_window_" + str(i)) 121 | 122 | def show_point_clouds(self, coords, title): 123 | point_clouds = coords.detach().cpu()[:self.count, :, :, :] 124 | opts = ( 125 | { 126 | 'title': title + '_points3D', 'webgl': True, 127 | #'legend'=['Predicted', 'Ground Truth'], 128 | 'markersize': 0.5, 129 | #'markercolor': torch.tensor([[0,0,255], [255,0,0]]).int().numpy(), 130 | 'xtickmin': -3, 'xtickmax': 3, 'xtickstep': 0.2, 131 | 'ytickmin': -3, 'ytickmax': 3, 'ytickstep': 0.2, 132 | 'ztickmin': -2, 'ztickmax': 5, 'ztickstep': 0.2 133 | }) 134 | for i in range(self.count): 135 | p3d = point_clouds[i, :, :, :].permute(1, 2, 0).reshape(-1, 3) 136 | self.visualizer.scatter(X=p3d, opts=opts,\ 137 | win=self.name + "_" + title + '_' + str(i+1)) 138 | 139 | def show_normals(self, normals_pred, title): 140 | normals = normals_pred.detach().cpu() 141 | normals = torch.abs(normals) 142 | #normals = normals.div(2) * 255 143 | 144 | #normals_step = normals.div_(torch.max(normals) - torch.min(normals))*255 145 | #normals_rescaled = normals_step.add(255) 146 | 147 | opts = ( 148 | { 149 | 'title': title 150 | }) 151 | normals_flipped = torch.flip(normals, dims=[0]) #TODO: CHECK THIS 152 | self.visualizer.images(normals_flipped,\ 153 | opts=opts, win=self.name + title + "_window_") 154 | 155 | import os 156 | def save_checkpoint(state, curr_epoch, name = ""): 157 | if not os.path.exists(os.path.join('models', name)): 158 | os.makedirs(os.path.join('models', name)) 159 | #torch.save(state, './models/model_e%d.pth.tar' % (curr_epoch)) 160 | print("Saving model to : {}".format(os.path.join('models', name, "model_e{}.pth.tar".format(curr_epoch)))) 161 | torch.save(state, os.path.join('models', name, "model_e{}.pth.tar".format(curr_epoch))) 162 | 163 | --------------------------------------------------------------------------------
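
Usage note (not part of the repository listing above): the projection and point-cloud helpers in `src/utils/projections.py` and `src/utils/save_pointcloud.py` compose naturally for quick inspection of a depth map. The sketch below is a minimal, hypothetical example; the depth values, intrinsics, and output filename are placeholders rather than project assets.

```python
# Minimal sketch (assumed usage, not repository code): back-project a depth map to a
# point cloud and dump it as an ASCII PLY for inspection.
import torch
from src.utils.projections import create_image_domain_grid, deproject_depth_to_points
from src.utils.save_pointcloud import save_ply

h, w = 180, 320
depth = torch.rand(1, 1, h, w)                        # [B, 1, H, W] placeholder depth (meters)
intrinsics = torch.tensor([[[200.0, 0.0, w / 2.0],
                            [0.0, 200.0, h / 2.0],
                            [0.0, 0.0, 1.0]]])        # [B, 3, 3] placeholder pinhole intrinsics

grid = create_image_domain_grid(width=w, height=h)    # [1, 3, H, W] homogeneous pixel grid
points = deproject_depth_to_points(depth, grid, intrinsics.inverse())  # [B, 3, H, W]

# save_ply expects an H x W x 3 array, so move channels last before writing.
save_ply("pointcloud.ply", points[0].permute(1, 2, 0).numpy(), scale=1.0, color=[255, 0, 0])
```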