├── CARTO ├── simnet │ └── lib │ │ ├── net │ │ ├── init │ │ │ ├── __init__.py │ │ │ └── default_init.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── learning_rate.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── soft_argmin.py │ │ │ │ ├── matchability.py │ │ │ │ ├── transition_blocks.py │ │ │ │ ├── hdc_functions.py │ │ │ │ ├── stochastic_depth.py │ │ │ │ ├── cost_volume.py │ │ │ │ ├── fpn_bilinear.py │ │ │ │ └── residual_blocks.py │ │ ├── post_processing │ │ │ ├── utils.py │ │ │ ├── surface_outputs.py │ │ │ ├── orochi_outputs.py │ │ │ ├── nms.py │ │ │ ├── segmentation_outputs.py │ │ │ ├── depth_outputs.py │ │ │ ├── keypoint_outputs.py │ │ │ └── box_outputs.py │ │ ├── data_module.py │ │ ├── pre_processing │ │ │ ├── grasp_inputs.py │ │ │ ├── keypoint_inputs.py │ │ │ ├── box_inputs.py │ │ │ ├── pose_inputs.py │ │ │ └── obb_inputs.py │ │ ├── losses.py │ │ ├── onnx_plugins.py │ │ └── dataset.py │ │ ├── primitive.py │ │ └── onnx_plugins.py ├── __init__.py ├── Encoder │ ├── inference_config.txt │ └── net_train.py ├── lib │ ├── rename_unpickler.py │ ├── compression.py │ └── real_data.py └── Decoder │ ├── models │ ├── lipschitz_norm.py │ ├── lr_schedules.py │ └── joint_state_decoder.py │ ├── multi_poly.py │ ├── visualizing │ ├── offscreen.py │ ├── visualize_sdf_values.py │ └── visualize_asdf_dataset.ipynb │ ├── data │ ├── verify_watertight.py │ ├── visualize_dataset_pytorch.py │ ├── visualize_dataset.py │ ├── asdf_dataset.py │ └── verify_partnet.py │ └── loss.py ├── figure_1.png ├── datasets └── decoder │ ├── split_files │ ├── StorageFurniture_prismatic │ │ └── object_ids.yaml │ ├── StorageFurniture_revolute │ │ └── object_ids.yaml │ ├── Oven_revolute │ │ └── object_ids.yaml │ ├── WashingMachine_revolute │ │ └── object_ids.yaml │ ├── Knife_prismatic │ │ └── object_ids.yaml │ ├── Microwave_revolute │ │ └── object_ids.yaml │ ├── Refrigerator_revolute │ │ └── object_ids.yaml │ ├── Knife_revolute │ │ └── object_ids.yaml │ ├── Dishwasher_revolute │ │ └── object_ids.yaml │ ├── Stapler_revolute │ │ └── object_ids.yaml │ ├── Table_prismatic │ │ └── object_ids.yaml │ └── Laptop_revolute │ │ └── object_ids.yaml │ └── id_lists │ └── All_Real_Categories.txt ├── .gitignore ├── setup.py ├── requirements.txt ├── download_archives.sh └── scripts ├── real_dataset_vis.ipynb ├── preprocess_partnetmobility.py └── full_inference.py /CARTO/simnet/lib/net/init/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robot-learning-freiburg/CARTO/HEAD/figure_1.png -------------------------------------------------------------------------------- /CARTO/__init__.py: 
-------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | ROOT_DIR = pathlib.Path(__file__).parent.resolve() 4 | -------------------------------------------------------------------------------- /CARTO/Encoder/inference_config.txt: -------------------------------------------------------------------------------- 1 | --train_batch_size=1 2 | --train_num_workers=1 3 | --test_path=datasets/synthetic 4 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/StorageFurniture_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 82c05fe4890a7f5112de5317fe5b354f 3 | - d69d9de0c79ac6a9c59350d819542ec7 4 | train: 5 | - 606d50b144d8ca164da5feafe6f1c8fc 6 | - 1af4a1dfa4f94cd44da5feafe6f1c8fc 7 | - 33ec57af7f648994da5feafe6f1c8fc 8 | - 2950d1baed4dbd78c59350d819542ec7 9 | - 21ae39cf6ba8557f4da5feafe6f1c8fc 10 | val: [] 11 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/StorageFurniture_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 170be0087472182bc59350d819542ec7 3 | - 17d25c26485edcf94da5feafe6f1c8fc 4 | train: 5 | - 10c14b0cb76f87584da5feafe6f1c8fc 6 | - 1548461b13adc0d0c59350d819542ec7 7 | - 1fc8231114fa42a7c59350d819542ec7 8 | - 19c79a42f68d7d444da5feafe6f1c8fc 9 | - 1caaaa5c1da4dd2dc59350d819542ec7 10 | - 198cbe57b01bad9dc59350d819542ec7 11 | val: [] 12 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Oven_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - a46e0c10f17f928ba2bc8d1e386113dc 3 | - 3ea1ace396f6ccae48407a54b1fbfda8 4 | train: 5 | - b296fbfbbe5dccf09c12d6260da9ac2b 6 | - bae2babb26dc352b20489998d734835a 7 | - eff23594cc0aed121b3e6b75a323070-0 8 | - bb5533538179f6c39209092a6c03f1bd 9 | - 8c2491e5245804d1ffc6e457221b9271 10 | - b8cf469bc1b42ab64a44340bf227e40 11 | - ef97ff5c1d6a00f2a760e402290727de 12 | val: [] 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | wandb/ 3 | datasets/decoder/generated_data/ 4 | datasets/partnet-mobility-v0/ 5 | simnet/lib/datasets/*/ 6 | *.json 7 | results/ 8 | *.ply 9 | *.pyc 10 | vis/* 11 | datasets/runs/* 12 | *.egg-info 13 | .vscode/ 14 | external_libs/ 15 | */eval/reconstruction/* 16 | __pycache__/ 17 | datasets/decoder/runs/* 18 | datasets/encoder/runs/* 19 | datasets/real/* 20 | datasets/synthetic/* 21 | *.tar.gz 22 | downloaded_archives/* 23 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/WashingMachine_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 5528ee64-7656-40e4-8db0-70fd39427b4d 3 | - 8b04de89-4f3f-45d8-8d7a-6bb5958e5340 4 | train: 5 | - 62e22f4d1846d8c1fdc6c1669e5c540 6 | - 0d31000f-e876-4751-876d-efa6a61fa9b2 7 | - 4163de2ce7f6f59aed1d8381d2c075c2-0 8 | - d87cf480-ba57-43b1-b1f2-bae2b8fe2fa4 9 | - u094c89ee-d9f6-4266-a9b3-c1f2549b1105 10 | - 04569f2f-3e07-4655-9337-bfa41a5ccbc0 11 | - ucfaedfea-c15a-495c-9037-21108eeeb006 12 | - 265d042dcfed6f15c357c21161963e89 13 | val: [] 14 | 
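The per-category object_ids.yaml files under datasets/decoder/split_files/ each map the keys test/train/val to lists of PartNet-Mobility object ids. A minimal loading sketch, assuming PyYAML is available and a source checkout laid out as in the tree above; the helper name load_split is illustrative and not part of the repo, only ROOT_DIR comes from CARTO/__init__.py:

import yaml

from CARTO import ROOT_DIR  # package directory, as defined in CARTO/__init__.py


def load_split(category: str, split: str = "train") -> list:
    # e.g. load_split("Oven_revolute", "test") -> list of object-id strings
    split_file = (
        ROOT_DIR.parent / "datasets" / "decoder" / "split_files" / category / "object_ids.yaml"
    )
    with open(split_file, "r") as fh:
        ids = yaml.safe_load(fh)
    return ids.get(split, [])
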
-------------------------------------------------------------------------------- /datasets/decoder/split_files/Knife_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 5d6201caa12611fe89f4664416242a41 3 | - e7523e396f8d4ae171e397fe45dce6b 4 | - 35233c0d786b5956d25d105fdf500c48 5 | train: 6 | - ba60dc6d-526e-4014-961a-5049df9079c6-0 7 | - fed0863a69b3744c44f6844c4f2ce888-0 8 | - fe95df61cc16452ccb3316c0fb4cfa01-0 9 | - 3181976321565dfee9027543872faef 10 | - 59481570acb7a0872d4ba5e1aa44cc40-0 11 | - 12f3efd9-f013-4aab-922c-0328502acd3f 12 | - 32036cc5-6e63-47cf-96ba-89ef2be3950e-0 13 | - 9f264c87-89e3-4b06-8f36-b618ec54694c 14 | - 35c3d7b9-7dec-4e66-a962-14ea0fde4cad 15 | val: [] 16 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Microwave_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - bdb10a17b04e2adbb7fb7f3ae74b618c 3 | - 4f956e259344d4a3599fb6902c958d23 4 | - 891f65c773939191c834958aed613724 5 | train: 6 | - b9f1eeea355194c19941e769880462e7 7 | - 87bae84777fe8b702bac1bcdfc2402d2 8 | - 95bc6fb98624ea3229d75ea275a1cb4e 9 | - df5bd51614d2fbdef114be17e2e7c4b5 10 | - c3bb5f3c842a6c2d178e7d331e641179 11 | - f9544effad178100be92f74d81ff60bf 12 | - dc5c91c8c01b1c8c506c648223cdabe9 13 | - c75ebd7c340649ba5ad304c2564ae1df 14 | - 6d83dea57df3c4a3500158c23c4c5a8e 15 | - 42aac49442bb9f8bb4e3935c6cee4b35 16 | val: [] 17 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Refrigerator_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - bc82358ed0ee28d41277c98ea0908b0 3 | - 5b81d7830eabb7547c6e1fb05e1b9037 4 | - ad6bd7e24e5bc25f3593835fe348a036 5 | train: 6 | - 1515a188cbc382fa84ad27a2f1142330 7 | - 6fb955194baf07a750a5eaedf6275e1b 8 | - 93d69af3c0034d3d9807c66948157e66 9 | - 7028b24b7d64efaf3194539af1047dcf 10 | - 6601ef650f03e000c49931aa7ca8fecb 11 | - 9e53ec8bedae98859807c66948157e66 12 | - 4d8d0cb708324170c98c13d6112727de 13 | - 58c878d494ecbbd62835d3f06aeb6e0 14 | - 3158fd17e409d38a732208e596b26ebc 15 | - 827c9a85df258dd8faf0b97ff18d3546 16 | val: [] 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Install script for setuptools.""" 2 | 3 | import setuptools 4 | from os import path 5 | 6 | # read the contents of your README file 7 | this_directory = path.abspath(path.dirname(__file__)) 8 | with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: 9 | long_description = f.read() 10 | 11 | # TODO 12 | # Add requirements.txt parsing 13 | 14 | setuptools.setup( 15 | name="carto", 16 | version="0.0.1", 17 | author="Nick Heppert", 18 | author_email="heppert@cs.uni-freiburg.de", 19 | packages=setuptools.find_packages(), 20 | python_requires=">=3.8", 21 | ) 22 | -------------------------------------------------------------------------------- /CARTO/lib/rename_unpickler.py: -------------------------------------------------------------------------------- 1 | import io 2 | import pickle 3 | 4 | 5 | class Unpickler(pickle.Unpickler): 6 | def find_class(self, module, name): 7 | renamed_module = module 8 | 9 | # Ensure old checkpoints can still be loaded 10 | renamed_module = renamed_module.replace( 11 | 
"simnet.shape_pretraining_articulated", "CARTO.Decoder" 12 | ) 13 | renamed_module = renamed_module.replace("simnet.lib", "CARTO.simnet.lib") 14 | return super(Unpickler, self).find_class(renamed_module, name) 15 | 16 | 17 | def renamed_load(file_obj): 18 | return Unpickler(file_obj).load() 19 | 20 | 21 | def renamed_loads(pickled_bytes): 22 | file_obj = io.BytesIO(pickled_bytes) 23 | return renamed_load(file_obj) 24 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/soft_argmin.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | @torch.jit.script 9 | def soft_argmin(input): 10 | _, channels, _, _ = input.shape 11 | 12 | softmin = F.softmin(input, dim=1) 13 | index_tensor = torch.arange( 14 | 0, channels, dtype=softmin.dtype, device=softmin.device 15 | ).view(1, channels, 1, 1) 16 | output = torch.sum(softmin * index_tensor, dim=1, keepdim=True) 17 | return output 18 | 19 | 20 | class SoftArgmin(nn.Module): 21 | """Compute soft argmin operation for given cost volume""" 22 | 23 | def forward(self, input): 24 | return soft_argmin(input) 25 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Knife_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - a683ed081504a35e4a9a3a0b87d50a92 3 | - b4f8a49abc400a775d6ddb389935ee57 4 | - u51509034-d4f7-4ef7-b014-6660f4df034d-0 5 | - 96a7c39f7eb90f65c90183d47cf3c337-1 6 | train: 7 | - 31f86223e3faaec3eae5cab1248d1ec6-0 8 | - 31f86223e3faaec3eae5cab1248d1ec6-1 9 | - 23fd9817d509fe472bf266a8f0187ce5-1 10 | - 737fd576f8eae54adfb1b24fd658f3b5-0 11 | - c7a96262d5dfc1ae72c447ef6e5cffc2 12 | - e9d3d9ef-57e3-4f0a-bbc7-e1cc75947ccd-7 13 | - 75bfa1045150e49fe177ccfa080b14b0-0 14 | - ceb3b39c9a035752b4fc059d1d10ec5d-0 15 | - 23fd9817d509fe472bf266a8f0187ce5-0 16 | - 581ad58ce8664d2d4ff0e6230d32c1e3 17 | - u2bba3644-e88e-4650-9124-e9964702f9ef-0 18 | - ud489e3ab-3fac-4753-8373-f5d4cebaeec5 19 | - fca703c2489237d51b44a9962207f944 20 | - 19dff8164764e2a259f37b6e82c5e93 21 | val: [] 22 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def to_numpy_from_torch(torch_tensor: torch.Tensor, multiplier: float = 100.0): 6 | numpy_array = np.ascontiguousarray(torch_tensor.float().cpu().numpy()) 7 | if numpy_array.ndim == 3: # Not batched 8 | # print(f"not batched {numpy_array.shape = }") 9 | numpy_array = np.expand_dims(numpy_array, 0) # Add one dimension 10 | numpy_array = numpy_array.transpose((0, 2, 3, 1)) 11 | return numpy_array / multiplier 12 | 13 | 14 | def to_torch_from_numpy(numpy_array: np.ndarray, multiplier: float = 100.0): 15 | numpy_array = numpy_array.transpose((2, 0, 1)) 16 | numpy_array = numpy_array * multiplier 17 | torch_tensor = torch.from_numpy(np.ascontiguousarray(numpy_array)).float() 18 | return torch_tensor 19 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/init/default_init.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | 
def default_init(module): 5 | """Initialize parameters of the module. 6 | 7 | For convolution, weights are initialized by Kaiming method and 8 | biases are initialized to zero. 9 | For batch normalization, scales and biases are set to 1 and 0, 10 | respectively. 11 | """ 12 | if isinstance(module, nn.Conv2d): 13 | nn.init.kaiming_normal_(module.weight.data) 14 | if module.bias is not None: 15 | module.bias.data.zero_() 16 | elif isinstance(module, nn.Conv3d): 17 | nn.init.kaiming_normal_(module.weight.data) 18 | if module.bias is not None: 19 | module.bias.data.zero_() 20 | elif isinstance(module, nn.BatchNorm2d): 21 | module.weight.data.fill_(1) 22 | module.bias.data.zero_() 23 | elif isinstance(module, nn.BatchNorm3d): 24 | module.weight.data.fill_(1) 25 | module.bias.data.zero_() 26 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Dishwasher_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - b1080bd937b04a44575f4e5007488531 3 | - 496dcf99-6e76-480c-8fab-a5579f16f2c7 4 | - 9112f0ee6b1cdf5082ec48ff3a4fe07c 5 | - a377f5af14ac6710a168e247bb97e471 6 | - cc8161b35f7bef958c88d30f502a452 7 | train: 8 | - a2caaa68364f6207f054969eeb39ff86 9 | - 187d79cd04b2bdfddf3a1b0d597ce76e 10 | - af913c310f1b978ae6488a574e8954a5 11 | - a62b6a19d2093bc91cbd656f2f1bc2ff 12 | - d95f6ea8-cda0-4d59-aa49-11309e3f0ce3 13 | - 6e51cc2c2da50c6a59c5c7ba83ec931a 14 | - a238b87f02c5de1edf3a1b0d597ce76e 15 | - aa4ad2f41efb815cb022c94235bc8601 16 | - 503b4dff71b404dabf195d81040cc60 17 | - c5f76c9a4137a3563862b05b9038dcc 18 | - 5d17e90f512a3dc7df3a1b0d597ce76e 19 | - c6090fb2806b2abfa5f4a1f264741b67 20 | - 55b0f47aea128c3b91d8be9599fbaa1f 21 | - 7d19e1db73ebfee26f893b5bc716a3fa 22 | - 4e9832bbbb077f9c5c5adfeaec1397f 23 | - 795af925dfc8897b035d20a1a3ca345 24 | - 93b7c0394cc309c8df3a1b0d597ce76e 25 | - 66725b8cad4355a03735baeeeb56a00 26 | val: [] 27 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Stapler_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 376eb047b40ef4f6a480e3d8fdbd4a92 3 | - 72a2bd9428f7179357fcd7a97096d25 4 | - 20c5096ea98cc955920de219c00d1c3b 5 | - d9378f9a4a7d6514602a101aa41a6f48 6 | - ue12a29d7-6d30-4159-ac11-3c6a058ad354 7 | train: 8 | - 58a427e5201aa43be00ace8e7c1a5eeb 9 | - 8f54f0bec8eb5d35d25169d37940fb64 10 | - f636f0aa2025ba3923c841f9d5051936 11 | - 3800d2ab6bc278bcd5a3e6010c55b78e 12 | - 453034dc-b04a-4415-8c43-16d6d23c47b2 13 | - 8c34afa29665356013b1d3e1528f0506-0 14 | - u26949e8f-8139-485b-99f9-694c026ed5a6 15 | - d01ff66659767d50cee19268a161fc4a 16 | - 88ac7b2b3050f1f861f7b52424be58ab 17 | - b3188e51216de8cce2e4961161b75547 18 | - 8d152be34b41785677937146265c551a 19 | - dc2cda7d-6fd5-48dd-8f7e-7524d7eb1c0a 20 | - 37b40b7e9290c0a330314ffb9bb887b5 21 | - c16cba81-714d-4b1a-94cd-7a148af83db0 22 | - 6a030b1836586b9f7e1c85c5c15da7fb 23 | - u9ea1219b-e360-4351-ae52-f589989c58e3-0 24 | - 98bc3afca001f433a1702a37604ec6f 25 | - f39912a4f0516fb897371d1e7cc637f3 26 | val: [] 27 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Table_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 21227197948fd2857c2f94a943a8669b 3 | - e64f3681d7c76bb743638dabe1eb5336 4 | - 31c090b23f52bb61208c8c852ee795bc 5 | - 
299ff1bf2618a4b584b039efed4b32d7 6 | - 415d7746f792eb1de0445fc6d980dd5c 7 | train: 8 | - 949e39403ab4fab37ade8e3ca8db8db3 9 | - 29f110b8740bd8068c427edcde5d5e2b 10 | - 74b8222078ba776c661673811de66400 11 | - c9857deb88989a67b5851007eadc6f74 12 | - a19e6780182c72cf9bf8bea04806ba15 13 | - 4aab0e569f1dc3bc8d7e9f13fd8f661d 14 | - 48045af90c7959e5738e43095496b061 15 | - 70d0937e1d38a9c2a45b742ddc5add59 16 | - a95828fa4607295674c8eb7e4d6198a5 17 | - 9e42bbdbfe36680391e4d6c585a697a 18 | - a516711827a396085528d560ddea455 19 | - 20edff7e1500fc4ed45f502ecff9e44f 20 | - 7b5b7bfa8580e913e2580b23e60e4674 21 | - 712d2c844d61aa9cefead98a255f706f 22 | - 28001cb70c38f19cf32b6091d9628440 23 | - 78c4b505894342269299936b751bd77b 24 | - 4dc3e9e293450817d3dad974dc098fa1 25 | - 2dc57230d14506eacd6ce29440b718cf 26 | - 9dd80e356880c9deaf268f6180933aa3 27 | val: [] 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # git+https://github.com/kevinleestone/pyrender.git@bf4184ea1079ef328ce0f2a55e52f17520927067 2 | numpy==1.22.3 3 | tqdm 4 | trimesh 5 | zstandard 6 | tyro 7 | shortuuid 8 | # opencv-python==4.5.5.64 9 | # opencv-python==4.4.0.46 10 | opencv-python==4.3.0.36 11 | wandb 12 | matplotlib 13 | plyfile 14 | opentsne 15 | seaborn 16 | open3d 17 | roma 18 | pytorch_lightning==1.6.3 19 | QtPy==2.1.0 20 | labelcloud # for running the labeling processs 21 | git+https://github.com/heppert-tri/mesh_to_sdf.git 22 | git+https://github.com/heppert-tri/urdfpy.git 23 | git+https://github.com/facebookresearch/pytorch3d.git@stable 24 | 25 | # ipython 26 | # boto3 27 | # colour-demosaicing 28 | # scikit-image 29 | # lxml 30 | # # pytorch_lightning==1.4.0 31 | # setuptools==59.5.0 32 | # scikit-learn 33 | # fvcore 34 | # blake3 35 | # base58 36 | # rich 37 | # coloredlogs 38 | # rtree 39 | # py3ode==1.2.0.dev15 40 | # transformers 41 | # sentence-transformers 42 | # yapf 43 | # h5py 44 | # jupyter 45 | # plotly 46 | # zstd 47 | # torchviz 48 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Laptop_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 125c93cbc6544bd1f9f50a550b8c1cce 3 | - 4fc3d56243d2d8801ef1ccfaf50f2048 4 | - 8d70fb6adc63e21eb7e0383b9609fa5 5 | - cc691d9e8e189ce47a381a112bfd785 6 | - 3b2db36aaa2546b99c7c402f274622c 7 | train: 8 | - 66e3b7c7f2e8e9297fd8853234f5e918 9 | - 4bacb1694e86005afb6e846333373df8 10 | - a4b410734514306ac401e233323032d6 11 | - afa49e97861c45e5e738f481f8560d58 12 | - f7c26b8c94ba8214397c35f585745a82 13 | - 7df09674bc991904c78df40cf2e9097a 14 | - cbcb79f534518dfbcfe78be5b7b99c8d 15 | - 5d544ee4b094c6606436916a86a90ed7 16 | - 6b78948484df58cdc664c3d4e2d59341 17 | - 241ec8a746dd1cfc78f71a335ebabfa5 18 | - 6489453e322cdb53f9f3c6290096f50f 19 | - aa92ecd31491bca87a88a2ad67bfd073 20 | - 5678a2173ff575d09cebe817bc1591b3 21 | - 1b67b4bfed6688ba5b22feddf58c05e1 22 | - 850673bcbce8f73ec8a6d87a62ac0341 23 | - f53ea19f871a80d420685b5a7e34b501 24 | - 1f507b26c31ae69be42930af58a36dce 25 | - 29f5cfcef7272f1f640578ae55230ebc 26 | - 97e94d800fd6dc07dbaa6d42a4980930 27 | - b5f6fd84a3f44ddb1aa47689117a61e1 28 | val: [] 29 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/functions/learning_rate.py: -------------------------------------------------------------------------------- 1 | # 
Copyright 2018 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | 7 | def lambda_learning_rate_poly(max_epochs, exponent): 8 | """Make a function for computing learning rate by "poly" policy. 9 | 10 | This policy does a polynomial decay of the learning rate over the epochs 11 | of training. 12 | 13 | Args: 14 | max_epochs (int): max numbers of epochs 15 | exponent (float): exponent value 16 | """ 17 | return lambda epoch: pow((1.0 - epoch / max_epochs), exponent) 18 | 19 | 20 | def lambda_warmup(warmup_period, warmup_factor, wrapped_lambda): 21 | def warmup(epoch, warmup_period, warmup_factor): 22 | if epoch > warmup_period: 23 | return 1.0 24 | else: 25 | return warmup_factor + (1.0 - warmup_factor) * (epoch / warmup_period) 26 | 27 | return lambda epoch: warmup(epoch, warmup_period, warmup_factor) * wrapped_lambda( 28 | epoch 29 | ) 30 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/data_module.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | 3 | from CARTO.simnet.lib.net import common 4 | 5 | 6 | class DataModule(pl.LightningDataModule): 7 | def __init__(self, hparams, train_dataset=None, preprocess_func=None): 8 | super().__init__() 9 | 10 | # Using the same hyperparmeter saving method as the model module 11 | # doesn't work, so just assign to some other variable for now. 12 | self.params = hparams 13 | self.train_dataset = train_dataset 14 | self.preprocess_func = preprocess_func 15 | 16 | def train_dataloader(self): 17 | return common.get_loader( 18 | self.params, 19 | "train", 20 | preprocess_func=self.preprocess_func, 21 | datapoint_dataset=self.train_dataset, 22 | ) 23 | 24 | def val_dataloader(self): 25 | return common.get_loader( 26 | self.params, "val", preprocess_func=self.preprocess_func 27 | ) 28 | 29 | def test_dataloader(self): 30 | return common.get_loader( 31 | self.params, "test", preprocess_func=self.preprocess_func 32 | ) 33 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/matchability.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # 8 | 9 | 10 | @torch.jit.script 11 | def matchability(input): 12 | softmin = F.softmin(input, dim=1) 13 | log_softmin = F.log_softmax(-input, dim=1) 14 | output = torch.sum(softmin * log_softmin, dim=1, keepdim=True) 15 | return output 16 | 17 | 18 | class Matchability(nn.Module): 19 | """Compute disparity matchability value from https://arxiv.org/abs/2008.04800""" 20 | 21 | def forward(self, input): 22 | if torch.jit.is_scripting(): 23 | # Torchscript generation can't handle mixed precision, so always compute at float32. 
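            # matchability() above sums softmin * log_softmin over the channel (disparity)
            # dimension, i.e. it returns the negative entropy of the softmin distribution:
            # values near 0 indicate a sharply peaked, confident match, while strongly
            # negative values indicate a flat, ambiguous cost distribution.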
24 | return matchability(input) 25 | else: 26 | return self.forward_with_amp(input) 27 | 28 | @torch.jit.unused 29 | def forward_with_amp(self, input): 30 | """This operation is unstable at float16, so compute at float32 even when using mixed precision""" 31 | with torch.cuda.amp.autocast(enabled=False): 32 | input = input.to(torch.float32) 33 | return matchability(input) 34 | -------------------------------------------------------------------------------- /CARTO/Decoder/models/lipschitz_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.functional import softplus 3 | 4 | 5 | class LipschitzNorm(torch.nn.Module): 6 | name: str 7 | dim: int 8 | 9 | def __init__(self, name, dim: int, weight) -> None: 10 | super().__init__() 11 | self.name = name 12 | self.dim = dim 13 | self.register_parameter( 14 | "lipschitz_constant", 15 | torch.nn.Parameter(torch.max(torch.sum(torch.abs(weight), dim))), 16 | ) 17 | 18 | def compute_weight(self, module): 19 | W = getattr(module, self.name) 20 | absrowsum = torch.sum(torch.abs(W), dim=self.dim) 21 | softplus_c = softplus(self.lipschitz_constant) 22 | scale = torch.minimum(torch.Tensor([1.0]).to(W.device), softplus_c / absrowsum) 23 | return torch.nn.Parameter(W * scale[:, None]) 24 | 25 | @staticmethod 26 | def apply(module, name: str, dim: int = -1) -> "LipschitzNorm": 27 | weight = getattr(module, name) 28 | fn = LipschitzNorm(name, dim, weight) 29 | setattr(module, name, fn.compute_weight(module)) 30 | module.register_forward_pre_hook(fn) 31 | return fn 32 | 33 | def __call__(self, module, inputs): 34 | setattr(module, self.name, self.compute_weight(module)) 35 | 36 | 37 | def lipschitz_norm(module, name: str = "weight", dim: int = 1): 38 | lipschitz_norm_instance = LipschitzNorm.apply(module, name, dim) 39 | return module, lipschitz_norm_instance 40 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/transition_blocks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class TransitionBlock(nn.Module): 10 | """Transition block for changing resolution or the number of channels.""" 11 | 12 | def __init__(self, in_channels, out_channels, stride): 13 | """ 14 | Args: 15 | in_channels (int): The number of input channels. 16 | out_channels (int): The number of output channels. 17 | stride (int): Stride (1 or 2). 18 | """ 19 | assert stride in (1, 2) 20 | assert not (in_channels == out_channels and stride == 1) 21 | super().__init__() 22 | 23 | if stride == 1: 24 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 25 | else: 26 | self.conv = nn.Conv2d( 27 | in_channels, 28 | out_channels, 29 | kernel_size=3, 30 | stride=2, 31 | padding=1, 32 | bias=False, 33 | ) 34 | self.bn = nn.BatchNorm2d(out_channels) 35 | self.relu = nn.ReLU(inplace=True) 36 | 37 | def forward(self, inputs): 38 | """Forward computation. 39 | 40 | Args: 41 | inputs (Tensor): Input tensor. 42 | 43 | Returns: 44 | Output tensor. 
45 | """ 46 | return self.relu(self.bn(self.conv(inputs))) 47 | -------------------------------------------------------------------------------- /CARTO/lib/compression.py: -------------------------------------------------------------------------------- 1 | import json 2 | import zstandard 3 | import collections 4 | import io 5 | import tarfile 6 | 7 | 8 | def write_compressed_json(x, path): 9 | cctx = zstandard.ZstdCompressor() 10 | with open(path, "wb") as raw_fh: 11 | with cctx.stream_writer(raw_fh) as zst_fh: 12 | zst_fh.write(json.dumps(x, sort_keys=True, indent=2).encode()) 13 | 14 | 15 | def read_compressed_json(path): 16 | cctx = zstandard.ZstdDecompressor() 17 | with open(path, "rb") as raw_fh: 18 | with cctx.stream_reader(raw_fh) as zst_fh: 19 | bytes_ = zst_fh.read() 20 | str_ = bytes_.decode() 21 | x = json.loads(str_, object_pairs_hook=collections.OrderedDict) 22 | return x 23 | 24 | 25 | def extract_compressed_tarfile(tarfile_path, dst_dir): 26 | cctx = zstandard.ZstdDecompressor() 27 | with open(tarfile_path, "rb") as raw_fh: 28 | with cctx.stream_reader(raw_fh) as zst_fh: 29 | tarfile_buf = zst_fh.read() 30 | 31 | with io.BytesIO(tarfile_buf) as raw_fh: 32 | with tarfile.TarFile(fileobj=raw_fh) as tar: 33 | members = tar.getmembers() 34 | for member in members: 35 | if not member.isfile(): 36 | continue 37 | data = tar.extractfile(member).read() 38 | assert member.name[0] != "/" 39 | member_path = dst_dir / member.path 40 | parent_dir = member_path.parent 41 | parent_dir.mkdir(parents=True, exist_ok=True) 42 | with open(member_path, "wb") as f: 43 | f.write(data) 44 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/hdc_functions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | def hdc_resnet_group( 10 | block_func, in_channels, base_channels, num_blocks, dilation_rates 11 | ): 12 | """Make a group of pre-activation residual blocks with Hybrid Dilated 13 | Convolution (HDC). 14 | 15 | "Understanding Convolution for Semantic Segmentation", 16 | https://arxiv.org/abs/1702.08502. 17 | 18 | Args: 19 | block_func (ResidualBlock): Function of a residual block. 20 | in_channels (int): The number of input channels. 21 | base_channels (int): The number of base channels of the residual block. 22 | num_blocks (int): The number of residual blocks. 23 | dilation_rates (list): List of dilation rates. 24 | 25 | Returns: 26 | Module of a group of residual blocks. 
27 | """ 28 | assert block_func.preact() 29 | 30 | num_rates = len(dilation_rates) 31 | residual_blocks = [ 32 | block_func( 33 | in_channels, 34 | base_channels, 35 | dilation_rate=dilation_rates[0], 36 | add_preact=False, 37 | ) 38 | ] 39 | in_channels = block_func.expansion() * base_channels 40 | for idx in range(1, num_blocks): 41 | residual_blocks.append( 42 | block_func( 43 | in_channels, 44 | base_channels, 45 | dilation_rate=dilation_rates[idx % num_rates], 46 | add_preact=True, 47 | add_last_norm=idx == num_blocks - 1, 48 | ) 49 | ) 50 | return nn.Sequential(*residual_blocks) 51 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/grasp_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.pre_processing import pose_inputs 5 | from CARTO.simnet.lib.non_convex_grasper import _NUM_GRASPS_PER_OBJECT 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | 13 | def compute_network_targets(grasps, masks, height, width): 14 | assert len(grasps) == len(masks) 15 | if len(grasps) == 0: 16 | height_d = int(height / _DOWNSCALE_VALUE) 17 | width_d = int(width / _DOWNSCALE_VALUE) 18 | return datapoint.Grasps( 19 | heat_map=np.zeros([height, width]), 20 | grasp_success_target=np.zeros([height_d, width_d, _NUM_GRASPS_PER_OBJECT]), 21 | ) 22 | heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 23 | grasp_success_target = compute_grasp_target(grasps, heatmaps) 24 | return datapoint.Grasps( 25 | heat_map=np.max(heatmaps, axis=0), 26 | grasp_success_target=grasp_success_target, 27 | ) 28 | 29 | 30 | def compute_grasp_target(grasps_per_objects, heat_maps, threshold=0.3): 31 | grasp_target = np.zeros( 32 | [ 33 | len(grasps_per_objects), 34 | heat_maps[0].shape[0], 35 | heat_maps[0].shape[1], 36 | _NUM_GRASPS_PER_OBJECT, 37 | ] 38 | ) 39 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 40 | for grasps_per_object, heat_map, ii in zip( 41 | grasps_per_objects, heat_maps, range(len(heat_maps)) 42 | ): 43 | grasp_values = np.zeros(_NUM_GRASPS_PER_OBJECT) 44 | mask = heatmap_indices == ii 45 | for jj, grasp in enumerate(grasps_per_object): 46 | grasp_values[jj] = grasp.success 47 | grasp_target[ii, mask] = grasp_values 48 | return np.sum(grasp_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 49 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/keypoint_inputs.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from scipy.stats import multivariate_normal 4 | 5 | from CARTO.simnet.lib import datapoint 6 | 7 | _KEYPOINT_VAR = 20 8 | 9 | 10 | def compute_network_targets(keypoints, height, width): 11 | coords = np.indices((height, width)) 12 | coords = coords.reshape([2, -1]).T 13 | all_targets = [] 14 | # for each type of keypoint 15 | for keypoint_group in keypoints: 16 | # for each keypoint in each keypoint group 17 | individual_heat_maps = [] 18 | for keypoint in keypoint_group: 19 | # for each instance of the keypoint in the image 20 | for px in keypoint.pixels: 21 | # place a Gaussian target distribution at the pixel location 22 | cur_heat_map = np.zeros([height, width]) 23 | cov = np.eye(2) * _KEYPOINT_VAR 24 | multi_var = 
multivariate_normal(mean=px[::-1], cov=cov) 25 | density = multi_var.pdf(coords) 26 | cur_heat_map[coords[:, 0], coords[:, 1]] = density 27 | individual_heat_maps.append(cur_heat_map) 28 | # take a max over all pixels for this keypoint group 29 | if len(individual_heat_maps): 30 | target = np.stack(individual_heat_maps).max(0) 31 | target /= target.max() 32 | else: 33 | target = np.zeros([height, width]) 34 | all_targets.append(datapoint.Keypoint(heat_map=target)) 35 | return all_targets 36 | 37 | 38 | def vis_network_targets(keypoints, height, width, left_img): 39 | target_images = [] 40 | all_targets = compute_network_targets(keypoints, height, width) 41 | for target in all_targets: 42 | heat_map = target.heat_map 43 | img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 44 | img = cv2.addWeighted(heat_map, 0.999, img.astype(float), 0.00005, 0) 45 | img /= img.max() / 255 46 | target_images.append(img.astype(np.uint8)) 47 | return target_images 48 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/losses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | import IPython 7 | 8 | 9 | class MaskedL1Loss(nn.Module): 10 | def __init__(self, centroid_threshold=0.3, downscale_factor=8): 11 | super().__init__() 12 | self.loss = nn.L1Loss(reduction="none") 13 | self.centroid_threshold = centroid_threshold 14 | self.downscale_factor = downscale_factor 15 | 16 | def forward(self, output, target, valid_mask): 17 | """ 18 | output: [N,16,H,W] 19 | target: [N,16,H,W] 20 | valid_mask: [N,H,W] 21 | """ 22 | valid_count = torch.sum( 23 | valid_mask[:, :: self.downscale_factor, :: self.downscale_factor] 24 | > self.centroid_threshold 25 | ) 26 | loss = self.loss(output, target) 27 | if len(output.shape) == 4: 28 | loss = torch.sum(loss, dim=1) 29 | loss[ 30 | valid_mask[:, :: self.downscale_factor, :: self.downscale_factor] 31 | < self.centroid_threshold 32 | ] = 0.0 33 | if valid_count == 0: 34 | return torch.sum(loss) 35 | return torch.sum(loss) / valid_count 36 | 37 | 38 | class MSELoss(nn.Module): 39 | def __init__(self): 40 | super().__init__() 41 | self.loss = nn.MSELoss(reduction="none") 42 | 43 | def forward(self, output, target): 44 | """ 45 | output: [N,H,W] 46 | target: [N,H,W] 47 | ignore_mask: [N,H,W] 48 | """ 49 | loss = self.loss(output, target) 50 | return torch.mean(loss) 51 | 52 | 53 | class MaskedMSELoss(nn.Module): 54 | def __init__(self): 55 | super().__init__() 56 | self.loss = nn.MSELoss(reduction="none") 57 | 58 | def forward(self, output, target, ignore_mask): 59 | """ 60 | output: [N,H,W] 61 | target: [N,H,W] 62 | ignore_mask: [N,H,W] 63 | """ 64 | valid_sum = torch.sum(torch.logical_not(ignore_mask)) 65 | loss = self.loss(output, target) 66 | loss[ignore_mask > 0] = 0.0 67 | return torch.sum(loss) / valid_sum 68 | -------------------------------------------------------------------------------- /download_archives.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Input Parameters 4 | ARCHIVE_NAME=$1 5 | if [ $ARCHIVE_NAME != "real" ] && [ $ARCHIVE_NAME != "synthetic" ] && [ $ARCHIVE_NAME != "A-SDF" ] 6 | then 7 | echo "Unknown archive name ${ARCHIVE_NAME}. 
Use ./download_archives.sh [real|synthetic|A-SDF]" 8 | exit 0 9 | fi 10 | 11 | 12 | # Create directory 13 | BASE_DIR="downloaded_archives" 14 | DIR="${BASE_DIR}/${ARCHIVE_NAME}_parts/" 15 | mkdir -p ${DIR} 16 | echo "Created ${DIR} for saving" 17 | 18 | PARTS=() 19 | if [ $ARCHIVE_NAME == "real" ] 20 | then 21 | for x in {a..r} 22 | do 23 | PARTS+=("a${x}") 24 | done 25 | elif [ $ARCHIVE_NAME == "synthetic" ] 26 | then 27 | for x in {a..c} 28 | do 29 | for y in {a..z} 30 | do 31 | PARTS+=("${x}${y}") 32 | done 33 | done 34 | for x in {a..y} 35 | do 36 | PARTS+=(".d${x}") 37 | done 38 | elif [ $ARCHIVE_NAME == "A-SDF" ] 39 | then 40 | for x in {a..q} 41 | do 42 | PARTS+=("a${x}") 43 | done 44 | fi 45 | 46 | EVERYTHING_OK=1 47 | # Download file 48 | for PART in "${PARTS[@]}" 49 | do 50 | echo "${DIR}" 51 | FILE_NAME="${ARCHIVE_NAME}.part.${PART}" 52 | URL="http://carto.cs.uni-freiburg.de/datasets/${ARCHIVE_NAME}_parts/${FILE_NAME}" 53 | #Check if file exists on the server 54 | if curl --output /dev/null --silent --head --fail "$URL" 55 | then 56 | echo "URL exists on server: $URL" 57 | # Download file 58 | if test -f "${DIR}/${FILE_NAME}" 59 | then 60 | echo "Skipping as file already exists locallly" 61 | else 62 | if wget -P ${DIR} ${URL} 63 | then 64 | echo "Successfully downloaded $URL" 65 | else 66 | echo "Error downloading $URL" 67 | EVERYTHING_OK = 0 68 | fi 69 | fi 70 | else 71 | echo "URL does not exist: $URL" 72 | EVERYTHING_OK = 0 73 | fi 74 | done 75 | 76 | # Unzip file 77 | if [ $EVERYTHING_OK -eq 0 ] 78 | then 79 | echo "Error downloading ${ARCHIVE_NAME} data" 80 | exit 1 81 | fi 82 | 83 | cat $DIR/* > "${BASE_DIR}/${ARCHIVE_NAME}.tar.gz" 84 | echo "Successfully downloaded ${ARCHIVE_NAME}" 85 | -------------------------------------------------------------------------------- /CARTO/Decoder/multi_poly.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | class MultiPoly: 7 | def __init__(self, x: np.ndarray, Y: np.ndarray, poly_dim: int = 1): 8 | assert x.ndim == 1 9 | assert Y.ndim == 2 10 | assert x.shape[0] == Y.shape[0] 11 | self.poly_fits: List[np.polynomial.Polynomial] = [] 12 | dim_amount = Y.shape[1] 13 | 14 | for lat_dim in range(dim_amount): 15 | poly = np.polynomial.Polynomial.fit(x, Y[:, lat_dim], poly_dim) 16 | self.poly_fits.append(poly) 17 | 18 | self.domain = np.array([np.min(x), np.max(x)]) 19 | 20 | def get_vals(self, X: np.ndarray): 21 | return self.__call__(X) 22 | 23 | def get_domain_mean(self): 24 | x = np.mean(self.domain) 25 | return self.get_vals(x) 26 | 27 | def linspace(self, n: int = 50, domain=None): 28 | X = np.linspace(*(domain if domain else self.domain), num=n) 29 | return self(X) 30 | 31 | def __call__(self, X: np.ndarray): 32 | Ys = [] 33 | for poly in self.poly_fits: 34 | Ys.append(poly(X)) 35 | return np.stack(Ys, axis=0).T 36 | 37 | def get_plot( 38 | self, 39 | x: np.ndarray, 40 | Y: np.ndarray, 41 | domain=None, 42 | n_samples: int = 50, 43 | types: List[str] = [], 44 | markers=["v", "P", "d"], 45 | ): 46 | plt_dim = int(np.ceil(np.sqrt(len(self.poly_fits)))) 47 | fig, axes = plt.subplots( 48 | plt_dim, plt_dim, figsize=(7, 7), sharex=True, sharey=True 49 | ) 50 | for i, poly in enumerate(self.poly_fits): 51 | xx, yy = poly.linspace(n_samples, domain=domain if domain else self.domain) 52 | ax = axes[i // plt_dim][i % plt_dim] 53 | for type, marker in zip(set(types), markers): 54 | mask = np.array(types) == type 55 
| ax.scatter( 56 | x[mask], 57 | Y[mask, i], 58 | label=type, 59 | marker=marker, 60 | c=x[mask], 61 | cmap="jet", 62 | ) 63 | ax.plot(xx, yy, color="orange") 64 | return fig, axes 65 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/surface_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib import datapoint 8 | from torch.nn import functional as F 9 | from CARTO.simnet.lib.net import losses 10 | 11 | _masked_l1_loss = losses.MaskedL1Loss() 12 | _MAX_DISP = 128 13 | 14 | 15 | class SurfaceOutput: 16 | def __init__(self, surface_pred, hparams): 17 | self.surface_pred = surface_pred 18 | self.is_numpy = False 19 | self.loss = nn.SmoothL1Loss(reduction="none") 20 | self.hparams = hparams 21 | 22 | # Converters for torch to numpy 23 | def convert_to_numpy_from_torch(self): 24 | self.surface_pred = np.ascontiguousarray(self.surface_pred.cpu().numpy()) 25 | self.surface_pred.transpose((1, 2, 0)) 26 | self.is_numpy = True 27 | 28 | def convert_to_torch_from_numpy(self): 29 | self.surface_pred.transpose((2, 0, 1)) 30 | self.surface_pred = torch.from_numpy( 31 | np.ascontiguousarray(self.surface_pred) 32 | ).float() 33 | self.is_numpy = False 34 | 35 | def get_visualization_img(self, left_img_np): 36 | if not self.is_numpy: 37 | self.convert_to_numpy_from_torch() 38 | 39 | surface = self.surface_pred[0] 40 | downscale_factor = int(left_img_np.shape[0] / disp.shape[0]) 41 | left_img = left_img_np[::downscale_factor, ::downscale_factor] 42 | viz_img = np.zeros([left_img.shape[0] * 2, left_img.shape[1], 3]) 43 | viz_img[0 : left_img.shape[0], :, :] = left_img 44 | viz_img[left_img.shape[0] : left_img.shape[0] + disp.shape[0], :, :] = surface 45 | return viz_img 46 | 47 | def compute_loss(self, surface_targets, log): 48 | if self.is_numpy: 49 | raise ValueError("Output is not in torch mode") 50 | surface_target_stacked = [] 51 | for surface_target in surface_targets: 52 | surface_target_stacked.append(surface_target.surface_pred) 53 | surface_target_batch = torch.stack(surface_target_stacked) 54 | surface_target_batch = surface_target_batch.to(torch.device("cuda:0")) 55 | mask = torch.sum(surface_target_batch, axis=1) > 0 56 | surface_loss = self.loss(surface_target_batch, self.surface_pred, mask) 57 | log["surface"] = surface_loss 58 | return self.hparams.loss_surface_mult * surface_loss 59 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/offscreen.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import pyrender 3 | import torch 4 | import numpy as np 5 | 6 | 7 | def look_at( 8 | center: np.ndarray, target: np.ndarray, up: np.ndarray = np.array([0.0, 1.0, 0.0]) 9 | ): 10 | """ 11 | params: 12 | center: Camera position 13 | target: Target to look at 14 | up: up axis of camera 15 | """ 16 | 17 | f = center - target 18 | f /= np.linalg.norm(f) 19 | up /= np.linalg.norm(up) 20 | r = np.cross(up, f) 21 | u = np.cross(f, r) 22 | 23 | m = np.zeros((4, 4)) 24 | m[0:3, 0] = r 25 | m[0:3, 1] = u 26 | m[0:3, 2] = f 27 | m[0:3, 3] = center 28 | m[3, 3] = 1.0 29 | return m 30 | 31 | 32 | def get_default_scene(): 33 | scene = pyrender.Scene() 34 | cam = pyrender.PerspectiveCamera(yfov=(np.pi / 3.0)) 35 | cam_pose = look_at( 36 | 
np.array([-1.0, -1.0, 1.0]), 37 | np.array([0.0, 0.0, 0.0]), 38 | up=np.array([0.0, 0.0, 1.0]), 39 | ) 40 | scene.add(cam, pose=cam_pose) 41 | 42 | light = pyrender.SpotLight( 43 | color=np.ones(3), 44 | intensity=3.0, 45 | innerConeAngle=np.pi / 16.0, 46 | outerConeAngle=np.pi / 6.0, 47 | ) 48 | scene.add(light, pose=cam_pose) 49 | return scene 50 | 51 | 52 | def get_point_cloud( 53 | points: np.ndarray, 54 | sdf: np.ndarray, 55 | color: np.ndarray = np.array([0.0, 0.0, 0.0]), 56 | threshold: float = 0e-3, 57 | ) -> pyrender.Mesh: 58 | if isinstance(points, torch.Tensor): 59 | points = points.cpu().numpy() 60 | if isinstance(sdf, torch.Tensor): 61 | sdf = sdf.cpu().numpy() 62 | if sdf.ndim == 2: 63 | sdf = sdf[:, 0] 64 | 65 | if not np.count_nonzero(sdf <= threshold): 66 | threshold = sdf.min() + 1e-5 67 | 68 | points = points[sdf <= threshold] 69 | # colors = np.ones(points.shape) * color 70 | colors = np.abs(points) / 2.0 71 | cloud = pyrender.Mesh.from_points(points, colors=colors) 72 | return cloud 73 | 74 | 75 | def render_offscreen( 76 | scene: Optional[pyrender.Scene] = None, meshes: Optional[List[pyrender.Mesh]] = [] 77 | ): 78 | if not scene: 79 | scene = get_default_scene() 80 | for mesh in meshes: 81 | scene.add(mesh) 82 | r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480) 83 | color, depth = r.render(scene) 84 | r.delete() 85 | return color 86 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/stochastic_depth.py: -------------------------------------------------------------------------------- 1 | # BACKPORT FROM TORCHVISION 0.11 2 | from torch import Tensor 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | def stochastic_depth( 8 | input: Tensor, p: float, mode: str, training: bool = True 9 | ) -> Tensor: 10 | """ 11 | Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth" 12 | `_ used for randomly dropping residual 13 | branches of residual architectures. 14 | 15 | Args: 16 | input (Tensor[N, ...]): The input tensor or arbitrary dimensions with the first one 17 | being its batch i.e. a batch with ``N`` rows. 18 | p (float): probability of the input to be zeroed. 19 | mode (str): ``"batch"`` or ``"row"``. 20 | ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes 21 | randomly selected rows from the batch. 22 | training: apply stochastic depth if is ``True``. Default: ``True`` 23 | 24 | Returns: 25 | Tensor[N, ...]: The randomly zeroed tensor. 26 | """ 27 | # if not torch.jit.is_scripting() and not torch.jit.is_tracing(): 28 | # _log_api_usage_once(stochastic_depth) 29 | if p < 0.0 or p > 1.0: 30 | raise ValueError(f"drop probability has to be between 0 and 1, but got {p}") 31 | if mode not in ["batch", "row"]: 32 | raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}") 33 | if not training or p == 0.0: 34 | return input 35 | 36 | survival_rate = 1.0 - p 37 | if mode == "row": 38 | size = [input.shape[0]] + [1] * (input.ndim - 1) 39 | else: 40 | size = [1] * input.ndim 41 | noise = torch.empty(size, dtype=input.dtype, device=input.device) 42 | noise = noise.bernoulli_(survival_rate) 43 | if survival_rate > 0.0: 44 | noise.div_(survival_rate) 45 | return input * noise 46 | 47 | 48 | # torch.fx.wrap("stochastic_depth") 49 | 50 | 51 | class StochasticDepth(nn.Module): 52 | """ 53 | See :func:`stochastic_depth`. 
54 | """ 55 | 56 | def __init__(self, p: float, mode: str) -> None: 57 | super().__init__() 58 | # _log_api_usage_once(self) 59 | self.p = p 60 | self.mode = mode 61 | 62 | def forward(self, input: Tensor) -> Tensor: 63 | return stochastic_depth(input, self.p, self.mode, self.training) 64 | 65 | def __repr__(self) -> str: 66 | s = f"{self.__class__.__name__}(p={self.p}, mode={self.mode})" 67 | return s 68 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/orochi_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | from CARTO.simnet.lib import color_stuff 9 | from CARTO.simnet.lib import datapoint 10 | from CARTO.simnet.lib.net.dataset import PanopticOutputs 11 | 12 | 13 | def visualize_img( 14 | panoptic_outputs: PanopticOutputs, 15 | c_img, 16 | camera_model, 17 | class_list, 18 | poses=False, 19 | prune_distance=False, 20 | is_target=False, 21 | ): 22 | c_img = np.copy(c_img) 23 | 24 | c_img = panoptic_outputs.room_segmentation[0].get_visualization_img( 25 | c_img, is_target=is_target 26 | ) 27 | 28 | if len(panoptic_outputs.handhold_obbs) > 0: 29 | c_img = panoptic_outputs.handhold_obbs[0].get_visualization_img( 30 | 0, c_img, camera_model=camera_model, poses=poses 31 | ) 32 | if len(panoptic_outputs.cabinet_door_obbs) > 0: 33 | c_img = panoptic_outputs.cabinet_door_obbs[0].get_visualization_img( 34 | 0, c_img, camera_model=camera_model, class_list=[], poses=poses 35 | ) 36 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 37 | c_img = panoptic_outputs.graspable_objects_obbs[0].get_visualization_img( 38 | 0, 39 | c_img, 40 | camera_model=camera_model, 41 | class_list=class_list, 42 | prune_distance=prune_distance, 43 | poses=poses, 44 | ) 45 | 46 | return c_img 47 | 48 | 49 | def visualize_heatmap(panoptic_outputs: PanopticOutputs, c_img): 50 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 51 | # print(panoptic_outputs.graspable_objects_obbs[0].heatmap.shape) 52 | # print(np.max(panoptic_outputs.graspable_objects_obbs[0])) 53 | # print(np.min(panoptic_outputs.graspable_objects_obbs[0].heatmap.shape)) 54 | heatmap = cv2.applyColorMap( 55 | ( 56 | np.clip( 57 | panoptic_outputs.graspable_objects_obbs[0].heatmap[0, ...], 0.0, 1.0 58 | ) 59 | * 255.0 60 | ).astype(np.uint8), 61 | cv2.COLORMAP_JET, 62 | ) 63 | gray = cv2.cvtColor(c_img.copy(), cv2.COLOR_RGB2GRAY).astype(np.uint8) 64 | gray_full = np.zeros_like(heatmap) 65 | gray_full[..., 0] = gray 66 | gray_full[..., 1] = gray 67 | gray_full[..., 2] = gray 68 | return cv2.addWeighted(gray_full, 0.9, heatmap.astype(np.uint8), 0.4, 0) 69 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/primitive.py: -------------------------------------------------------------------------------- 1 | from CARTO.simnet.lib import sg 2 | import numpy as np 3 | import trimesh 4 | 5 | DEFAULT_COLOR = np.array([60, 60, 60, 255], dtype=np.uint8) 6 | CORNER_1 = np.array([255, 255, 0, 255], dtype=np.uint8) 7 | CORNER_2 = np.array([0, 255, 255, 255], dtype=np.uint8) 8 | CORNER_3 = np.array([255, 0, 255, 255], dtype=np.uint8) 9 | CORNER_4 = np.array([0, 255, 0, 255], dtype=np.uint8) 10 | CORNER_5 = np.array([255, 0, 0, 255], dtype=np.uint8) 11 | CORNER_6 = np.array([0, 0, 255, 255], dtype=np.uint8) 12 | CORNER_7 = np.array([255, 255, 255, 255], 
dtype=np.uint8) 13 | CORNER_8 = np.array([255, 255, 0, 255], dtype=np.uint8) 14 | RED = np.array([255, 0, 0, 255], dtype=np.uint8) 15 | GREEN = np.array([0, 255, 0, 255], dtype=np.uint8) 16 | BLUE = np.array([0, 0, 255, 255], dtype=np.uint8) 17 | 18 | 19 | def make_coordinate_frame(scale=1.0, name="coord_frame_vis"): 20 | node = sg.Node() 21 | small = 0.2 * scale 22 | large = 1.0 * scale 23 | node.add_child(make_cube(large, small, small, color=RED, name=f"{name}_x")) 24 | node.add_child(make_cube(small, large, small, color=GREEN, name=f"{name}_y")) 25 | node.add_child(make_cube(small, small, large, color=BLUE, name=f"{name}_z")) 26 | return node 27 | 28 | 29 | def make_cube( 30 | x_width=1.0, y_depth=1.0, z_height=1.0, name="cube", color=None, disable_color=False 31 | ): 32 | if disable_color: 33 | vertex_colors = None 34 | else: 35 | vertex_colors = [ 36 | CORNER_1 if color is None else color, 37 | CORNER_2 if color is None else color, 38 | CORNER_3 if color is None else color, 39 | CORNER_4 if color is None else color, 40 | CORNER_5 if color is None else color, 41 | CORNER_6 if color is None else color, 42 | CORNER_7 if color is None else color, 43 | CORNER_8 if color is None else color, 44 | ] 45 | mesh = trimesh.Trimesh( 46 | vertices=[ 47 | [0, 0, 0], # 0 48 | [x_width, 0, 0], # 1 49 | [x_width, y_depth, 0], # 2 50 | [0, y_depth, 0], # 3 51 | [0, 0, z_height], # 4 52 | [x_width, 0, z_height], # 5 53 | [x_width, y_depth, z_height], # 6 54 | [0, y_depth, z_height], # 7 55 | ], 56 | faces=[ 57 | [0, 3, 2, 1], 58 | [1, 2, 6, 5], 59 | [2, 3, 7, 6], 60 | [4, 5, 6, 7], 61 | [0, 1, 5, 4], 62 | [0, 4, 7, 3], 63 | ], 64 | vertex_colors=vertex_colors, 65 | ) 66 | node = sg.Node(name=name) 67 | node.meshes = [mesh] 68 | node.meta.is_object = True 69 | return node 70 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/visualize_sdf_values.py: -------------------------------------------------------------------------------- 1 | ## Load training SDFs 2 | import argparse 3 | import colorsys 4 | import os 5 | import numpy as np 6 | import pathlib 7 | import tqdm 8 | import open3d as o3d 9 | import random 10 | 11 | from CARTO.simnet.lib.datapoint import decompress_datapoint 12 | from CARTO.Decoder import utils 13 | from CARTO.Decoder.data import dataset 14 | from CARTO.Decoder import config 15 | from CARTO.Decoder.visualizing import code_vis 16 | from PIL import Image 17 | 18 | import seaborn as sns 19 | 20 | 21 | def main(args): 22 | file_dir = pathlib.Path(args.file_dir) 23 | out_dir = pathlib.Path(args.out_dir) 24 | out_dir.mkdir(exist_ok=True, parents=True) 25 | dataset_cfg: config.GenerationConfig = utils.load_cfg( 26 | file_dir, cfg_class=config.GenerationConfig 27 | ) 28 | all_files = list(file_dir.glob("*.zstd")) 29 | if args.latest or args.earliest: 30 | all_files.sort(key=lambda x: os.path.getmtime(x), reverse=args.earliest) 31 | else: 32 | print("Shuffling object list") 33 | random.shuffle(all_files) 34 | 35 | counts = utils.AccumulatorDict() 36 | for file_name in all_files: 37 | counts.increment(str(file_name).split("_")[-2], 1) 38 | print(counts) 39 | 40 | render = code_vis.get_o3d_render(frame_width=600, frame_height=600) 41 | 42 | for i, file_path in tqdm.tqdm(enumerate(all_files[: args.n])): 43 | with open(file_path, "rb") as fh: 44 | buf = fh.read() 45 | data_point: dataset.DataPoint = decompress_datapoint(buf) 46 | 47 | # print(data_point.keys()) 48 | sdf = data_point.sdf_values[:, None] 49 | points = data_point.points 50 | # 
Assign inside/outside color 51 | colors = np.where( 52 | sdf < 0.0, 53 | np.ones_like(points) * sns.color_palette("tab10")[0], 54 | np.ones_like(points) * sns.color_palette("tab10")[1], 55 | ) 56 | 57 | if len(points) == 0: 58 | continue 59 | 60 | points /= dataset_cfg.max_extent 61 | 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(points) 64 | pcd.colors = o3d.utility.Vector3dVector(colors) 65 | 66 | img_np = code_vis.render_o3d_mesh(pcd, height_coloring=False, render=render) 67 | img_PIL = Image.fromarray(img_np) 68 | img_PIL.save(str(out_dir / f"{i}.png")) 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument("file_dir") 74 | parser.add_argument("out_dir") 75 | parser.add_argument("-n", type=int, default=100) 76 | parser.add_argument("-l", "--latest", action="store_true", default=False) 77 | parser.add_argument("-e", "--earliest", action="store_true", default=False) 78 | args = parser.parse_args() 79 | main(args) 80 | -------------------------------------------------------------------------------- /CARTO/Decoder/models/lr_schedules.py: -------------------------------------------------------------------------------- 1 | # from typing import List 2 | 3 | from CARTO.Decoder.config import LearningRateScheduleConfig, LearningRateScheduleType 4 | 5 | 6 | class LearningRateSchedule: 7 | def get_learning_rate(self, epoch): 8 | pass 9 | 10 | @staticmethod 11 | def get_from_config(cfg: LearningRateScheduleConfig): 12 | if cfg.type == LearningRateScheduleType.STEP: 13 | return StepLearningRateSchedule( 14 | cfg.initial, 15 | cfg.interval, 16 | cfg.factor, 17 | ) 18 | elif cfg.type == LearningRateScheduleType.WARMUP: 19 | return WarmupLearningRateSchedule( 20 | cfg.initial, 21 | cfg.final, 22 | cfg.length, 23 | ) 24 | 25 | elif cfg.type == LearningRateScheduleType.CONSTANT: 26 | return ConstantLearningRateSchedule(cfg.initial) 27 | elif cfg.type == LearningRateScheduleType.LEVEL_DECAY: 28 | return LevelDecayLearningRateSchedule(cfg.initial, cfg.factor) 29 | else: 30 | raise Exception( 31 | 'no known learning rate schedule of type "{}"'.format(cfg.type) 32 | ) 33 | 34 | 35 | class ConstantLearningRateSchedule(LearningRateSchedule): 36 | def __init__(self, value): 37 | self.value 38 | 39 | def get_learning_rate(self, epoch): 40 | return self.value 41 | 42 | 43 | class StepLearningRateSchedule(LearningRateSchedule): 44 | def __init__(self, initial, interval, factor): 45 | self.initial = initial 46 | self.interval = interval 47 | self.factor = factor 48 | 49 | def get_learning_rate(self, epoch): 50 | return self.initial * (self.factor ** (epoch // self.interval)) 51 | 52 | 53 | class WarmupLearningRateSchedule(LearningRateSchedule): 54 | def __init__(self, initial, warmed_up, length): 55 | self.initial = initial 56 | self.warmed_up = warmed_up 57 | self.length = length 58 | 59 | def get_learning_rate(self, epoch): 60 | if epoch > self.length: 61 | return self.warmed_up 62 | return self.initial + (self.warmed_up - self.initial) * epoch / self.length 63 | 64 | 65 | class LevelDecayLearningRateSchedule(LearningRateSchedule): 66 | def __init__(self, initial, decay): 67 | self.initial = initial 68 | self.decay = decay 69 | self.level = 0 70 | 71 | def inc_level(self, level=1): 72 | self.level += level 73 | 74 | def get_learning_rate(self, epoch): 75 | """ 76 | Epoch does not matter 77 | """ 78 | return self.initial * ((self.decay) ** self.level) 79 | 80 | 81 | # def get_learning_rate_schedules(schedulers: 
List[LearningRateSchedulerConfig]): 82 | # schedules = [] 83 | # for schedule in schedulers: 84 | 85 | # return schedules 86 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/verify_watertight.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | from concurrent import futures 3 | import pathlib 4 | 5 | import tqdm 6 | import functools 7 | import gc 8 | 9 | from typing import Dict, Any, Callable, List 10 | import itertools 11 | 12 | # import pyrender 13 | import trimesh 14 | import urdfpy 15 | 16 | from CARTO.simnet.lib import partnet_mobility 17 | from CARTO.simnet.lib.datasets import PartNetMobilityV0DB 18 | from CARTO.simnet.lib.datapoint import compress_datapoint, decompress_datapoint 19 | 20 | import uuid 21 | from CARTO.Decoder import utils, config 22 | from CARTO.Decoder.data import dataset 23 | import open3d as o3d 24 | import numpy as np 25 | 26 | 27 | def process_object_id( 28 | object_id: str, 29 | joint_filter: Callable[[Dict[str, Any]], bool] = lambda _: True, 30 | joint_offset: float = 0.0, 31 | ): 32 | # object_id = "187d79cd04b2bdfddf3a1b0d597ce76e" 33 | 34 | object_path = PartNetMobilityV0DB.get_object(object_id) 35 | object_meta = PartNetMobilityV0DB.get_object_meta(object_id) 36 | 37 | joints_of_interest: List[str] = [] 38 | # Artifact from preprocessing 39 | for joint_id, joint in object_meta["joints"].items(): 40 | if not joint_filter( 41 | joint, partnet_mobility.get_joint_name_exclusion_list(object_meta) 42 | ): 43 | continue 44 | joints_of_interest.append(joint_id) 45 | 46 | joint_config = {} 47 | for joint_id, joint in object_meta["joints"].items(): 48 | joint_config[joint_id] = joint["limit"][0] + ( 49 | joint_offset if joint_id in joints_of_interest else 0.0 50 | ) 51 | 52 | canonical_transform = np.array( 53 | PartNetMobilityV0DB.get_object_meta(object_id)["canonical_transformation"] 54 | ) 55 | urdf_object = urdfpy.URDF.load(str(object_path / "mobility.urdf")) 56 | trimesh_object, _, _ = utils.object_to_trimesh( 57 | urdf_object, joint_config=joint_config, base_transform=canonical_transform 58 | ) 59 | # points, sdf = utils.object_to_sdf(trimesh_object) 60 | # points = points[sdf <= 0] 61 | points, _ = utils.object_to_point_cloud(trimesh_object, number_samples=100000) 62 | color = utils.get_random_color() 63 | 64 | pcd = o3d.geometry.PointCloud() 65 | pcd.points = o3d.utility.Vector3dVector(points) 66 | # pcd.points = o3d.utility.Vector3dVector(s_pc.points) 67 | pcd.paint_uniform_color(color) 68 | return pcd 69 | 70 | 71 | def main(): 72 | object_filter, joint_filter = partnet_mobility.get_filter_function( 73 | # category_list=["Microwave", "Laptop"], 74 | category_list=["Laptop"], 75 | # category_list=["Microwave"], 76 | # category_list=["WashingMachine"], 77 | max_unique_parents=1, 78 | no_limit_ok=False, 79 | min_prismatic=0.1, 80 | min_revolute=0.1, 81 | ) 82 | PartNetMobilityV0DB.set_filter(object_filter) 83 | print(f"Length of filtered dataset: {len(PartNetMobilityV0DB)}") 84 | 85 | pcds = [] 86 | for object_id in tqdm.tqdm(PartNetMobilityV0DB.index_list): 87 | pcd: o3d.geometry.PointCloud = process_object_id( 88 | object_id, joint_filter=joint_filter, joint_offset=1.5 89 | ) 90 | pcds.append(pcd) 91 | pcd_local = [pcd] 92 | o3d.visualization.draw_geometries(pcd_local) 93 | 94 | o3d.visualization.draw_geometries(pcds) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | 
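A minimal usage sketch for the schedule factory defined in CARTO/Decoder/models/lr_schedules.py above (editor's illustration, not repository code; it assumes LearningRateScheduleConfig can be constructed directly with the fields that get_from_config reads):

from CARTO.Decoder.config import LearningRateScheduleConfig, LearningRateScheduleType
from CARTO.Decoder.models.lr_schedules import LearningRateSchedule

# Assumed constructor: a config carrying type / initial / interval / factor.
cfg = LearningRateScheduleConfig(
    type=LearningRateScheduleType.STEP, initial=1e-3, interval=10, factor=0.5
)
schedule = LearningRateSchedule.get_from_config(cfg)
# Step schedule: 1e-3 * 0.5 ** (epoch // 10), i.e. 2.5e-4 at epoch 25.
print(schedule.get_learning_rate(25))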
-------------------------------------------------------------------------------- /CARTO/Decoder/models/joint_state_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | from CARTO.Decoder import config 8 | 9 | 10 | class ClassificationHead(nn.Module): 11 | def __init__(self, in_dim, weight_normalizer=lambda x: x): 12 | super(ClassificationHead, self).__init__() 13 | 14 | lin_state = weight_normalizer(nn.Linear(in_dim, 1)) # Continous prediction 15 | lin_type = weight_normalizer( 16 | nn.Linear(in_dim, 2, bias=False) 17 | ) # 0: revolute, 1: prismatic 18 | setattr(self, "lin_state", lin_state) 19 | setattr(self, "lin_type", lin_type) 20 | 21 | def forward(self, input): 22 | lin_state = getattr(self, "lin_state") 23 | lin_type = getattr(self, "lin_type") 24 | 25 | state_pred = lin_state(input) 26 | type_pred = torch.sigmoid(lin_type(input)) 27 | return {"state": state_pred, "type": type_pred} 28 | 29 | 30 | class ZeroOneHead(nn.Module): 31 | def __init__(self, in_dim, weight_normalizer=lambda x: x): 32 | super(ZeroOneHead, self).__init__() 33 | lin_module = weight_normalizer(nn.Linear(in_dim, 1)) 34 | setattr(self, "lin_module", lin_module) 35 | 36 | def forward(self, input): 37 | lin_module = getattr(self, "lin_module") 38 | pred = torch.sigmoid(lin_module(input)) 39 | return {"state": pred} 40 | 41 | 42 | class JointStateDecoder(nn.Module): 43 | def __init__( 44 | self, 45 | cfg: config.JointStateDecoderModelConfig, 46 | joint_config_latent_code_dim: int = 16, 47 | ): 48 | super(JointStateDecoder, self).__init__() 49 | self.joint_config_latent_code_dim = joint_config_latent_code_dim 50 | 51 | dims = [joint_config_latent_code_dim] + cfg.dims 52 | self.num_layers = len(dims) 53 | 54 | weight_normalizer = config.get_weight_normalizer(cfg.weight_normalizer) 55 | 56 | for layer in range(0, self.num_layers - 1): 57 | out_dim = dims[layer + 1] 58 | linear_layer = weight_normalizer(nn.Linear(dims[layer], out_dim)) 59 | # linear_layer = nn.utils.weight_norm(linear_layer) 60 | setattr(self, "lin" + str(layer), linear_layer) 61 | 62 | if cfg.output_head == config.JointDecoderOutputHeadStyle.CLASSIFICATION: 63 | out_head_class = ClassificationHead 64 | elif cfg.output_head == config.JointDecoderOutputHeadStyle.ZERO_ONE_HEAD: 65 | out_head_class = ZeroOneHead 66 | else: 67 | raise ModuleNotFoundError(f"Unknown output head {cfg.output_head}") 68 | 69 | setattr( 70 | self, 71 | "output_head", 72 | out_head_class(dims[-1], weight_normalizer=weight_normalizer), 73 | ) 74 | 75 | self.relu = nn.ReLU() 76 | self.th = nn.Tanh() 77 | 78 | def forward(self, input): 79 | assert ( 80 | input.size()[-1] == self.joint_config_latent_code_dim 81 | ), f"{input.size()[-1]} == {self.joint_config_latent_code_dim}" 82 | 83 | x = input 84 | for layer in range(0, self.num_layers - 1): 85 | lin = getattr(self, "lin" + str(layer)) 86 | x = lin(x) 87 | x = self.relu(x) 88 | x = F.dropout(x, p=0.2, training=self.training) 89 | output_head = getattr(self, "output_head") 90 | return output_head(x) 91 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/visualize_dataset_pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | import numpy as np 4 | import open3d as o3d 5 | import random 6 | import tqdm 7 | 8 | from CARTO.Decoder.data import 
dataset 9 | from CARTO.Decoder import utils, config 10 | 11 | 12 | def main(args): 13 | split_dicts = dataset.get_dataset_split_dict( 14 | pathlib.Path(args.data_dir), args.split_name, file_name=args.split_file_name 15 | ) 16 | gen_cfg: config.GenerationConfig = utils.load_cfg( 17 | pathlib.Path(args.data_dir), cfg_class=config.GenerationConfig 18 | ) 19 | rescaler = dataset.Rescaler3D(scale=gen_cfg.max_extent) 20 | print(gen_cfg.max_extent) 21 | train_dataset = dataset.SDFDataset( 22 | split_dicts["train"], rescaler=rescaler, cache_in_ram=False, subsample=100000000 23 | ) 24 | val_dataset = dataset.SDFDataset( 25 | split_dicts["val"], rescaler=rescaler, cache_in_ram=False 26 | ) 27 | 28 | print(f"{len(train_dataset) = }") 29 | 30 | pcds = [] 31 | k = 100 32 | # k = len(train_dataset) 33 | indices = random.sample(range(len(train_dataset)), k) 34 | 35 | for i in tqdm.tqdm(indices): 36 | data_point: dataset.DataPoint = train_dataset[i] 37 | 38 | sdf = data_point.sdf_values 39 | points = data_point.points[sdf <= 0.0] 40 | color = utils.get_random_color() 41 | 42 | if np.abs(points).max() > 1: 43 | print(np.abs(points).max()) 44 | 45 | # print(points.shape) 46 | # print(color) 47 | 48 | pcd = o3d.geometry.PointCloud() 49 | pcd.points = o3d.utility.Vector3dVector(points) 50 | pcd.paint_uniform_color(color) 51 | pcds.append(pcd) 52 | 53 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame()) 54 | print(len(pcds)) 55 | 56 | if args.unit_cube: 57 | cube_points = np.array( 58 | [ 59 | [-1.0, -1.0, -1.0], 60 | [1.0, -1.0, -1.0], 61 | [-1.0, 1.0, -1.0], 62 | [1.0, 1.0, -1.0], 63 | [-1.0, -1.0, 1.0], 64 | [1.0, -1.0, 1.0], 65 | [-1.0, 1.0, 1.0], 66 | [1.0, 1.0, 1.0], 67 | ], 68 | dtype=np.float, 69 | ) 70 | # cube_points /= 2 71 | lines = np.array( 72 | [ 73 | [0, 1], 74 | [0, 2], 75 | [1, 3], 76 | [2, 3], 77 | [4, 5], 78 | [4, 6], 79 | [5, 7], 80 | [6, 7], 81 | [0, 4], 82 | [1, 5], 83 | [2, 6], 84 | [3, 7], 85 | ] 86 | ) 87 | colors = [[1, 0, 0] for i in range(len(lines))] 88 | line_set = o3d.geometry.LineSet() 89 | line_set.points = o3d.utility.Vector3dVector(cube_points) 90 | line_set.lines = o3d.utility.Vector2iVector(lines) 91 | line_set.colors = o3d.utility.Vector3dVector(colors) 92 | pcds.append(line_set) 93 | 94 | o3d.visualization.draw_geometries(pcds) 95 | 96 | 97 | if __name__ == "__main__": 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("data_dir") 100 | parser.add_argument("split_name") 101 | parser.add_argument("--unit-cube", action="store_true", default=True) 102 | args = parser.parse_args() 103 | main(args) 104 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/nms.py: -------------------------------------------------------------------------------- 1 | import IPython 2 | import numpy as np 3 | 4 | 5 | def run(detections, overlap_thresh=0.75, order_mode="confidence"): 6 | # initialize the list of picked detections 7 | pruned_detections = [] 8 | 9 | # sort the indexes 10 | if order_mode == "lower_y": 11 | idxs = create_order_by_lower_y(detections) 12 | elif order_mode == "confidence": 13 | idxs = create_order_by_score(detections) 14 | 15 | overlap_function = get_2d_one_way_iou 16 | 17 | # keep looping while some indexes still remain in the indexes list 18 | while len(idxs) > 0: 19 | # grab the last index in the indexes list and add the index value 20 | # to the list of picked indexes 21 | last = len(idxs) - 1 22 | ii = idxs[last] 23 | indices_to_suppress = [] 24 | for index, index_of_index 
in zip(idxs[:last], range(last)): 25 | detection_proposal = detections[index] 26 | overlap = overlap_function(detections[ii], detection_proposal) 27 | if overlap > overlap_thresh: 28 | indices_to_suppress.append(index_of_index) 29 | # Add the the pruned_detections. 30 | pruned_detections.append(detections[ii]) 31 | indices_to_suppress.append(last) 32 | idxs = np.delete(idxs, indices_to_suppress) 33 | 34 | # return only the bounding boxes that were picked 35 | return prune_by_min_height(pruned_detections) 36 | 37 | 38 | def prune_by_min_height(detections): 39 | pruned_detections = [] 40 | for detection in detections: 41 | if detection.bbox[1][0] - detection.bbox[0][0] < 12: 42 | continue 43 | pruned_detections.append(detection) 44 | return pruned_detections 45 | 46 | 47 | def create_order_by_lower_y(detections): 48 | idxs = [] 49 | for detection in detections: 50 | idxs.append(detection.bbox[1][1]) 51 | idxs = np.argsort(idxs) 52 | return idxs 53 | 54 | 55 | def create_order_by_score(detections): 56 | idxs = [] 57 | for detection in detections: 58 | idxs.append(detection.score) 59 | idxs = np.argsort(idxs) 60 | return idxs 61 | 62 | 63 | def get_2d_one_way_iou(detection_one, detection_two): 64 | box_one = np.array( 65 | [ 66 | detection_one.bbox[0][0], 67 | detection_one.bbox[0][1], 68 | detection_one.bbox[1][0], 69 | detection_one.bbox[1][1], 70 | ] 71 | ) 72 | box_two = np.array( 73 | [ 74 | detection_two.bbox[0][0], 75 | detection_two.bbox[0][1], 76 | detection_two.bbox[1][0], 77 | detection_two.bbox[1][1], 78 | ] 79 | ) 80 | # determine the (x, y)-coordinates of the intersection rectangle 81 | xA = max(box_one[0], box_two[0]) 82 | yA = max(box_one[1], box_two[1]) 83 | xB = min(box_one[2], box_two[2]) 84 | yB = min(box_one[3], box_two[3]) 85 | # compute the area of intersection rectangle 86 | inter_area = max(0, xB - xA + 1) * max(0, yB - yA + 1) 87 | # compute the area of both the prediction and ground-truth 88 | # rectangles 89 | box_one_area = (box_one[2] - box_one[0] + 1) * (box_one[3] - box_one[1] + 1) 90 | box_two_area = (box_two[2] - box_two[0] + 1) * (box_two[3] - box_two[1] + 1) 91 | # compute the intersection over union by taking the intersection 92 | # area and dividing it by the sum of prediction + ground-truth 93 | # areas - the interesection area 94 | if float(box_one_area) == 0.0: 95 | return 0 96 | return inter_area / float(box_one_area + box_two_area - inter_area) 97 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/cost_volume.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
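# Editor's note (added for clarity, not part of the original file): the functions
# below correlate left/right stereo feature maps of shape (B, C, H, W) over
# `num_disparities` horizontal shifts. cost_volume keeps the channel dimension and
# returns a (B, C, D, H, W) volume, while dot_product_cost_volume averages over
# channels and returns (B, D, H, W); pixels without a valid counterpart at a given
# shift are left at zero.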
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | 
7 | @torch.jit.script
8 | def cost_volume(left, right, num_disparities: int, is_right: bool):
9 |     batch_size, channels, height, width = left.shape
10 | 
11 |     output = torch.zeros(
12 |         (batch_size, channels, num_disparities, height, width),
13 |         dtype=left.dtype,
14 |         device=left.device,
15 |     )
16 | 
17 |     for i in range(num_disparities):
18 |         if not is_right:
19 |             output[:, :, i, :, i:] = left[:, :, :, i:] * right[:, :, :, : width - i]
20 |         else:
21 |             output[:, :, i, :, : width - i] = (
22 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
23 |             )
24 | 
25 |     return output
26 | 
27 | 
28 | class CostVolume(nn.Module):
29 |     """Compute cost volume using cross correlation of left and right feature maps"""
30 | 
31 |     def __init__(self, num_disparities, is_right=False):
32 |         super().__init__()
33 |         self.num_disparities = num_disparities
34 |         self.is_right = is_right
35 | 
36 |     def forward(self, left, right):
37 |         if torch.jit.is_scripting():
38 |             return cost_volume(left, right, self.num_disparities, self.is_right)
39 |         else:
40 |             return self.forward_with_amp(left, right)
41 | 
42 |     @torch.jit.unused
43 |     def forward_with_amp(self, left, right):
44 |         """This operation is unstable at float16, so compute at float32 even when using mixed precision"""
45 |         with torch.cuda.amp.autocast(enabled=False):
46 |             left = left.to(torch.float32)
47 |             right = right.to(torch.float32)
48 |             output = cost_volume(left, right, self.num_disparities, self.is_right)
49 |             output = torch.clamp(output, -1e3, 1e3)
50 |             return output
51 | 
52 | 
53 | @torch.jit.script
54 | def dot_product_cost_volume(left, right, num_disparities: int, is_right: bool):
55 |     batch_size, channels, height, width = left.shape
56 | 
57 |     output = torch.zeros(
58 |         (batch_size, num_disparities, height, width),
59 |         dtype=left.dtype,
60 |         device=left.device,
61 |     )
62 | 
63 |     for i in range(num_disparities):
64 |         if not is_right:
65 |             output[:, i, :, i:] = (
66 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
67 |             ).mean(dim=1)
68 |         else:
69 |             output[:, i, :, : width - i] = (
70 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
71 |             ).mean(dim=1)
72 | 
73 |     return output
74 | 
75 | 
76 | class DotProductCostVolume(nn.Module):
77 |     """Compute cost volume using dot product of left and right feature maps"""
78 | 
79 |     def __init__(self, num_disparities, is_right=False):
80 |         super().__init__()
81 |         self.num_disparities = num_disparities
82 |         self.is_right = is_right
83 | 
84 |     def forward(self, left, right):
85 |         return dot_product_cost_volume(left, right, self.num_disparities, self.is_right)
86 | 
87 |     @torch.jit.unused
88 |     def forward_with_amp(self, left, right):
89 |         """This operation is unstable at float16, so compute at float32 even when using mixed precision"""
90 |         with torch.cuda.amp.autocast(enabled=False):
91 |             left = left.to(torch.float32)
92 |             right = right.to(torch.float32)
93 |             output = dot_product_cost_volume(
94 |                 left, right, self.num_disparities, self.is_right
95 |             )
96 |             output = torch.clamp(output, -1e3, 1e3)
97 |             return output
--------------------------------------------------------------------------------
/CARTO/simnet/lib/net/pre_processing/box_inputs.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import multivariate_normal
3 | 
4 | from CARTO.simnet.lib import datapoint
5 | 
6 | _DOWNSCALE_VALUE = 1
7 | _PEAK_CONCENTRATION = 0.8
8 | 
9 | 
10 | def compute_network_targets(boxes, masks, height, width):
11 |     if len(boxes) == 0:
12 |         return datapoint.Box(
13 |             heat_map=np.zeros([height, width]),
14 |             vertex_target=np.zeros([height, width, 4]),
15 |         )
16 |     heatmaps = compute_heatmaps_from_masks(masks)
17 |     vertex_target = compute_vertex_field(boxes, heatmaps)
18 |     return datapoint.Box(heat_map=np.max(heatmaps, axis=0), vertex_target=vertex_target)
19 | 
20 | 
21 | def compute_network_targets_from_detections(
22 |     detections, occ_threshold, min_height, truncation_level, height, width
23 | ):
24 |     detections_marked, detection_masks = mark_ignore_in_box_detections(detections)
25 |     ignore_mask, boxes, masks = np.zeros([height, width]), [], []
26 |     for detection, mask in zip(detections_marked, detection_masks):
27 |         if not detection.ignore:
28 |             boxes.append(detection)
29 |             masks.append(mask)
30 |         else:
31 |             ignore_mask[mask] = 1.0
32 |     if len(boxes) == 0:
33 |         return datapoint.Box(
34 |             heat_map=np.zeros([height, width]),
35 |             vertex_target=np.zeros([height, width, 4]),
36 |         )
37 |     heatmaps = compute_heatmaps_from_masks(masks)
38 |     vertex_target = compute_vertex_field(boxes, heatmaps)
39 |     return datapoint.Box(
40 |         heat_map=np.max(heatmaps, axis=0),
41 |         vertex_target=vertex_target,
42 |         ignore_mask=ignore_mask,
43 |     )
44 | 
45 | 
46 | def compute_heatmaps_from_masks(masks):
47 |     heatmaps = [compute_heatmap_from_mask(mask) for mask in masks]
48 |     return heatmaps
49 | 
50 | 
51 | def compute_heatmap_from_mask(mask):
52 |     if np.sum(mask) == 0:
53 |         raise ValueError("Mask is empty")
54 |     coords = np.indices(mask.shape)
55 |     coords = coords.reshape([2, -1]).T
56 |     mask_f = mask.flatten()
57 |     indices = coords[np.where(mask_f > 0)]
58 |     mean_value = np.floor(np.average(indices, axis=0))
59 |     cov = np.cov((indices - mean_value).T)
60 |     cov = cov * _PEAK_CONCENTRATION
61 |     multi_var = multivariate_normal(mean=mean_value, cov=cov)
62 |     density = multi_var.pdf(coords)
63 |     heat_map = np.zeros(mask.shape)
64 |     heat_map[coords[:, 0], coords[:, 1]] = density
65 |     return heat_map / np.max(heat_map)
66 | 
67 | 
68 | def compute_vertex_field(bboxes, heatmaps):
69 |     H, W = heatmaps[0].shape[0], heatmaps[0].shape[1]
70 |     # For each vertex compute the displacement field.
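    # Editor's note (added for clarity, not part of the original file): each pixel
    # is assigned to the box whose heatmap value is largest there, and the target
    # stores the offset from that pixel to each of the two box corners, mapped into
    # (0, 1) via 1 - (offset + H) / (2 * H) for rows (analogously with W for columns).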
71 | disp_fields = [] 72 | vertex_target = np.zeros( 73 | [len(bboxes), int(H / _DOWNSCALE_VALUE), int(W / _DOWNSCALE_VALUE), 4] 74 | ) 75 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 76 | for i in range(2): 77 | vertex_points = [] 78 | coords = np.indices([H, W]) 79 | coords = coords.transpose((1, 2, 0)) 80 | for box_idx, bbox, heatmap in zip(range(len(bboxes)), bboxes, heatmaps): 81 | disp_field = np.zeros([H, W, 2]) 82 | vertex_point = np.array([bbox[i][0], bbox[i][1]]) 83 | mask = heatmap_indices == box_idx 84 | disp_field[mask] = coords[mask] - vertex_point 85 | # Normalize by height and width 86 | disp_field[mask, 0] = 1.0 - (disp_field[mask, 0] + H) / (2 * H) 87 | disp_field[mask, 1] = 1.0 - (disp_field[mask, 1] + W) / (2 * W) 88 | vertex_target[box_idx, :, :, (2 * i) : (2 * i) + 2] = disp_field[ 89 | ::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE 90 | ] 91 | return np.max(vertex_target, axis=0) 92 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/pose_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.post_processing import epnp 5 | from CARTO.simnet.lib.label import Pose 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | 13 | def compute_network_targets(poses, masks, camera_model): 14 | heatmaps = compute_heatmaps_from_masks(masks) 15 | vertex_target = compute_vertex_field(poses, heatmaps, camera_model) 16 | z_centroid = compute_z_centroid_field(poses, heatmaps) 17 | return datapoint.Pose( 18 | heat_map=np.max(heatmaps, axis=0), 19 | vertex_target=vertex_target, 20 | z_centroid=z_centroid, 21 | ) 22 | 23 | 24 | def compute_heatmaps_from_masks(masks): 25 | heatmaps = [compute_heatmap_from_mask(mask) for mask in masks] 26 | return heatmaps 27 | 28 | 29 | def compute_heatmap_from_mask(mask): 30 | if np.sum(mask) == 0: 31 | raise ValueError("Mask is empty") 32 | coords = np.indices(mask.shape) 33 | coords = coords.reshape([2, -1]).T 34 | mask_f = mask.flatten() 35 | indices = coords[np.where(mask_f > 0)] 36 | mean_value = np.floor(np.average(indices, axis=0)) 37 | cov = np.cov((indices - mean_value).T) 38 | cov = cov * _PEAK_CONCENTRATION 39 | multi_var = multivariate_normal(mean=mean_value, cov=cov) 40 | density = multi_var.pdf(coords) 41 | heat_map = np.zeros(mask.shape) 42 | heat_map[coords[:, 0], coords[:, 1]] = density 43 | return heat_map / np.max(heat_map) 44 | 45 | 46 | def compute_vertex_field(poses, heatmaps, camera_model): 47 | H, W = heatmaps[0].shape[0], heatmaps[0].shape[1] 48 | # Compute the projected box pixels. 49 | boxes = [] 50 | for pose in poses: 51 | pose_no_rot = Pose( 52 | camera_T_object=pose.camera_T_no_rot_object, scale_matrix=pose.scale_matrix 53 | ) 54 | boxes.append(epnp.project_pose_onto_image(pose_no_rot, camera_model)) 55 | # For each vertex compute the displacement field. 
56 | disp_fields = [] 57 | vertex_target = np.zeros( 58 | [len(poses), int(H / _DOWNSCALE_VALUE), int(W / _DOWNSCALE_VALUE), 16] 59 | ) 60 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 61 | for i in range(8): 62 | vertex_points = [] 63 | coords = np.indices([H, W]) 64 | coords = coords.transpose((1, 2, 0)) 65 | for box_idx, bbox, heatmap in zip(range(len(boxes)), boxes, heatmaps): 66 | disp_field = np.zeros([H, W, 2]) 67 | vertex_point = np.array([bbox[i][0], bbox[i][1]]) 68 | mask = heatmap_indices == box_idx 69 | disp_field[mask] = coords[mask] - vertex_point 70 | # Normalize by height and width 71 | disp_field[mask, 0] = 1.0 - (disp_field[mask, 0] + H) / (2 * H) 72 | disp_field[mask, 1] = 1.0 - (disp_field[mask, 1] + W) / (2 * W) 73 | vertex_target[box_idx, :, :, (2 * i) : 2 * i + 2] = disp_field[ 74 | ::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE 75 | ] 76 | return np.max(vertex_target, axis=0) 77 | 78 | 79 | def compute_z_centroid_field(poses, heatmaps): 80 | z_centroid_target = np.zeros( 81 | [len(poses), heatmaps[0].shape[0], heatmaps[0].shape[1]] 82 | ) 83 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 84 | for pose, heat_map, ii in zip(poses, heatmaps, range(len(heatmaps))): 85 | mask = heatmap_indices == ii 86 | z_centroid_target[ii, mask] = pose.camera_T_object[2, 3] 87 | # Normalize z_centroid by 1. and multiply by 10 to avoid tensorrt float precision issues. 88 | return np.sum(z_centroid_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 89 | -------------------------------------------------------------------------------- /scripts/real_dataset_vis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Jupyter environment detected. 
Enabling Open3D WebVisualizer.\n", 23 | "[Open3D INFO] WebRTC GUI backend enabled.\n", 24 | "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "from CARTO.lib.real_data import RealDataset\n", 30 | "import pprint\n", 31 | "import copy\n", 32 | "import open3d as o3d\n", 33 | "import numpy as np\n", 34 | "import matplotlib.pyplot as plt" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "load_pc = True\n", 44 | "real_dataset = RealDataset(\"datasets/real\", load_pc=load_pc)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "for sample_idx in range(len(real_dataset)):\n", 54 | " if load_pc:\n", 55 | " dp, labels, pointcloud = real_dataset[sample_idx]\n", 56 | " else:\n", 57 | " dp, labels = real_dataset[sample_idx]\n", 58 | " break" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "down_pcd = pointcloud.voxel_down_sample(voxel_size=0.02)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "object_dict[\"center\"] = array([-0.00375343, 0.0190836 , 0.55100996])\n", 80 | "object_dict[\"rotation\"] = array([[ 1. , 0. , 0. ],\n", 81 | " [ 0. , -0.2923717 , -0.95630476],\n", 82 | " [ 0. , 0.95630476, -0.2923717 ]])\n", 83 | "object_dict[\"extent\"] = array([0.33348947, 0.38 , 0.19401672])\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "object_obbs = []\n", 89 | "for object_dict in labels[\"objects\"]:\n", 90 | " print(f'{object_dict[\"center\"] = }\\n{object_dict[\"rotation\"] = }\\n{object_dict[\"extent\"] = }')\n", 91 | " object_obb = o3d.geometry.OrientedBoundingBox(\n", 92 | " object_dict[\"center\"], object_dict[\"rotation\"], object_dict[\"extent\"]\n", 93 | " ) \n", 94 | " object_obbs.append(object_obb)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# o3d.visualization.draw_plotly([downpcd] + object_obbs)\n", 104 | "# o3d.visualization.draw_plotly(object_obbs)" 105 | ] 106 | }, 107 | { 108 | "attachments": {}, 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Run Local" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 8, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "o3d.visualization.draw_geometries([down_pcd] + object_obbs)" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "CARTO", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.8.17" 142 | }, 143 | "orig_nbformat": 4 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 2 147 | } 148 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/visualize_dataset.py: -------------------------------------------------------------------------------- 1 | ## Load training SDFs 2 | import argparse 3 | import colorsys 4 | import os 5 | 
import numpy as np 6 | import pathlib 7 | import tqdm 8 | import open3d as o3d 9 | import random 10 | 11 | from traitlets import default 12 | 13 | from CARTO.simnet.lib.datapoint import decompress_datapoint 14 | from CARTO.Decoder import utils 15 | from CARTO.Decoder.data import dataset 16 | from CARTO.Decoder import config 17 | 18 | 19 | def main(args): 20 | file_dir = pathlib.Path(args.file_dir) 21 | dataset_cfg: config.GenerationConfig = utils.load_cfg( 22 | file_dir, cfg_class=config.GenerationConfig 23 | ) 24 | all_files = list(file_dir.glob("*.zstd")) 25 | if args.latest or args.earliest: 26 | all_files.sort(key=lambda x: os.path.getmtime(x), reverse=args.earliest) 27 | else: 28 | print("Shuffling object list") 29 | random.shuffle(all_files) 30 | 31 | counts = utils.AccumulatorDict() 32 | for file_name in all_files: 33 | counts.increment(str(file_name).split("_")[-2], 1) 34 | print(counts) 35 | 36 | pcds = [] 37 | object_ratios = [] 38 | all_max = 0.0 39 | for i, file_path in tqdm.tqdm(enumerate(all_files[: args.n])): 40 | with open(file_path, "rb") as fh: 41 | buf = fh.read() 42 | data_point: dataset.DataPoint = decompress_datapoint(buf) 43 | 44 | if args.sdf: 45 | # print(data_point.keys()) 46 | sdf = data_point.sdf_values 47 | points = data_point.points[sdf <= 0.0] 48 | color = utils.get_random_color() 49 | normals = None 50 | elif args.pc: 51 | points = data_point.full_pc 52 | normals = data_point.full_normals 53 | 54 | if len(points) == 0: 55 | continue 56 | 57 | all_max = max(all_max, np.max(points)) 58 | # if (np.max(points) < 1.0): 59 | # continue 60 | # print("Adding to Visualization") 61 | 62 | points /= dataset_cfg.max_extent 63 | 64 | pcd = o3d.geometry.PointCloud() 65 | pcd.points = o3d.utility.Vector3dVector(points) 66 | if args.pc: 67 | pcd.normals = o3d.utility.Vector3dVector(normals) 68 | if args.sdf: 69 | pcd.paint_uniform_color(color) 70 | pcds.append(pcd) 71 | 72 | if args.sdf: 73 | object_ratios.append(np.count_nonzero(sdf <= 0) / sdf.shape[0]) 74 | 75 | if args.unit_cube: 76 | cube_points = np.array( 77 | [ 78 | [-1.0, -1.0, -1.0], 79 | [1.0, -1.0, -1.0], 80 | [-1.0, 1.0, -1.0], 81 | [1.0, 1.0, -1.0], 82 | [-1.0, -1.0, 1.0], 83 | [1.0, -1.0, 1.0], 84 | [-1.0, 1.0, 1.0], 85 | [1.0, 1.0, 1.0], 86 | ], 87 | dtype=np.float, 88 | ) 89 | # cube_points /= 2 90 | lines = np.array( 91 | [ 92 | [0, 1], 93 | [0, 2], 94 | [1, 3], 95 | [2, 3], 96 | [4, 5], 97 | [4, 6], 98 | [5, 7], 99 | [6, 7], 100 | [0, 4], 101 | [1, 5], 102 | [2, 6], 103 | [3, 7], 104 | ] 105 | ) 106 | colors = [[1, 0, 0] for i in range(len(lines))] 107 | line_set = o3d.geometry.LineSet() 108 | line_set.points = o3d.utility.Vector3dVector(cube_points) 109 | line_set.lines = o3d.utility.Vector2iVector(lines) 110 | line_set.colors = o3d.utility.Vector3dVector(colors) 111 | pcds.append(line_set) 112 | 113 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame()) 114 | o3d.visualization.draw_geometries(pcds) 115 | 116 | print(f"{all_max = }") 117 | 118 | if args.sdf: 119 | print(f"{object_ratios = }\n\tw/ mean {np.array(object_ratios).mean()}") 120 | 121 | 122 | if __name__ == "__main__": 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument("file_dir") 125 | parser.add_argument("-n", type=int, default=100) 126 | parser.add_argument("-l", "--latest", action="store_true", default=False) 127 | parser.add_argument("-e", "--earliest", action="store_true", default=False) 128 | parser.add_argument("-sdf", action="store_true", default=False) 129 | parser.add_argument("-pc", action="store_true", 
default=False) 130 | parser.add_argument("--unit-cube", action="store_true", default=False) 131 | args = parser.parse_args() 132 | main(args) 133 | -------------------------------------------------------------------------------- /CARTO/lib/real_data.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import pathlib 4 | from collections import namedtuple 5 | from typing import Any, Tuple, Union 6 | 7 | import numpy as np 8 | import open3d as o3d 9 | import torch 10 | 11 | from CARTO import ROOT_DIR 12 | from CARTO.simnet.lib import datapoint 13 | from CARTO.simnet.lib.net.dataset import Dataset, PanopticOutputs, create_anaglyph 14 | from CARTO.simnet.lib.net.post_processing.depth_outputs import DepthOutput 15 | 16 | MISSING_LABELS_ID = [ 17 | "YgMop2xGR9QQg3tvtHV8LE", 18 | "7mcbjCWqCUbcmuDRys7pKb", 19 | "ftGQw2kjUAGi4EEKm2sath", 20 | "8GBGdsC7F636882P4rmEC8", 21 | "JLnmWMvp6pN6CuSLLWwoab", 22 | "kYpjYEACcccqbJZX7HrpXG", 23 | "9wRKEke2FaHzNg42rWDisS", 24 | "QLctV2dhgTprak4d9HCwzY", 25 | "S7Ty7vSH6YcyBuDpCmThtD", 26 | ] 27 | 28 | depth_hparams = namedtuple("depth_hparams", ["max_disparity"]) 29 | 30 | 31 | def convert_labels(old_labels): 32 | labels = copy.deepcopy(old_labels) 33 | labels["id"] = labels["filename"].split(".")[0] 34 | for object_idx in range(len(labels["objects"])): 35 | object_dict = labels["objects"][object_idx] 36 | 37 | center = np.array( 38 | [ 39 | object_dict["centroid"]["x"], 40 | object_dict["centroid"]["y"], 41 | object_dict["centroid"]["z"], 42 | ] 43 | ) 44 | zyx_array = np.array( 45 | [ 46 | object_dict["rotations"]["z"], 47 | object_dict["rotations"]["y"], 48 | object_dict["rotations"]["x"], 49 | ] 50 | ) 51 | zyx_array = zyx_array / 180 * np.pi 52 | R = o3d.geometry.get_rotation_matrix_from_zyx(zyx_array) 53 | extent = np.array( 54 | [ 55 | object_dict["dimensions"]["length"], 56 | object_dict["dimensions"]["width"], 57 | object_dict["dimensions"]["height"], 58 | ] 59 | ) 60 | object_dict["center"] = center 61 | object_dict["rotation"] = R 62 | object_dict["extent"] = extent 63 | 64 | del object_dict["centroid"] 65 | del object_dict["rotations"] 66 | del object_dict["dimensions"] 67 | 68 | labels["objects"][object_idx] = object_dict 69 | 70 | del labels["folder"] 71 | del labels["filename"] 72 | del labels["path"] 73 | return labels 74 | 75 | 76 | class RealDataset(Dataset): 77 | def __init__( 78 | self, 79 | dataset_path: Union[str, pathlib.Path], 80 | load_pc: bool = False, 81 | skip_without_labels=True, 82 | ): 83 | self.dataset_path = pathlib.Path(dataset_path) 84 | simnet_dataset = datapoint.make_dataset(str(self.dataset_path / "data")) 85 | self.datapoint_handles = simnet_dataset.list() 86 | if skip_without_labels: 87 | self.datapoint_handles = list( 88 | filter(lambda x: x.uid not in MISSING_LABELS_ID, self.datapoint_handles) 89 | ) 90 | self.load_pc = load_pc 91 | 92 | self.hparams = depth_hparams(max_disparity=180) 93 | 94 | def __len__(self): 95 | return len(self.datapoint_handles) 96 | 97 | def __getitem__(self, idx) -> Tuple[PanopticOutputs, Any, Any]: 98 | # TODO Update the Any! 
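        # Editor's note (added for clarity, not part of the original file): this
        # returns (PanopticOutputs, labels) by default, or (PanopticOutputs, labels,
        # point cloud) when the dataset was built with load_pc=True; labels is the
        # converted JSON dict carrying per-object center / rotation / extent.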
99 | local_handle: datapoint.LocalReadHandle = self.datapoint_handles[idx] 100 | 101 | dp: datapoint.Panoptic = local_handle.read() 102 | anaglyph = create_anaglyph(dp.stereo) 103 | 104 | panoptic_out = PanopticOutputs( 105 | depth=[] 106 | if dp.depth is None 107 | else [DepthOutput(torch.Tensor(dp.depth), self.hparams)], 108 | room_segmentation=[], 109 | cabinet_door_obbs=[], 110 | handhold_obbs=[], 111 | graspable_objects_obbs=[], 112 | grasp_quality_scores=[], 113 | small_depth=[], 114 | val_data=[dp.val_data], 115 | stereo_imgs=[anaglyph], 116 | ) 117 | 118 | # Load labels 119 | with ( 120 | ROOT_DIR / ".." / self.dataset_path / "labels" / f"{local_handle.uid}.json" 121 | ).open() as label_file: 122 | labels = json.load(label_file) 123 | labels = convert_labels(labels) 124 | 125 | if not self.load_pc: 126 | return panoptic_out, labels 127 | 128 | pointcloud_loc = ( 129 | ROOT_DIR 130 | / ".." 131 | / self.dataset_path 132 | / "pointclouds" 133 | / f"{local_handle.uid}.ply" 134 | ) 135 | 136 | pc = o3d.io.read_point_cloud(str(pointcloud_loc)) 137 | 138 | return panoptic_out, labels, pc 139 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/segmentation_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | from CARTO.simnet.lib import color_stuff 9 | from CARTO.simnet.lib import datapoint 10 | 11 | # Panoptic Segmentation Colors 12 | 13 | 14 | class SegmentationOutput: 15 | def __init__(self, seg_pred, hparams): 16 | self.seg_pred = seg_pred 17 | self.is_numpy = False 18 | self.hparams = hparams 19 | 20 | # Converters for torch to numpy 21 | def convert_to_numpy_from_torch(self): 22 | self.seg_pred = np.ascontiguousarray(self.seg_pred.float().cpu().numpy()) 23 | self.is_numpy = True 24 | 25 | def convert_to_torch_from_numpy(self): 26 | self.seg_pred = torch.from_numpy(np.ascontiguousarray(self.seg_pred)).long() 27 | self.is_numpy = False 28 | 29 | def get_visualization_img(self, left_image, is_target=False): 30 | if not self.is_numpy: 31 | self.convert_to_numpy_from_torch() 32 | if is_target: 33 | seg_mask = self.seg_pred 34 | max_number = int(seg_mask.max()) + 1 35 | else: 36 | seg_mask = np.argmax(self.seg_pred, axis=1)[0] 37 | assert self.seg_pred.ndim == 4 # 1 x L x H x W 38 | max_number = int(self.seg_pred.shape[1]) 39 | return draw_segmentation_mask(left_image, seg_mask, num_classes=max_number) 40 | 41 | def get_visualization_img_with_categories( 42 | self, left_image, detections, class_list, is_target=False 43 | ): 44 | if not self.is_numpy: 45 | self.convert_to_numpy_from_torch() 46 | if is_target: 47 | seg_mask_predictions = self.seg_pred 48 | else: 49 | seg_mask_predictions = np.argmax(self.seg_pred[0], axis=0) 50 | 51 | return draw_segmentation_mask_with_categories( 52 | left_image, seg_mask_predictions, detections, class_list 53 | ) 54 | 55 | def get_prediction(self): 56 | if not self.is_numpy: 57 | self.convert_to_numpy_from_torch() 58 | return self.seg_pred[0] 59 | 60 | def compute_loss(self, seg_targets, log, name): 61 | if self.is_numpy: 62 | raise ValueError("Output is not in torch mode") 63 | seg_target_stacked = [] 64 | for seg_target in seg_targets: 65 | seg_target_stacked.append(seg_target.seg_pred) 66 | seg_target_batch = torch.stack(seg_target_stacked) 67 | seg_target_batch = 
seg_target_batch.to(torch.device("cuda:0")) 68 | if len(seg_target_batch.shape) == 4: 69 | seg_target_batch = torch.argmax(seg_target_batch, dim=1) 70 | seg_loss = F.cross_entropy( 71 | self.seg_pred, seg_target_batch, reduction="mean", ignore_index=-100 72 | ) 73 | log[name] = seg_loss.item() 74 | return self.hparams.loss_seg_mult * seg_loss 75 | 76 | 77 | def draw_segmentation_mask(color_img, seg_mask, num_classes=7): 78 | assert len(seg_mask.shape) == 2 79 | seg_mask = seg_mask.astype(np.uint8) 80 | # TODO(mike.laskey) Replace this with a set list. 81 | if num_classes == 7: 82 | colors = color_stuff.get_panoptic_colors() 83 | else: 84 | colors = color_stuff.get_colors(num_classes) 85 | 86 | color_img = color_img_to_gray(color_img) 87 | for ii, color in zip(range(num_classes), colors): 88 | if ii == 0: # ignore background class 89 | continue 90 | 91 | colored_mask = np.zeros([seg_mask.shape[0], seg_mask.shape[1], 3]) 92 | colored_mask[seg_mask == ii, :] = color 93 | color_img = cv2.addWeighted( 94 | color_img.astype(np.uint8), 0.9, colored_mask.astype(np.uint8), 0.4, 0 95 | ) 96 | return cv2.cvtColor(color_img.astype(np.uint8), cv2.COLOR_BGR2RGB) 97 | 98 | 99 | def color_img_to_gray(image): 100 | gray_scale_img = np.zeros(image.shape) 101 | img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 102 | for i in range(3): 103 | gray_scale_img[:, :, i] = img 104 | gray_scale_img[:, :, i] = img 105 | return gray_scale_img 106 | 107 | 108 | def draw_segmentation_mask_with_categories(color_img, seg_mask, detections, class_list): 109 | assert len(seg_mask.shape) == 2 110 | seg_mask = seg_mask.astype(np.int) 111 | seg_mask_vis = draw_segmentation_mask( 112 | color_img, seg_mask, num_classes=len(class_list) 113 | ) 114 | for detection in detections: 115 | pixel_x = int(detection[0]) 116 | pixel_y = int(detection[1]) 117 | 118 | category_id = seg_mask[pixel_x, pixel_y] 119 | category = class_list[category_id] 120 | 121 | if category.name == "background": 122 | color = (255, 0, 0) # dark blue 123 | else: 124 | color = (255, 128, 128) # light blue 125 | 126 | seg_mask_vis = cv2.putText( 127 | seg_mask_vis, 128 | category.name, 129 | (pixel_y, pixel_x), 130 | cv2.FONT_HERSHEY_SIMPLEX, 131 | 1, 132 | color, 133 | 2, 134 | cv2.LINE_AA, 135 | ) 136 | return seg_mask_vis 137 | -------------------------------------------------------------------------------- /scripts/preprocess_partnetmobility.py: -------------------------------------------------------------------------------- 1 | #!/opt/mmt/python_venv/bin/python 2 | 3 | import argparse 4 | import json 5 | import logging 6 | import pathlib 7 | import random 8 | import subprocess 9 | from collections import defaultdict 10 | from concurrent import futures 11 | import tarfile 12 | 13 | import numpy as np 14 | import tqdm 15 | import trimesh 16 | import urdfpy 17 | import zstandard as zstd 18 | from CARTO.lib.partnet_mobility import get_joint_dict 19 | from CARTO.lib.compression import write_compressed_json 20 | 21 | 22 | PARALLEL = True 23 | 24 | 25 | def identity_matrix(): 26 | return np.eye(4) 27 | 28 | 29 | around_z_neg_90 = trimesh.transformations.rotation_matrix( 30 | np.pi / 2, np.array([0.0, 0.0, -1.0]) 31 | ) 32 | 33 | # Dictionaries for transformations of objects that might be not in a canonical way! 
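# Editor's note (added for clarity, not part of the original file): both lookups
# below default to the identity matrix, so only the categories/instances listed
# explicitly (e.g. "Pliers") receive an extra rotation; process_model later stores
# trans_cat @ trans_ins as the object's canonical_transformation.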
34 | canonical_transformations_cat = defaultdict( 35 | identity_matrix, 36 | { 37 | "Pliers": around_z_neg_90, 38 | # "Scissors": around_z_neg_90 39 | }, 40 | ) 41 | canonical_transformations_instance = defaultdict( 42 | identity_matrix, {"d01ff66659767d50cee19268a161fc4a": around_z_neg_90} 43 | ) 44 | 45 | 46 | def main(top_dir=pathlib.Path("datasets/partnet-mobility-v0/raw_dataset")): 47 | model_dirs = (top_dir).glob("*") 48 | 49 | (top_dir / ".." / "tarfiles").mkdir(exist_ok=True, parents=True) 50 | 51 | full_index = [] 52 | if PARALLEL: 53 | all_futures = [] 54 | with futures.ProcessPoolExecutor() as executor: 55 | for model_dir in model_dirs: 56 | all_futures.append(executor.submit(process_model, model_dir)) 57 | with tqdm.tqdm(total=len(all_futures)) as pbar: 58 | for future in futures.as_completed(all_futures): 59 | pbar.update(1) 60 | full_index.append(future.result()) 61 | else: 62 | for model_dir in tqdm.tqdm(model_dirs, total=len(model_dirs)): 63 | full_index.append(process_model(model_dir)) 64 | 65 | index = [meta for (meta, safe) in full_index if safe] 66 | print(f"Found {len(full_index)} models but only {len(index)} are safe") 67 | 68 | print("Writing index") 69 | index = sorted(index, key=lambda x: x["model_id"]) 70 | index_path = top_dir / ".." / "index.json.zst" 71 | write_compressed_json(index, index_path) 72 | 73 | 74 | def load_semantics(semantics_file): 75 | joint_meta_info = {} 76 | for line in semantics_file.readlines(): 77 | line_entries = line.rstrip("\n").split(" ") 78 | joint_meta_info[f"joint_{int(line_entries[0].split('_')[1])}"] = { 79 | "sem_type": line_entries[1], 80 | "sem_name": line_entries[2], 81 | } 82 | return joint_meta_info 83 | 84 | 85 | def process_model(model_dir: pathlib.Path): 86 | with open(model_dir / "meta.json") as fh: 87 | meta = json.load(fh) 88 | assert "model_id" in meta 89 | model_id = meta["model_id"] 90 | 91 | # Create tar-ball 92 | all_paths = model_dir.glob("**/*") 93 | tar_path = model_dir / ".." / ".." / "tarfiles" / (model_id + ".tar.zst") 94 | cctx = zstd.ZstdCompressor() 95 | with open(tar_path, "wb") as raw_fh: 96 | with cctx.stream_writer(raw_fh) as zst_fh: 97 | with tarfile.open(fileobj=zst_fh, mode="w") as tar: 98 | for path in all_paths: 99 | rel_path = path.relative_to(model_dir) 100 | tar.add(str(path), arcname=str(rel_path), recursive=False) 101 | 102 | tar_bytes = tar_path.stat().st_size 103 | meta["num_bytes"] = tar_bytes 104 | 105 | with open(model_dir / "semantics.txt") as fh: 106 | joint_semantics = load_semantics(fh) 107 | 108 | # Try loading the URDF 109 | # This step is important as PartNetMobility might miss some .obj! 
110 | try: 111 | urdf = urdfpy.URDF.load(str(model_dir / "mobility.urdf")) 112 | except ValueError as e: 113 | logging.warning(f"urdfpy could not load model at {model_dir} with error\n{e}") 114 | return None, False 115 | 116 | # Manually parse relevant joint informations for saving in index 117 | joint: urdfpy.Joint 118 | for joint in urdf.joints: 119 | try: 120 | joint_dict = get_joint_dict(joint) 121 | joint_semantics[joint_dict["id"]].update(joint_dict) 122 | except: 123 | has_slider_plus = False 124 | for joint_semants in joint_semantics.values(): 125 | has_slider_plus |= joint_semants["sem_type"] == "slider+" 126 | if not has_slider_plus: 127 | print(f"--- {model_id} @ {model_dir} ---") 128 | print(f"{joint_semantics = }") 129 | print(f"{joint_dict = }") 130 | else: 131 | print(f"-- Found 'slider+'-type") 132 | meta["joints"] = joint_semantics 133 | 134 | trans_cat = canonical_transformations_cat[meta["model_cat"]] 135 | trans_ins = canonical_transformations_instance[model_id] 136 | meta["canonical_transformation"] = (trans_cat @ trans_ins).tolist() 137 | 138 | return meta, True 139 | 140 | 141 | if __name__ == "__main__": 142 | parser = argparse.ArgumentParser("Create index for PartnetMobility V0") 143 | args = parser.parse_args() 144 | main() 145 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/onnx_plugins.py: -------------------------------------------------------------------------------- 1 | """Plugins that can be used in an ONNX model.""" 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.nn.modules.utils as utils 7 | 8 | # TODO(krishnashankar): Arguments of functions in modules below 9 | # differ from those of the base class(es) they inherit from, and 10 | # pylint complains. For now, disable here and consider disabling 11 | # globally. 
12 | 13 | # pylint: disable=arguments-differ 14 | # pylint: disable=protected-access 15 | 16 | 17 | @torch.autograd.function.traceable 18 | class ExportableUpsampleFunction(torch.autograd.Function): 19 | """Upsample function that can be traced for ONNX export.""" 20 | 21 | @staticmethod 22 | def symbolic(g, inputs, scale_factor): 23 | assert scale_factor == 2, "Only 2x upsample implemented" 24 | return g.op( 25 | "TRT_PluginV2", 26 | inputs, 27 | version_s="0.0.1", 28 | namespace_s="", 29 | data_s="", 30 | name_s="UpsampleBilinearEvenSquare", 31 | ) 32 | 33 | @staticmethod 34 | def forward(ctx, inputs, scale_factor): 35 | return F.interpolate( 36 | inputs, scale_factor=scale_factor, mode="bilinear", align_corners=False 37 | ) 38 | 39 | @staticmethod 40 | def backward(_): 41 | raise RuntimeError("Backward not implemented") 42 | 43 | 44 | class ExportableUpsample(nn.Module): 45 | """Upsample module that can be used in an ONNX model.""" 46 | 47 | def __init__(self, scale_factor): 48 | super().__init__() 49 | self.scale_factor = scale_factor 50 | 51 | def forward(self, inputs): 52 | return ExportableUpsampleFunction.apply(inputs, self.scale_factor) 53 | 54 | 55 | class UpsampleWithConvTranspose(nn.Module): 56 | """Upsample model implemented with transposed convolution.""" 57 | 58 | def __init__(self, scale_factor): 59 | super(UpsampleWithConvTranspose, self).__init__() 60 | self.weights = None 61 | self.scale_factor = utils._pair(scale_factor) 62 | 63 | def check_scale_factor(scale_factor): 64 | assert scale_factor == 1 or scale_factor % 2 == 0 65 | 66 | check_scale_factor(self.scale_factor[0]) 67 | check_scale_factor(self.scale_factor[1]) 68 | 69 | def get_kernel_size(self, factor): 70 | return 2 * factor - factor % 2 71 | 72 | def bilinear_upsample_kernel(self, size): 73 | """Get a transpoed convolution kernel that implemented upsampling for the 74 | given size.""" 75 | 76 | def get_factor_and_center(size): 77 | factor = (size + 1) // 2 78 | if size % 2 == 1: 79 | center = factor - 1 80 | else: 81 | center = factor - 0.5 82 | return factor, center 83 | 84 | factor_h, center_h = get_factor_and_center(size[0]) 85 | factor_w, center_w = get_factor_and_center(size[1]) 86 | og = np.ogrid[: size[0], : size[1]] 87 | return (1 - abs((og[0] - center_h) / factor_h)) * ( 88 | 1 - abs((og[1] - center_w) / factor_w) 89 | ) 90 | 91 | def bilinear_upsample_weights(self, factor, nchannels): 92 | """Get transposed convolution weights for upsampling.""" 93 | filter_size_h = self.get_kernel_size(factor[0]) 94 | filter_size_w = self.get_kernel_size(factor[1]) 95 | 96 | weights = np.zeros( 97 | (filter_size_h, filter_size_w, nchannels, nchannels), dtype=np.float32 98 | ) 99 | 100 | kernel = self.bilinear_upsample_kernel((filter_size_h, filter_size_w)) 101 | 102 | for c in range(nchannels): 103 | weights[:, :, c, c] = kernel 104 | 105 | return weights 106 | 107 | def forward(self, inputs): 108 | in_channels = inputs.shape[1] 109 | if self.weights is None: 110 | weights = self.bilinear_upsample_weights(self.scale_factor, in_channels) 111 | # Order weights to be compatible with pytorch (in_channels, out_channels, height, width). 
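            # Editor's note (added for clarity, not part of the original file): for
            # scale_factor 2 the 1-D bilinear profile is [0.25, 0.75, 0.75, 0.25]; the
            # 4x4 kernel is its outer product, placed on the (c, c) diagonal so every
            # channel is upsampled independently.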
112 | self.weights = ( 113 | torch.from_numpy(weights.transpose(2, 3, 0, 1)) 114 | .to(inputs.device) 115 | .type(inputs.dtype) 116 | ) 117 | output = torch.nn.functional.conv_transpose2d( 118 | inputs, 119 | self.weights, 120 | stride=self.scale_factor, 121 | padding=(self.scale_factor[0] // 2, self.scale_factor[1] // 2), 122 | ) 123 | return output 124 | 125 | 126 | def fix_module(module): 127 | """Replace all modules in the given module with ONNX-compatible modules.""" 128 | for child_module_name, child_module in module.named_children(): 129 | if isinstance(child_module, nn.Upsample): 130 | scale_factor = int(child_module.scale_factor) 131 | # TensorRT plugin can only load 2x upsample from ONNX currently, so 132 | # otherwise use transposed convolution. 133 | if False and scale_factor == 2: 134 | module._modules[child_module_name] = ExportableUpsample(scale_factor) 135 | else: 136 | module._modules[child_module_name] = UpsampleWithConvTranspose( 137 | scale_factor 138 | ) 139 | elif len(list(child_module.children())) > 0: 140 | fix_module(child_module) 141 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/asdf_dataset.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import torch 3 | import sys 4 | import json 5 | import numpy as np 6 | import trimesh 7 | import logging 8 | 9 | from CARTO.Decoder.data.dataset import DataPoint 10 | 11 | from typing import List, Dict 12 | 13 | ## Adding ASDF to our search path 14 | # For code release: should or maybe could be a submodule 15 | ASDF_BASE_PATH = "external_libs/A-SDF" 16 | sys.path.append(ASDF_BASE_PATH) 17 | 18 | try: 19 | from asdf.data import SDFSamples 20 | except Exception as e: 21 | logging.critical(e, exc_info=True) # log exception info at CRITICAL log level 22 | 23 | 24 | class ASDFDataset(torch.utils.data.Dataset): 25 | def __init__( 26 | self, 27 | all_file_ids: List[pathlib.Path], 28 | subsample_amount: int = 12500, 29 | load_ram: bool = True, 30 | train: bool = True, 31 | load_gt: bool = False, 32 | ): 33 | ### Create A-SDF datasets.. 
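        # Editor's note (added for clarity, not part of the original file): the wrapped
        # A-SDF splits are concatenated; self.stops holds cumulative dataset lengths
        # (starting at 0), and __getitem__ uses them to map a global index to the
        # right sub-dataset and a local index within it.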
34 | 35 | all_file_ids = [ 36 | f"{file_ids}_{'train' if train else 'test'}.json" 37 | for file_ids in all_file_ids 38 | ] 39 | self.all_file_splits = [] 40 | self.dataset_categories = [] 41 | 42 | for file_ids in all_file_ids: 43 | with open(pathlib.Path(ASDF_BASE_PATH) / file_ids, "r") as f: 44 | json_split = json.load(f) 45 | self.all_file_splits.append(json_split) 46 | self.dataset_categories.append(list(json_split["shape2motion"].keys())[0]) 47 | 48 | self.ASDF_datasets = [] 49 | self.stops = [0] 50 | 51 | for file_split in self.all_file_splits: 52 | asdf_set = SDFSamples( 53 | pathlib.Path(ASDF_BASE_PATH) / "data", 54 | file_split, 55 | subsample_amount, 56 | load_ram=load_ram, 57 | articulation=True, 58 | ) 59 | 60 | self.ASDF_datasets.append(asdf_set) 61 | self.stops.append(self.stops[-1] + len(asdf_set)) 62 | 63 | self.stops = np.array(self.stops) 64 | self.load_gt = load_gt 65 | 66 | def __len__(self) -> int: 67 | return self.stops[-1] 68 | 69 | def __getitem__(self, idx: int) -> DataPoint: 70 | dataset_idx = len(self.stops) - np.count_nonzero(idx < self.stops) - 1 71 | category = self.dataset_categories[dataset_idx] 72 | 73 | # dataset_idx = 74 | if category == "laptop": 75 | limits = [-1.5708, 0.0] # Upper limit does not matter 76 | else: 77 | limits = [0.0, 0.0] 78 | 79 | local_idx = idx - self.stops[dataset_idx] 80 | asdf_data = self.ASDF_datasets[dataset_idx][local_idx] 81 | (tensor, joint_state, instance_id), i = asdf_data 82 | points = tensor[:, :3] 83 | sdf = tensor[:, 3] 84 | parts = tensor[:, 4] 85 | 86 | datapoint = DataPoint( 87 | object_id=f"{category}_{instance_id}", 88 | joint_config_id=str(idx), 89 | joint_config={"joint": float(joint_state / 180 * np.pi)}, 90 | points=points.float().cpu(), 91 | sdf_values=sdf.float().cpu(), 92 | ) 93 | datapoint.joint_def = { 94 | "joint": { 95 | "type": "revolute", # All ASDFs objects are revolute 96 | "limit": limits, 97 | } 98 | } 99 | if not self.load_gt: 100 | return datapoint 101 | 102 | corresponding_split = self.all_file_splits[dataset_idx] 103 | instance_name = f"{corresponding_split['shape2motion'][self.dataset_categories[dataset_idx]][local_idx]}" 104 | 105 | ground_truth_samples_filename = ( 106 | pathlib.Path(ASDF_BASE_PATH) 107 | / "data" 108 | / "SurfaceSamples" 109 | / "shape2motion" 110 | / category 111 | / (instance_name + ".obj") 112 | ) 113 | normalization_params_filename = ( 114 | pathlib.Path(ASDF_BASE_PATH) 115 | / "data" 116 | / "NormalizationParameters" 117 | / "shape2motion" 118 | / category 119 | / (instance_name + ".npz") 120 | ) 121 | 122 | gt_mesh = trimesh.load(ground_truth_samples_filename) 123 | gt_points = gt_mesh.vertices 124 | 125 | # Apply the inverse normalization 126 | normalization_params = np.load(normalization_params_filename) 127 | offset = normalization_params["offset"] 128 | scale = normalization_params["scale"] 129 | gt_points = (gt_points + offset) * scale 130 | datapoint.full_pc = np.copy(gt_points) 131 | 132 | return datapoint 133 | 134 | # Same as for our-SDF 135 | @staticmethod 136 | def collate_fn(datapoints: List[DataPoint]) -> Dict: 137 | return { 138 | "object_id": [datapoint.object_id for datapoint in datapoints], 139 | "joint_config_id": [ 140 | str(datapoint.joint_config_id) for datapoint in datapoints 141 | ], 142 | "joint_config": [datapoint.joint_config for datapoint in datapoints], 143 | "zero_joint_config": [ 144 | datapoint.zero_joint_config for datapoint in datapoints 145 | ], 146 | "joint_definition": [datapoint.joint_def for datapoint in datapoints], 147 | "sdf": 
torch.stack( 148 | [torch.FloatTensor(datapoint.sdf_values) for datapoint in datapoints] 149 | ), 150 | "points": torch.stack( 151 | [torch.FloatTensor(datapoint.points) for datapoint in datapoints] 152 | ), 153 | } 154 | -------------------------------------------------------------------------------- /scripts/full_inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | torch.cuda.set_device(0) 4 | import dataclasses 5 | import pathlib 6 | import random 7 | from typing import Optional 8 | 9 | # Ensure mesh_to_sdf is imported first 10 | import numpy as np 11 | import open3d as o3d 12 | import seaborn as sns 13 | import torch 14 | import tyro 15 | 16 | from CARTO.Encoder.inference import CARTO, CARTOPrediction 17 | from CARTO.lib.real_data import RealDataset 18 | from CARTO.simnet.lib.net.dataset import Dataset, PanopticOutputs 19 | 20 | sns.set() 21 | 22 | import pickle 23 | 24 | import matplotlib.pyplot as plt 25 | import seaborn as sns 26 | import tqdm 27 | 28 | 29 | def save_image(data, file_path: pathlib.Path, FIG_DPI: int = 400): 30 | fig = plt.figure( 31 | dpi=FIG_DPI, figsize=(data.shape[1] / FIG_DPI, data.shape[0] / FIG_DPI) 32 | ) 33 | ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0]) 34 | ax.set_axis_off() 35 | fig.add_axes(ax) 36 | ax.imshow(data) 37 | # plt.tight_layout() 38 | fig.savefig(str(file_path)) 39 | plt.close(fig) 40 | 41 | 42 | @dataclasses.dataclass 43 | class InferenceConfig: 44 | model_id: str = "14i8yfym" 45 | checkpoint_id: Optional[str] = None 46 | seed: int = 12345 47 | start_lod: int = 4 48 | end_lod: int = 7 49 | dataset_path: Optional[str] = None 50 | real_data: bool = ( 51 | False # Real data has a larger image size than synthetic/simulated data 52 | ) 53 | single_sample: int = -1 54 | max_samples: int = 1000 55 | 56 | 57 | def main(cfg: InferenceConfig): 58 | carto = CARTO(cfg.model_id, checkpoint_id=cfg.checkpoint_id) 59 | 60 | if cfg.real_data: 61 | dataset = RealDataset("datasets/real", load_pc=False) 62 | dataset_name = "real" 63 | else: 64 | if cfg.dataset_path is None: 65 | cfg.dataset_path = carto.hparams.test_path 66 | 67 | dataset_name = "_".join(cfg.dataset_path.split("/")[1:]) 68 | dataset = Dataset(cfg.dataset_path, carto.hparams) 69 | print(f"{len(dataset)} samples @ {cfg.dataset_path}") 70 | 71 | iterator = ( 72 | tqdm.tqdm(range(cfg.max_samples)) 73 | if cfg.single_sample < 0 74 | else [cfg.single_sample] 75 | ) 76 | for sample_id in iterator: 77 | vis_dir = carto.model_dir / "vis" / dataset_name / f"full_scene_{sample_id}" 78 | 79 | sample: PanopticOutputs 80 | if cfg.real_data: 81 | sample, _ = dataset[sample_id] 82 | sample.stereo_imgs[0] = sample.stereo_imgs[0][:, ::2, ::2] 83 | if len(sample.depth) > 0: 84 | sample.depth[0].depth_pred = sample.depth[0].depth_pred[::2, ::2] 85 | else: 86 | sample = dataset[sample_id] 87 | 88 | carto_prediction: CARTOPrediction = carto(sample) 89 | carto_prediction.set_vis_dir(vis_dir) 90 | 91 | carto_prediction.save_rgb() 92 | carto_prediction.save_segmentation() 93 | if not carto.hparams.model_rgbd: 94 | carto_prediction.save_depth() 95 | carto_prediction.save_bbox() 96 | 97 | carto_prediction.save_heatmap() 98 | carto_prediction.save_poses() 99 | 100 | ply_objects = carto_prediction.get_canonical_objects(ply=True) 101 | for idx, ply_object in enumerate(ply_objects): 102 | o3d.io.write_point_cloud( 103 | str(vis_dir / f"predicted_pc_{idx:03d}.ply"), ply_object 104 | ) 105 | 106 | #### TODO Add Function in carto_prediction? 
107 | # for shape_id in range(len(latent_embeddings_shape)): 108 | # artciulated_vis_dir = vis_dir / "articulated" / str(shape_id) 109 | # artciulated_vis_dir.mkdir(exist_ok=True, parents=True) 110 | # shape_code = latent_embeddings_shape[shape_id] 111 | # # Overwrite a single joint state 112 | # joint_dict_result = joint_decoder(torch.Tensor(latent_embeddings_arti).cuda()) 113 | # pred_joint_state = joint_dict_result["state"][0].detach().cpu() 114 | # pred_joint_type = utils.get_joint_type_batch(joint_dict_result["type"])[0] 115 | 116 | # latent_embeddings_arti = joint_embedding.poly_fits[pred_joint_type].linspace(60) 117 | # latent_embeddings_shapes = np.tile(shape_code, (latent_embeddings_arti.shape[0], 1)) 118 | 119 | # ply_objects = decoder.get_ply_meshes( 120 | # latent_embeddings_shapes, 121 | # latent_embeddings_arti, 122 | # distance_threshold=1e-2, 123 | # lod_start=4, 124 | # lod_current=8, 125 | # estimate_normals=False, 126 | # chunk_size=5e5 127 | # ) 128 | 129 | # for idx, ply_object in enumerate(ply_objects): 130 | # o3d.io.write_point_cloud(str(artciulated_vis_dir / f"{idx:03d}.ply"), ply_object) 131 | #### TODO Add Function 132 | 133 | carto_prediction.save_2d_points() 134 | carto_prediction.save_pred_obb() 135 | 136 | pose_dicts = { 137 | "abs_pose_output": carto_prediction.get_poses(), 138 | "root_T_camera": sample.val_data[0].root_T_camera, 139 | } 140 | 141 | save_name = str(vis_dir / f"abs_pose_output.pkl") 142 | with open(save_name, "wb") as output: 143 | pickle.dump(pose_dicts, output) 144 | 145 | #### TODO Add function to plot in embedding 146 | # carto_prediction.save_in_embeddings(...) 147 | #### 148 | 149 | 150 | if __name__ == "__main__": 151 | cfg: InferenceConfig = tyro.parse(InferenceConfig) 152 | torch.random.manual_seed(cfg.seed) 153 | random.seed(cfg.seed) 154 | np.random.seed(cfg.seed) 155 | main(cfg) 156 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/fpn_bilinear.py: -------------------------------------------------------------------------------- 1 | # MODIFIED FROM TORCHVISION 0.11.3 TO USE BILINEAR UPSAMPLE 2 | 3 | from collections import OrderedDict 4 | from typing import List, Dict, Optional 5 | 6 | from torch import nn, Tensor 7 | import torchvision 8 | from torchvision.ops.feature_pyramid_network import ExtraFPNBlock 9 | 10 | 11 | class FeaturePyramidNetworkBilinear(nn.Module): 12 | """ 13 | Module that adds a FPN from on top of a set of feature maps. This is based on 14 | `"Feature Pyramid Network for Object Detection" `_. 15 | 16 | The feature maps are currently supposed to be in increasing depth 17 | order. 18 | 19 | The input to the model is expected to be an OrderedDict[Tensor], containing 20 | the feature maps on top of which the FPN will be added. 21 | 22 | Args: 23 | in_channels_list (list[int]): number of channels for each feature map that 24 | is passed to the module 25 | out_channels (int): number of channels of the FPN representation 26 | extra_blocks (ExtraFPNBlock or None): if provided, extra operations will 27 | be performed. 
It is expected to take the fpn features, the original 28 | features and the names of the original features as input, and returns 29 | a new list of feature maps and their corresponding names 30 | 31 | Examples:: 32 | 33 | >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) 34 | >>> # get some dummy data 35 | >>> x = OrderedDict() 36 | >>> x['feat0'] = torch.rand(1, 10, 64, 64) 37 | >>> x['feat2'] = torch.rand(1, 20, 16, 16) 38 | >>> x['feat3'] = torch.rand(1, 30, 8, 8) 39 | >>> # compute the FPN on top of x 40 | >>> output = m(x) 41 | >>> print([(k, v.shape) for k, v in output.items()]) 42 | >>> # returns 43 | >>> [('feat0', torch.Size([1, 5, 64, 64])), 44 | >>> ('feat2', torch.Size([1, 5, 16, 16])), 45 | >>> ('feat3', torch.Size([1, 5, 8, 8]))] 46 | 47 | """ 48 | 49 | def __init__( 50 | self, 51 | in_channels_list: List[int], 52 | out_channels: int, 53 | extra_blocks: Optional[ExtraFPNBlock] = None, 54 | ): 55 | super().__init__() 56 | self.inner_blocks = nn.ModuleList() 57 | self.layer_blocks = nn.ModuleList() 58 | for idx, in_channels in enumerate(in_channels_list): 59 | if in_channels == 0: 60 | raise ValueError("in_channels=0 is currently not supported") 61 | inner_block_module = nn.Conv2d(in_channels, out_channels, 1) 62 | layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1) 63 | self.inner_blocks.append(inner_block_module) 64 | self.layer_blocks.append(layer_block_module) 65 | 66 | # initialize parameters now to avoid modifying the initialization of top_blocks 67 | for m in self.modules(): 68 | if isinstance(m, nn.Conv2d): 69 | nn.init.kaiming_uniform_(m.weight, a=1) 70 | nn.init.constant_(m.bias, 0) 71 | 72 | if extra_blocks is not None: 73 | assert isinstance(extra_blocks, ExtraFPNBlock) 74 | self.extra_blocks = extra_blocks 75 | 76 | self.upsample2 = nn.Upsample( 77 | scale_factor=2, mode="bilinear", align_corners=False 78 | ) 79 | 80 | def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: 81 | """ 82 | This is equivalent to self.inner_blocks[idx](x), 83 | but torchscript doesn't support this yet 84 | """ 85 | num_blocks = len(self.inner_blocks) 86 | if idx < 0: 87 | idx += num_blocks 88 | out = x 89 | for i, module in enumerate(self.inner_blocks): 90 | if i == idx: 91 | out = module(x) 92 | return out 93 | 94 | def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: 95 | """ 96 | This is equivalent to self.layer_blocks[idx](x), 97 | but torchscript doesn't support this yet 98 | """ 99 | num_blocks = len(self.layer_blocks) 100 | if idx < 0: 101 | idx += num_blocks 102 | out = x 103 | for i, module in enumerate(self.layer_blocks): 104 | if i == idx: 105 | out = module(x) 106 | return out 107 | 108 | def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: 109 | """ 110 | Computes the FPN for a set of feature maps. 111 | 112 | Args: 113 | x (OrderedDict[Tensor]): feature maps for each feature level. 114 | 115 | Returns: 116 | results (OrderedDict[Tensor]): feature maps after FPN layers. 117 | They are ordered from highest resolution first. 
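        Top-down merge, with illustrative shapes (assuming adjacent levels differ by
        exactly a factor of 2, which the fixed scale_factor=2 upsampling requires):

            last_inner = inner_blocks[-1](x['feat3'])                          # (1, out, 8, 8)
            merged     = inner_blocks[-2](x['feat2']) + upsample2(last_inner)  # (1, out, 16, 16)

        i.e. the nearest-neighbour upsampling of the stock torchvision FPN is replaced
        by the bilinear `upsample2` defined in __init__.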
118 | """ 119 | # unpack OrderedDict into two lists for easier handling 120 | names = list(x.keys()) 121 | x = list(x.values()) 122 | 123 | last_inner = self.get_result_from_inner_blocks(x[-1], -1) 124 | results = [] 125 | results.append(self.get_result_from_layer_blocks(last_inner, -1)) 126 | 127 | for idx in range(len(x) - 2, -1, -1): 128 | inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) 129 | inner_top_down = self.upsample2(last_inner) 130 | last_inner = inner_lateral + inner_top_down 131 | results.insert(0, self.get_result_from_layer_blocks(last_inner, idx)) 132 | 133 | if self.extra_blocks is not None: 134 | results, names = self.extra_blocks(results, x, names) 135 | 136 | # make it back an OrderedDict 137 | out = OrderedDict([(k, v) for k, v in zip(names, results)]) 138 | 139 | return out 140 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/visualize_asdf_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from CARTO.Decoder.data import asdf_dataset\n", 20 | "import open3d as o3d\n", 21 | "import pathlib\n", 22 | "import random \n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "\n", 33 | "def get_colors(n): \n", 34 | " ret = [] \n", 35 | " r = int(random.random() * 256) \n", 36 | " g = int(random.random() * 256) \n", 37 | " b = int(random.random() * 256) \n", 38 | " step = 256 / n \n", 39 | " for i in range(n): \n", 40 | " r += step \n", 41 | " g += step \n", 42 | " b += step \n", 43 | " r = int(r) % 256 \n", 44 | " g = int(g) % 256 \n", 45 | " b = int(b) % 256 \n", 46 | " ret.append(np.array([r,g,b])/255) \n", 47 | " return ret " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 7, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "category = \"oven\"\n", 57 | "\n", 58 | "local_dataset = asdf_dataset.ASDFDataset(\n", 59 | " [f\"examples/splits/sm_{category}_6_angle\"],\n", 60 | " subsample_amount=1e12, # Very big\n", 61 | " load_ram=False,\n", 62 | " train=True,\n", 63 | " load_gt=True\n", 64 | ")\n", 65 | "\n", 66 | "all_categories = asdf_dataset.ASDFDataset(\n", 67 | " [\n", 68 | " \"examples/splits/sm_door_6_angle\", \"examples/splits/sm_laptop_6_angle\",\n", 69 | " \"examples/splits/sm_oven_6_angle\", \"examples/splits/sm_stapler_6_angle\",\n", 70 | " \"examples/splits/sm_washing_machine_6_angle\"\n", 71 | " ],\n", 72 | " subsample_amount=1e12, # Very big\n", 73 | " load_ram=False,\n", 74 | " train=True\n", 75 | ")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "out_path= pathlib.Path(\"vis/asdf_testing_full_pc\") / category\n", 85 | "out_path.mkdir(exist_ok=True, parents=True)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "all_pcds = []\n", 95 | "N = len(local_dataset) // 6\n", 96 | "colors = get_colors(N)\n", 97 | "\n", 98 | "# for idx in range(0, len(local_dataset), 6):\n", 99 | "for idx in range(0, 
6, 1):\n", 100 | " dp = local_dataset[idx]\n", 101 | " pcd = o3d.geometry.PointCloud()\n", 102 | " # pcd.points = o3d.utility.Vector3dVector(dp.points[dp.sdf_values.abs() < 1e-2])\n", 103 | " pcd.points = o3d.utility.Vector3dVector(dp.full_pc[::3, :])\n", 104 | " pcd.paint_uniform_color(colors[idx // 6])\n", 105 | " print(dp.zero_joint_config)\n", 106 | " o3d.io.write_point_cloud(str(out_path / f\"{idx}.ply\"), pcd)\n", 107 | " print(pcd.get_min_bound())\n", 108 | " print(pcd.get_max_bound())" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 10, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# o3d.visualization.draw_plotly(all_pcds)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 3.8.13", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.8.13" 145 | }, 146 | "orig_nbformat": 4, 147 | "vscode": { 148 | "interpreter": { 149 | "hash": "d94be9d2cbc472181826ec82d481f764c09292b938a47daff0fd759a1975e02c" 150 | } 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/residual_blocks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | def resnet_shortcut(in_channels, out_channels, stride, preact=False): 10 | """Shortcut layer for residual block. 11 | 12 | When the numbers of input and output channels are the same and stride is 13 | equal to 1, no layer is made. 14 | 15 | Args: 16 | in_channels (int): The number of input channels. 17 | out_channels (int): The number of output channels. 18 | stride (int): Stride of the residual block. 19 | preact (bool, optional): If True, make a shortcut for pre-activation 20 | residual block. 21 | 22 | Returns: 23 | Module of shortcut layers. 
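    Example (illustrative channel counts):
        resnet_shortcut(64, 64, 1)                -> None (identity shortcut)
        resnet_shortcut(64, 128, 2)               -> 1x1 strided Conv2d followed by BatchNorm2d
        resnet_shortcut(64, 128, 2, preact=True)  -> 1x1 strided Conv2d only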
24 | """ 25 | if stride == 1 and in_channels == out_channels: 26 | return None 27 | 28 | if preact: 29 | return nn.Conv2d( 30 | in_channels, out_channels, kernel_size=1, stride=stride, bias=False 31 | ) 32 | else: 33 | return nn.Sequential( 34 | nn.Conv2d( 35 | in_channels, out_channels, kernel_size=1, stride=stride, bias=False 36 | ), 37 | nn.BatchNorm2d(out_channels), 38 | ) 39 | 40 | 41 | class ResidualBlock(nn.Module): 42 | """Base class for residual block.""" 43 | 44 | @classmethod 45 | def expansion(cls): 46 | """Expansion rate.""" 47 | raise NotImplementedError 48 | 49 | @classmethod 50 | def preact(cls): 51 | """Pre-activation flag.""" 52 | raise NotImplementedError 53 | 54 | 55 | class PreactBasicResidualBlock(ResidualBlock): 56 | """Pre-activation basic residual block.""" 57 | 58 | def __init__( 59 | self, 60 | in_channels, 61 | base_channels, 62 | stride=1, 63 | dilation_rate=1, 64 | add_preact=True, 65 | add_last_norm=False, 66 | ): 67 | """ 68 | Args: 69 | in_channels (int): The number of input channels. 70 | base_channels (int): The number of output channels. 71 | stride (int, optional): Stride of the residual block. 72 | dilation_rate (int, optional): Dilation rate of the residual block. 73 | add_preact (bool, optional): If True, add pre-activation. 74 | add_last_norm (bool, optional): If True, add batch normalization 75 | after the last convolution. 76 | """ 77 | super().__init__() 78 | if add_preact: 79 | self.preact_bn = nn.BatchNorm2d(in_channels) 80 | else: 81 | self.preact_bn = None 82 | self.conv_shortcut = resnet_shortcut( 83 | in_channels, base_channels, stride, preact=True 84 | ) 85 | self.conv1 = nn.Conv2d( 86 | in_channels, 87 | base_channels, 88 | kernel_size=3, 89 | stride=stride, 90 | padding=dilation_rate, 91 | dilation=dilation_rate, 92 | bias=False, 93 | ) 94 | self.bn1 = nn.BatchNorm2d(base_channels) 95 | self.relu = nn.ReLU(inplace=True) 96 | self.conv2 = nn.Conv2d( 97 | base_channels, 98 | base_channels, 99 | kernel_size=3, 100 | padding=dilation_rate, 101 | dilation=dilation_rate, 102 | bias=False, 103 | ) 104 | self.bn_last = nn.BatchNorm2d(base_channels) if add_last_norm else None 105 | 106 | @classmethod 107 | def expansion(cls): 108 | """Expansion rate, which is a ratio of the number of the output 109 | channels to the number of the base channels in the residual block. 110 | 111 | Returns: 112 | Expansion rate (= 1). 113 | """ 114 | return 1 115 | 116 | @classmethod 117 | def preact(cls): 118 | """Pre-activation flag. 119 | 120 | Returns: 121 | Flag (= True). 122 | """ 123 | return True 124 | 125 | def forward(self, inputs): 126 | """Forward computation. 127 | 128 | Args: 129 | inputs (Tensor): Input tensor. 130 | 131 | Returns: 132 | Output tensor. 133 | """ 134 | if self.conv_shortcut is None: 135 | shortcut_inputs = inputs 136 | else: 137 | shortcut_inputs = self.conv_shortcut(inputs) 138 | 139 | if self.preact_bn is not None: 140 | inputs = self.relu(self.preact_bn(inputs)) 141 | outputs = self.relu(self.bn1(self.conv1(inputs))) 142 | outputs = self.conv2(outputs) 143 | 144 | outputs += shortcut_inputs 145 | 146 | if self.bn_last is not None: 147 | outputs = self.relu(self.bn_last(outputs)) 148 | return outputs 149 | 150 | 151 | def preact_resnet_group( 152 | block_func, in_channels, base_channels, num_blocks, stride=1, dilation_rate=1 153 | ): 154 | """Make a group of pre-activation residual blocks. 155 | 156 | Args: 157 | block_func (ResidualBlock): Function of a residual block. 158 | in_channels (int): The number of input channels. 
159 | base_channels (int): The number of base channels of the residual block. 160 | num_blocks (int): The number of residual blocks. 161 | stride (int, optional): Stride of the first residual block. 162 | dilation_rate (int, optional): Dilation rate of residual blocks. 163 | 164 | Returns: 165 | Module of a group of residual blocks. 166 | """ 167 | assert block_func.preact() 168 | 169 | residual_blocks = [ 170 | block_func( 171 | in_channels, 172 | base_channels, 173 | stride=stride, 174 | dilation_rate=dilation_rate, 175 | add_preact=False, 176 | ) 177 | ] 178 | in_channels = block_func.expansion() * base_channels 179 | for idx in range(1, num_blocks): 180 | residual_blocks.append( 181 | block_func( 182 | in_channels, 183 | base_channels, 184 | dilation_rate=dilation_rate, 185 | add_preact=True, 186 | add_last_norm=idx == num_blocks - 1, 187 | ) 188 | ) 189 | return nn.Sequential(*residual_blocks) 190 | -------------------------------------------------------------------------------- /CARTO/Decoder/loss.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Dict, List, Union, Any 3 | import itertools 4 | import numpy as np 5 | 6 | import urdfpy 7 | import torch 8 | from CARTO.Decoder import utils 9 | 10 | 11 | def articulation_similarity( 12 | A: Dict[str, float], 13 | A_def: Dict[str, Any], 14 | B: Dict[str, float], 15 | B_def: Dict[str, Any], 16 | max_values: Dict[str, float] = defaultdict(lambda: 1), 17 | ) -> float: 18 | """ 19 | Calculates the similarity between two two-level kinematic trees including their joint state 20 | Make sure the joint states are in a canonical state! 21 | """ 22 | # TODO Nick: For now it's very simple 23 | # --> Only one joint 24 | assert len(A) == 1 and len(B) == 1 25 | 26 | joint_id_A = list(A.keys())[0] 27 | joint_id_B = list(B.keys())[0] 28 | 29 | sim: float 30 | if A_def[joint_id_A]["type"] == B_def[joint_id_B]["type"]: 31 | joint_state_A = A[joint_id_A] 32 | joint_state_B = B[joint_id_B] 33 | max_joint_state = max_values[A_def[joint_id_A]["type"]] 34 | # print(max_joint_state) 35 | # L1 36 | # dist = np.abs(joint_state_A - joint_state_B) 37 | # L2 38 | dist = ((joint_state_A - joint_state_B) / max_joint_state) ** 2 39 | sim = utils.exp_kernel(dist) 40 | # sim = utils.distance_to_sim(dist) 41 | # sim = utils.gauss_kernel(dist) 42 | else: 43 | sim = 0.0 44 | 45 | return sim 46 | 47 | 48 | def get_articulation_similarity_matrix( 49 | joint_configs: List[Dict[str, float]], joint_definitions: List[Dict[str, Any]] 50 | ): 51 | """ 52 | Returns a matrix of size NxN given a list of N joint config dicts 53 | """ 54 | # Get max values 55 | # max_values = utils.AccumulatorDict(accumulator=max) 56 | # for joint_config, joint_def in zip(joint_configs, joint_definitions): 57 | # joint_id = list(joint_config.keys())[0] 58 | # max_values.increment(joint_def[joint_id]["type"], joint_config[joint_id]) 59 | max_values = {"prismatic": 0.5, "revolute": 3 / 2 * np.pi} 60 | 61 | sim_matrix = torch.tensor( 62 | [ 63 | [ 64 | articulation_similarity( 65 | joint_config_i, 66 | joint_def_i, 67 | joint_config_j, 68 | joint_def_j, 69 | max_values=max_values, 70 | ) 71 | for joint_config_i, joint_def_i in zip(joint_configs, joint_definitions) 72 | ] 73 | for joint_config_j, joint_def_j in zip(joint_configs, joint_definitions) 74 | ] 75 | ) 76 | return sim_matrix 77 | 78 | 79 | class JointSimLoss(torch.nn.Module): 80 | def __init__(self, joint_config_sim_matrix): 81 | super(JointSimLoss, 
self).__init__() 82 | self.joint_config_sim_matrix = joint_config_sim_matrix 83 | 84 | def forward(self, embedding_matrix): 85 | """ 86 | Calculate the distance loss 87 | """ 88 | # Different sim/distance metrics 89 | # https://elar.urfu.ru/bitstream/10995/3713/2/RuSSIR_2011_07.pdf 90 | # http://dep805.ru/about/sologub/russir2011poster.pdf 91 | # embedding_sim = utils.self_cosine_similarity(joint_config_embedding.weight) 92 | # Not working great --> Bug? 93 | 94 | # embedding_sim = utils.distance_to_sim( 95 | # utils.self_manhattan_distance(joint_config_embedding.weight) 96 | # ) 97 | 98 | # embedding_sim = utils.distance_to_sim( 99 | # utils.self_euclidean_distance(joint_config_embedding.weight) 100 | # ) 101 | 102 | # embedding_sim = torch.exp(-utils.self_manhattan_distance(joint_config_embedding.weight)) 103 | 104 | embedding_sim = torch.exp(-utils.self_euclidean_distance(embedding_matrix)) 105 | joint_config_embedding_loss = torch.nn.functional.l1_loss( 106 | embedding_sim, self.joint_config_sim_matrix 107 | ) 108 | # joint_config_embedding_loss = joint_config_embedding_loss / (embedding_matrix.size()[0]**2) 109 | return joint_config_embedding_loss 110 | 111 | 112 | class JointClassificationLoss(torch.nn.Module): 113 | def __init__(self, multi_class: float = 1.0, multi_state: float = 1.0): 114 | super(JointClassificationLoss, self).__init__() 115 | self.multi_class = multi_class 116 | self.multi_state = multi_state 117 | 118 | def forward( 119 | self, 120 | gt_joint_configs: List[Dict[str, float]], 121 | gt_joint_definitions: List[Dict[str, Any]], 122 | pred_vector: Dict[str, torch.Tensor], 123 | ): 124 | """ 125 | Assumes gt_joint_configs is in zerod state! 126 | """ 127 | assert len(gt_joint_configs[0]) == 1 128 | 129 | # Extract pred 130 | pred_types_one_hot: torch.Tensor = pred_vector["type"] 131 | pred_joint_states: torch.Tensor = pred_vector["state"] 132 | 133 | # Extract GT from batch 134 | gt_types, joint_values = utils.extract_type_and_value( 135 | gt_joint_definitions, gt_joint_configs 136 | ) 137 | gt_types_index = utils.encode_joint_types(gt_types).to( 138 | pred_types_one_hot.device 139 | ) 140 | gt_joint_states = ( 141 | torch.Tensor(joint_values).to(pred_joint_states.device).unsqueeze(-1) 142 | ) 143 | 144 | class_loss = torch.nn.functional.cross_entropy( 145 | pred_types_one_hot, gt_types_index 146 | ) 147 | state_loss = torch.nn.functional.mse_loss(pred_joint_states, gt_joint_states) 148 | 149 | return self.multi_class * class_loss + self.multi_state * state_loss, { 150 | "class": class_loss.item(), 151 | "state": state_loss.item(), 152 | } 153 | 154 | 155 | class JointZeroOneLoss(torch.nn.Module): 156 | def __init__(self): 157 | super(JointZeroOneLoss, self).__init__() 158 | 159 | def forward( 160 | self, 161 | gt_joint_configs: List[Dict[str, float]], 162 | gt_joint_definitions: List[Dict[str, Any]], 163 | pred_vector: Dict[str, torch.Tensor], 164 | ): 165 | """ 166 | Assumes gt_joint_configs is in zerod state! 
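        The regression target is assumed to be the joint state normalised into [0, 1] by its
        limits, roughly (q - lower) / (upper - lower) for a joint value q; this is a sketch of
        what utils.extract_zero_one_in_limits is expected to return, using the per-joint
        "limit" entries of gt_joint_definitions.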
167 | """ 168 | assert len(gt_joint_configs[0]) == 1 169 | 170 | pred_zero_one = pred_vector["state"] 171 | gt_zero_one = ( 172 | torch.Tensor( 173 | utils.extract_zero_one_in_limits(gt_joint_definitions, gt_joint_configs) 174 | ) 175 | .to(pred_zero_one.device) 176 | .unsqueeze(-1) 177 | ) 178 | 179 | loss = torch.nn.functional.mse_loss(pred_zero_one, gt_zero_one) 180 | # print(f"{pred_zero_one}\n{gt_zero_one}") 181 | return loss, {"zero_one_loss": loss.item()} 182 | -------------------------------------------------------------------------------- /datasets/decoder/id_lists/All_Real_Categories.txt: -------------------------------------------------------------------------------- 1 | 187d79cd04b2bdfddf3a1b0d597ce76e 2 | 496dcf99-6e76-480c-8fab-a5579f16f2c7 3 | 4e9832bbbb077f9c5c5adfeaec1397f 4 | 503b4dff71b404dabf195d81040cc60 5 | 55b0f47aea128c3b91d8be9599fbaa1f 6 | 5d17e90f512a3dc7df3a1b0d597ce76e 7 | 66725b8cad4355a03735baeeeb56a00 8 | 6e51cc2c2da50c6a59c5c7ba83ec931a 9 | 795af925dfc8897b035d20a1a3ca345 10 | 7d19e1db73ebfee26f893b5bc716a3fa 11 | 9112f0ee6b1cdf5082ec48ff3a4fe07c 12 | 93b7c0394cc309c8df3a1b0d597ce76e 13 | a238b87f02c5de1edf3a1b0d597ce76e 14 | a2caaa68364f6207f054969eeb39ff86 15 | a377f5af14ac6710a168e247bb97e471 16 | a62b6a19d2093bc91cbd656f2f1bc2ff 17 | aa4ad2f41efb815cb022c94235bc8601 18 | af913c310f1b978ae6488a574e8954a5 19 | b1080bd937b04a44575f4e5007488531 20 | c5f76c9a4137a3563862b05b9038dcc 21 | c6090fb2806b2abfa5f4a1f264741b67 22 | cc8161b35f7bef958c88d30f502a452 23 | d95f6ea8-cda0-4d59-aa49-11309e3f0ce3 24 | 125c93cbc6544bd1f9f50a550b8c1cce 25 | 1b67b4bfed6688ba5b22feddf58c05e1 26 | 1f507b26c31ae69be42930af58a36dce 27 | 241ec8a746dd1cfc78f71a335ebabfa5 28 | 29f5cfcef7272f1f640578ae55230ebc 29 | 3b2db36aaa2546b99c7c402f274622c 30 | 4bacb1694e86005afb6e846333373df8 31 | 4fc3d56243d2d8801ef1ccfaf50f2048 32 | 5678a2173ff575d09cebe817bc1591b3 33 | 5d544ee4b094c6606436916a86a90ed7 34 | 6489453e322cdb53f9f3c6290096f50f 35 | 66e3b7c7f2e8e9297fd8853234f5e918 36 | 6b78948484df58cdc664c3d4e2d59341 37 | 7df09674bc991904c78df40cf2e9097a 38 | 850673bcbce8f73ec8a6d87a62ac0341 39 | 8d70fb6adc63e21eb7e0383b9609fa5 40 | 97e94d800fd6dc07dbaa6d42a4980930 41 | a4b410734514306ac401e233323032d6 42 | aa92ecd31491bca87a88a2ad67bfd073 43 | afa49e97861c45e5e738f481f8560d58 44 | b5f6fd84a3f44ddb1aa47689117a61e1 45 | cbcb79f534518dfbcfe78be5b7b99c8d 46 | cc691d9e8e189ce47a381a112bfd785 47 | f53ea19f871a80d420685b5a7e34b501 48 | f7c26b8c94ba8214397c35f585745a82 49 | 42aac49442bb9f8bb4e3935c6cee4b35 50 | 4f956e259344d4a3599fb6902c958d23 51 | 6d83dea57df3c4a3500158c23c4c5a8e 52 | 87bae84777fe8b702bac1bcdfc2402d2 53 | 891f65c773939191c834958aed613724 54 | 95bc6fb98624ea3229d75ea275a1cb4e 55 | bdb10a17b04e2adbb7fb7f3ae74b618c 56 | c3bb5f3c842a6c2d178e7d331e641179 57 | c75ebd7c340649ba5ad304c2564ae1df 58 | dc5c91c8c01b1c8c506c648223cdabe9 59 | df5bd51614d2fbdef114be17e2e7c4b5 60 | f9544effad178100be92f74d81ff60bf 61 | b9f1eeea355194c19941e769880462e7 62 | 3ea1ace396f6ccae48407a54b1fbfda8 63 | 8c2491e5245804d1ffc6e457221b9271 64 | a46e0c10f17f928ba2bc8d1e386113dc 65 | b296fbfbbe5dccf09c12d6260da9ac2b 66 | b8cf469bc1b42ab64a44340bf227e40 67 | bae2babb26dc352b20489998d734835a 68 | bb5533538179f6c39209092a6c03f1bd 69 | ef97ff5c1d6a00f2a760e402290727de 70 | eff23594cc0aed121b3e6b75a323070-0 71 | 1515a188cbc382fa84ad27a2f1142330 72 | 3158fd17e409d38a732208e596b26ebc 73 | 4d8d0cb708324170c98c13d6112727de 74 | 58c878d494ecbbd62835d3f06aeb6e0 75 | 5b81d7830eabb7547c6e1fb05e1b9037 76 | 
6601ef650f03e000c49931aa7ca8fecb 77 | 6fb955194baf07a750a5eaedf6275e1b 78 | 7028b24b7d64efaf3194539af1047dcf 79 | 827c9a85df258dd8faf0b97ff18d3546 80 | 93d69af3c0034d3d9807c66948157e66 81 | 9e53ec8bedae98859807c66948157e66 82 | ad6bd7e24e5bc25f3593835fe348a036 83 | bc82358ed0ee28d41277c98ea0908b0 84 | 20edff7e1500fc4ed45f502ecff9e44f 85 | 21227197948fd2857c2f94a943a8669b 86 | 28001cb70c38f19cf32b6091d9628440 87 | 299ff1bf2618a4b584b039efed4b32d7 88 | 29f110b8740bd8068c427edcde5d5e2b 89 | 2dc57230d14506eacd6ce29440b718cf 90 | 31c090b23f52bb61208c8c852ee795bc 91 | 415d7746f792eb1de0445fc6d980dd5c 92 | 48045af90c7959e5738e43095496b061 93 | 4aab0e569f1dc3bc8d7e9f13fd8f661d 94 | 4dc3e9e293450817d3dad974dc098fa1 95 | 70d0937e1d38a9c2a45b742ddc5add59 96 | 712d2c844d61aa9cefead98a255f706f 97 | 74b8222078ba776c661673811de66400 98 | 78c4b505894342269299936b751bd77b 99 | 7b5b7bfa8580e913e2580b23e60e4674 100 | 949e39403ab4fab37ade8e3ca8db8db3 101 | 9dd80e356880c9deaf268f6180933aa3 102 | 9e42bbdbfe36680391e4d6c585a697a 103 | a19e6780182c72cf9bf8bea04806ba15 104 | a516711827a396085528d560ddea455 105 | a95828fa4607295674c8eb7e4d6198a5 106 | c9857deb88989a67b5851007eadc6f74 107 | e64f3681d7c76bb743638dabe1eb5336 108 | 04569f2f-3e07-4655-9337-bfa41a5ccbc0 109 | 0d31000f-e876-4751-876d-efa6a61fa9b2 110 | 265d042dcfed6f15c357c21161963e89 111 | 4163de2ce7f6f59aed1d8381d2c075c2-0 112 | 5528ee64-7656-40e4-8db0-70fd39427b4d 113 | 62e22f4d1846d8c1fdc6c1669e5c540 114 | 8b04de89-4f3f-45d8-8d7a-6bb5958e5340 115 | d87cf480-ba57-43b1-b1f2-bae2b8fe2fa4 116 | u094c89ee-d9f6-4266-a9b3-c1f2549b1105 117 | ucfaedfea-c15a-495c-9037-21108eeeb006 118 | 12f3efd9-f013-4aab-922c-0328502acd3f 119 | 19dff8164764e2a259f37b6e82c5e93 120 | 23fd9817d509fe472bf266a8f0187ce5-0 121 | 23fd9817d509fe472bf266a8f0187ce5-1 122 | 3181976321565dfee9027543872faef 123 | 31f86223e3faaec3eae5cab1248d1ec6-0 124 | 31f86223e3faaec3eae5cab1248d1ec6-1 125 | 32036cc5-6e63-47cf-96ba-89ef2be3950e-0 126 | 35233c0d786b5956d25d105fdf500c48 127 | 35c3d7b9-7dec-4e66-a962-14ea0fde4cad 128 | 581ad58ce8664d2d4ff0e6230d32c1e3 129 | 59481570acb7a0872d4ba5e1aa44cc40-0 130 | 5d6201caa12611fe89f4664416242a41 131 | 737fd576f8eae54adfb1b24fd658f3b5-0 132 | 75bfa1045150e49fe177ccfa080b14b0-0 133 | 96a7c39f7eb90f65c90183d47cf3c337-1 134 | 9f264c87-89e3-4b06-8f36-b618ec54694c 135 | a683ed081504a35e4a9a3a0b87d50a92 136 | b4f8a49abc400a775d6ddb389935ee57 137 | ba60dc6d-526e-4014-961a-5049df9079c6-0 138 | c7a96262d5dfc1ae72c447ef6e5cffc2 139 | ceb3b39c9a035752b4fc059d1d10ec5d-0 140 | e7523e396f8d4ae171e397fe45dce6b 141 | e9d3d9ef-57e3-4f0a-bbc7-e1cc75947ccd-7 142 | fca703c2489237d51b44a9962207f944 143 | fe95df61cc16452ccb3316c0fb4cfa01-0 144 | fed0863a69b3744c44f6844c4f2ce888-0 145 | u2bba3644-e88e-4650-9124-e9964702f9ef-0 146 | u51509034-d4f7-4ef7-b014-6660f4df034d-0 147 | ud489e3ab-3fac-4753-8373-f5d4cebaeec5 148 | 20c5096ea98cc955920de219c00d1c3b 149 | 376eb047b40ef4f6a480e3d8fdbd4a92 150 | 37b40b7e9290c0a330314ffb9bb887b5 151 | 3800d2ab6bc278bcd5a3e6010c55b78e 152 | 453034dc-b04a-4415-8c43-16d6d23c47b2 153 | 58a427e5201aa43be00ace8e7c1a5eeb 154 | 6a030b1836586b9f7e1c85c5c15da7fb 155 | 72a2bd9428f7179357fcd7a97096d25 156 | 88ac7b2b3050f1f861f7b52424be58ab 157 | 8c34afa29665356013b1d3e1528f0506-0 158 | 8d152be34b41785677937146265c551a 159 | 8f54f0bec8eb5d35d25169d37940fb64 160 | 98bc3afca001f433a1702a37604ec6f 161 | b3188e51216de8cce2e4961161b75547 162 | c16cba81-714d-4b1a-94cd-7a148af83db0 163 | d01ff66659767d50cee19268a161fc4a 164 | d9378f9a4a7d6514602a101aa41a6f48 165 | 
dc2cda7d-6fd5-48dd-8f7e-7524d7eb1c0a 166 | f39912a4f0516fb897371d1e7cc637f3 167 | f636f0aa2025ba3923c841f9d5051936 168 | u26949e8f-8139-485b-99f9-694c026ed5a6 169 | u9ea1219b-e360-4351-ae52-f589989c58e3-0 170 | ue12a29d7-6d30-4159-ac11-3c6a058ad354 171 | 1af4a1dfa4f94cd44da5feafe6f1c8fc 172 | 21ae39cf6ba8557f4da5feafe6f1c8fc 173 | 2950d1baed4dbd78c59350d819542ec7 174 | 33ec57af7f648994da5feafe6f1c8fc 175 | 606d50b144d8ca164da5feafe6f1c8fc 176 | 82c05fe4890a7f5112de5317fe5b354f 177 | d69d9de0c79ac6a9c59350d819542ec7 178 | 10c14b0cb76f87584da5feafe6f1c8fc 179 | 1548461b13adc0d0c59350d819542ec7 180 | 170be0087472182bc59350d819542ec7 181 | 17d25c26485edcf94da5feafe6f1c8fc 182 | 198cbe57b01bad9dc59350d819542ec7 183 | 19c79a42f68d7d444da5feafe6f1c8fc 184 | 1caaaa5c1da4dd2dc59350d819542ec7 185 | 1fc8231114fa42a7c59350d819542ec7 186 | 2862558059dd584c59350d819542ec7 187 | -------------------------------------------------------------------------------- /CARTO/Encoder/net_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["PYTHONHASHSEED"] = str(1) 4 | 5 | # Allow the sentence tokenizer to be run in parallel. 6 | os.environ["TOKENIZERS_PARALLELISM"] = "true" 7 | 8 | import argparse 9 | import json 10 | import pathlib 11 | import random 12 | import sys 13 | from importlib.machinery import SourceFileLoader 14 | 15 | import cv2 16 | import IPython 17 | 18 | # To ensure mesh_to_sdf is imported before pyrender 19 | import mesh_to_sdf 20 | import numpy as np 21 | import pytorch_lightning as pl 22 | import torch 23 | import wandb 24 | from typing import Optional 25 | from pytorch_lightning import loggers 26 | from pytorch_lightning.callbacks import ModelCheckpoint 27 | from CARTO.app.panoptic_tidying import tidy_classes 28 | from CARTO.lib import camera, datapoint 29 | from CARTO.lib.datapoint import Panoptic 30 | from CARTO.lib.net import common 31 | from CARTO.lib.net.data_module import DataModule 32 | from CARTO.lib.net.dataset import PanopticOutputs 33 | from CARTO.lib.net.panoptic_trainer import PanopticModel 34 | from CARTO.lib.net.post_processing.eval3d import Eval3d 35 | from CARTO.lib.shapenet_utils import NOCS_CATEGORIES 36 | from CARTO.lib import partnet_mobility 37 | 38 | # ./runner.sh simnet/app/panoptic_category_reconstruction/net_train.py @simnet/app/panoptic_category_reconstruction/net_config_overfit.txt 39 | 40 | _GPU_TO_USE = 0 41 | 42 | 43 | def set_seed(seed: Optional[int]): 44 | if seed is None: 45 | return 46 | 47 | random.seed(seed) 48 | np.random.seed(seed) 49 | torch.manual_seed(seed) 50 | 51 | 52 | class EvalMethod: 53 | def __init__(self, hparams, log_prefix="val"): 54 | assert log_prefix == "val" or log_prefix == "test" 55 | 56 | self.objects_eval_3d = Eval3d() 57 | self.doors_eval_3d = Eval3d() 58 | self.handholds_eval_3d = Eval3d() 59 | self.camera_model = camera.ZED2Camera1080p() 60 | self.log_prefix = log_prefix 61 | 62 | def process_sample( 63 | self, panoptic_outputs: PanopticOutputs, panoptic_targets: Panoptic 64 | ): 65 | batch_size = len(panoptic_targets.val_data) 66 | 67 | for i in range(batch_size): 68 | val_data = panoptic_targets.val_data[i] 69 | if val_data.scene_name == "unlabeled_data": 70 | continue 71 | 72 | ## Compute detections 73 | if len(panoptic_outputs.cabinet_door_obbs) > 0: 74 | door_detections = panoptic_outputs.cabinet_door_obbs[0].get_detections( 75 | i, 76 | camera_model=self.camera_model, 77 | class_list=val_data.door_class_ids, 78 | ) 79 | self.doors_eval_3d.process_sample( 
80 | door_detections, val_data.door_detections, val_data.scene_name 81 | ) 82 | 83 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 84 | objects_detections = panoptic_outputs.graspable_objects_obbs[ 85 | 0 86 | ].get_detections( 87 | i, 88 | camera_model=self.camera_model, 89 | class_list=val_data.object_class_ids, 90 | ) 91 | self.objects_eval_3d.process_sample( 92 | objects_detections, val_data.object_detections, val_data.scene_name 93 | ) 94 | 95 | if len(panoptic_outputs.handhold_obbs) > 0: 96 | handhold_detections = panoptic_outputs.handhold_obbs[0].get_detections( 97 | i, camera_model=self.camera_model 98 | ) 99 | self.handholds_eval_3d.process_sample( 100 | handhold_detections, 101 | val_data.handhold_detections, 102 | val_data.scene_name, 103 | ) 104 | 105 | def process_all_dataset(self, log): 106 | log[ 107 | self.log_prefix + "/objects 3Dmap" 108 | ] = self.objects_eval_3d.process_all_3D_dataset() 109 | log[ 110 | self.log_prefix + "/cabinet 3Dmap" 111 | ] = self.doors_eval_3d.process_all_3D_dataset() 112 | log[ 113 | self.log_prefix + "/handhold 3Dmap" 114 | ] = self.handholds_eval_3d.process_all_3D_dataset() 115 | log[ 116 | self.log_prefix + "/object_class_accuracy" 117 | ] = self.objects_eval_3d.process_category_accuracy() 118 | log[ 119 | self.log_prefix + "/door_class_accuracy" 120 | ] = self.doors_eval_3d.process_category_accuracy() 121 | 122 | def reset(self): 123 | self.objects_eval_3d = Eval3d() 124 | self.doors_eval_3d = Eval3d() 125 | self.handholds_eval_3d = Eval3d() 126 | 127 | 128 | if __name__ == "__main__": 129 | print("WARNING -- This was not tested for the code release -- WARNING") 130 | parser = argparse.ArgumentParser(fromfile_prefix_chars="@") 131 | common.add_train_args(parser) 132 | hparams = parser.parse_args() 133 | set_seed(hparams.seed) 134 | categories = [ 135 | "Dishwasher", 136 | "Knife", 137 | "Laptop", 138 | "Microwave", 139 | "Oven", 140 | "Refrigerator", 141 | "Stapler", 142 | "StorageFurniture", 143 | "Table", 144 | "WashingMachine", 145 | ] 146 | hparams.object_categories = [ 147 | partnet_mobility.partnet_mobility_db[object_cat] for object_cat in categories 148 | ] 149 | 150 | train_ds = datapoint.make_dataset(hparams.train_path) 151 | samples_per_epoch = len(train_ds.list()) 152 | samples_per_step = hparams.train_batch_size 153 | steps = hparams.max_steps 154 | # max to allow overfitting for a single example 155 | steps_per_epoch = max(samples_per_epoch // samples_per_step, 1) 156 | epochs = int(np.ceil(steps / steps_per_epoch)) 157 | actual_steps = epochs * steps_per_epoch 158 | print(f"{epochs = } {samples_per_epoch = } {actual_steps = }") 159 | model = PanopticModel( 160 | hparams, epochs, EvalMethod(hparams, "val"), EvalMethod(hparams, "test") 161 | ) 162 | data_module = DataModule(hparams, train_ds) 163 | model_checkpoint = ModelCheckpoint( 164 | # save_top_k=-1, # -1 Saves all models --> deactivate to save some space 165 | every_n_epochs=1, 166 | mode="max", # Does not do anything as we do not have monitor= set (--> saves latest) 167 | ) 168 | if hparams.wandb_name is not None: 169 | logger = loggers.WandbLogger(name=hparams.wandb_name, project="arti2real") 170 | else: 171 | logger = loggers.TensorBoardLogger(save_dir=hparams.output) 172 | # Mixed precision training uses 16-bit precision floats, otherwise use 32-bit floats. 
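    # (Worked example for the schedule computed above, with illustrative numbers:
    #  samples_per_epoch=10_000 and train_batch_size=16 give steps_per_epoch = 625;
    #  with max_steps=5_000 this yields epochs = ceil(5_000 / 625) = 8 and
    #  actual_steps = 8 * 625 = 5_000.)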
173 | precision = 16 if hparams.use_amp else 32 174 | 175 | trainer = pl.Trainer( 176 | max_epochs=epochs, 177 | gpus=[_GPU_TO_USE], 178 | callbacks=[model_checkpoint], 179 | val_check_interval=hparams.val_check_interval, 180 | limit_val_batches=hparams.limit_val_batches, 181 | limit_test_batches=hparams.limit_test_batches, 182 | logger=logger, 183 | default_root_dir=hparams.output, 184 | precision=precision, 185 | ) 186 | trainer.fit(model, data_module) 187 | if hparams.test_path is not None: 188 | trainer.test(model, data_module) 189 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/obb_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.post_processing import epnp 5 | from CARTO.simnet.lib.net.pre_processing import pose_inputs 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | # def compute_network_targets(obbs, masks, height, width, camera_model, class_index_list=None): 13 | # assert len(obbs) == len(masks) 14 | # if len(obbs) == 0: 15 | # height_d = int(height / _DOWNSCALE_VALUE) 16 | # width_d = int(width / _DOWNSCALE_VALUE) 17 | # return datapoint.OBB( 18 | # heat_map=np.zeros([height, width]), 19 | # vertex_target=np.zeros([height_d, width_d, 16]), 20 | # cov_matrices=np.zeros([height_d, width_d, 6]), 21 | # z_centroid=np.zeros([height_d, width_d]), 22 | # classes=np.zeros([height_d, width_d]) 23 | # ) 24 | # heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 25 | # vertex_target = pose_inputs.compute_vertex_field(obbs, heatmaps, camera_model) 26 | # z_centroid = pose_inputs.compute_z_centroid_field(obbs, heatmaps) 27 | # cov_matrix = compute_rotation_field(obbs, heatmaps) 28 | # class_target = None 29 | # if class_index_list is not None: 30 | # class_target = compute_class_field(obbs, class_index_list, heatmaps) 31 | # return datapoint.OBB( 32 | # heat_map=np.max(heatmaps, axis=0), 33 | # vertex_target=vertex_target, 34 | # cov_matrices=cov_matrix, 35 | # z_centroid=z_centroid, 36 | # classes=class_target 37 | # ) 38 | 39 | 40 | ## Extended Targers to include the pose + latent emb 41 | def compute_network_targets( 42 | obbs, 43 | masks, 44 | shape_code, 45 | arti_code, 46 | poses, 47 | height, 48 | width, 49 | camera_model, 50 | class_index_list=None, 51 | shape_emb_size=32, 52 | arti_emb_size=16, 53 | ): 54 | assert len(obbs) == len(masks) 55 | if len(obbs) == 0: 56 | height_d = int(height / _DOWNSCALE_VALUE) 57 | width_d = int(width / _DOWNSCALE_VALUE) 58 | return datapoint.OBB( 59 | heat_map=np.zeros([height, width]), 60 | vertex_target=np.zeros([height_d, width_d, 16]), 61 | cov_matrices=np.zeros([height_d, width_d, 6]), 62 | z_centroid=np.zeros([height_d, width_d]), 63 | shape_emb=np.zeros([height_d, width_d, shape_emb_size]), 64 | arti_emb=np.zeros([height_d, width_d, arti_emb_size]), 65 | abs_pose=np.zeros([height_d, width_d, 13]), 66 | ) 67 | heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 68 | vertex_target = pose_inputs.compute_vertex_field(obbs, heatmaps, camera_model) 69 | z_centroid = pose_inputs.compute_z_centroid_field(obbs, heatmaps) 70 | cov_matrix = compute_rotation_field(obbs, heatmaps) 71 | shape_emb_target = compute_latent_emb( 72 | obbs, shape_code, heatmaps, embedding_size=shape_emb_size 73 | ) 74 | arti_emb_target = compute_latent_emb( 75 | 
obbs, arti_code, heatmaps, embedding_size=arti_emb_size 76 | ) 77 | abs_pose_target = compute_abspose_field(poses, heatmaps, camera_model) 78 | return datapoint.OBB( 79 | heat_map=np.max(heatmaps, axis=0), 80 | vertex_target=vertex_target, 81 | cov_matrices=cov_matrix, 82 | z_centroid=z_centroid, 83 | shape_emb=shape_emb_target, 84 | arti_emb=arti_emb_target, 85 | abs_pose=abs_pose_target, 86 | ) 87 | 88 | 89 | #### 90 | # How does it work? 91 | # The first dimension represents a layer for each obbs. 92 | # Data will be set in the according channels (last dimension) 93 | # As the remainders entries for this layer stay zero, we can sum over the 94 | # first dimension to get rid of it. 95 | # TODO Nick maybe refactor and use direct indexing to save time 96 | # class_target[mask] = class_values 97 | # etc.. 98 | #### 99 | def compute_class_field(obbs, class_index_list, heat_maps, threshold=0.3): 100 | class_target = np.zeros([len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1]]) 101 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 102 | for obb, heat_map, ii in zip(obbs, heat_maps, range(len(heat_maps))): 103 | mask = heatmap_indices == ii 104 | class_values = class_index_list.index(obb.category_name) 105 | class_target[ii, mask] = class_values 106 | return np.sum(class_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 107 | 108 | 109 | def compute_rotation_field(obbs, heat_maps, threshold=0.3): 110 | cov_target = np.zeros([len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1], 6]) 111 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 112 | for obb, heat_map, ii in zip(obbs, heat_maps, range(len(heat_maps))): 113 | mask = heatmap_indices == ii 114 | cov_matrix = obb.cov_matrix 115 | cov_mat_values = np.array( 116 | [ 117 | cov_matrix[0, 0], 118 | cov_matrix[1, 1], 119 | cov_matrix[2, 2], 120 | cov_matrix[0, 1], 121 | cov_matrix[0, 2], 122 | cov_matrix[1, 2], 123 | ] 124 | ) 125 | cov_target[ii, mask] = cov_mat_values 126 | return np.sum(cov_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 127 | 128 | 129 | def compute_latent_emb(obbs, embeddings, heat_maps, embedding_size=1): 130 | """ 131 | Fills each pixel with the closest embedding code according to the heatmap 132 | """ 133 | latent_emb_target = np.zeros( 134 | [len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1], embedding_size] 135 | ) 136 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 137 | for emb, ii in zip(embeddings, range(len(heat_maps))): 138 | mask = heatmap_indices == ii 139 | latent_emb_target[ii, mask] = emb 140 | return np.sum(latent_emb_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 141 | 142 | 143 | def compute_abspose_field(poses, heat_maps, camera_model, threshold=0.3): 144 | abs_pose_target = np.zeros( 145 | [len(poses), heat_maps[0].shape[0], heat_maps[0].shape[1], 13] 146 | ) 147 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 148 | for pose, ii in zip(poses, range(len(heat_maps))): 149 | mask = heatmap_indices == ii 150 | actual_abs_pose = camera_model.RT_matrix @ pose.camera_T_object 151 | rotation_matrix = actual_abs_pose[:3, :3] 152 | translation_vector = actual_abs_pose[:3, 3] 153 | scale = pose.scale_matrix[0, 0] 154 | abs_pose_values = np.array( 155 | [ 156 | rotation_matrix[0, 0], 157 | rotation_matrix[0, 1], 158 | rotation_matrix[0, 2], 159 | rotation_matrix[1, 0], 160 | rotation_matrix[1, 1], 161 | rotation_matrix[1, 2], 162 | rotation_matrix[2, 0], 163 | rotation_matrix[2, 1], 164 | rotation_matrix[2, 2], 165 | translation_vector[0], 
166 | translation_vector[1], 167 | translation_vector[2], 168 | scale, 169 | ] 170 | ) 171 | abs_pose_target[ii, mask] = abs_pose_values 172 | return np.sum(abs_pose_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 173 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/depth_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib import datapoint 8 | from torch.nn import functional as F 9 | from CARTO.simnet.lib.net import losses 10 | 11 | _mse_loss = losses.MSELoss() 12 | 13 | 14 | class DepthOutput: 15 | def __init__(self, depth_pred, hparams): 16 | self.depth_pred = depth_pred 17 | self.is_numpy = False 18 | self.disp_loss = DisparityLoss(hparams.max_disparity, False) 19 | self.loss = nn.SmoothL1Loss(reduction="none") 20 | self.hparams = hparams 21 | 22 | # Converters for torch to numpy 23 | def convert_to_numpy_from_torch(self): 24 | self.depth_pred = np.ascontiguousarray(self.depth_pred.float().cpu().numpy()) 25 | self.is_numpy = True 26 | return self.depth_pred 27 | 28 | def convert_to_torch_from_numpy(self): 29 | self.depth_pred[self.depth_pred > self.hparams.max_disparity] = ( 30 | self.hparams.max_disparity - 1 31 | ) 32 | self.depth_pred = torch.from_numpy( 33 | np.ascontiguousarray(self.depth_pred) 34 | ).float() 35 | self.is_numpy = False 36 | return self.depth_pred 37 | 38 | def get_prediction(self, is_target: bool = False): 39 | if not self.is_numpy: 40 | self.convert_to_numpy_from_torch() 41 | if is_target: 42 | return self.depth_pred 43 | else: 44 | return self.depth_pred[0] 45 | 46 | def get_visualization_img( 47 | self, left_img_np, corner_scale=1, raw_disp=True, is_target: bool = False 48 | ): 49 | if not self.is_numpy: 50 | self.convert_to_numpy_from_torch() 51 | 52 | if is_target: 53 | disp = self.depth_pred 54 | else: 55 | disp = self.depth_pred[0] 56 | 57 | if raw_disp: 58 | return disp_map_visualize(disp, self.hparams.max_disparity) 59 | disp_scaled = disp[::corner_scale, ::corner_scale] 60 | left_img_np[ 61 | : disp_scaled.shape[0], -disp_scaled.shape[1] : 62 | ] = disp_map_visualize(disp_scaled, self.hparams.max_disparity) 63 | return left_img_np 64 | 65 | def compute_loss(self, depth_targets, log, name): 66 | if self.is_numpy: 67 | raise ValueError("Output is not in torch mode") 68 | depth_target_stacked = [] 69 | for depth_target in depth_targets: 70 | depth_target_stacked.append(depth_target.depth_pred) 71 | depth_target_batch = torch.stack(depth_target_stacked) 72 | depth_target_batch = depth_target_batch.to(torch.device("cuda:0")) 73 | depth_loss = self.disp_loss(self.depth_pred, depth_target_batch) 74 | log[name] = depth_loss.item() 75 | return self.hparams.loss_depth_mult * depth_loss 76 | 77 | 78 | class DisparityLoss(nn.Module): 79 | """Smooth L1-loss for disparity with check for valid ground truth""" 80 | 81 | def __init__(self, max_disparity, stdmean_scaled): 82 | super().__init__() 83 | 84 | self.max_disparity = max_disparity 85 | self.stdmean_scaled = stdmean_scaled 86 | self.loss = nn.SmoothL1Loss(reduction="none") 87 | 88 | def forward(self, disparity, disparity_gt, right=False, low_range_div=None): 89 | # Scale ground truth disparity based on output scale. 
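        # Illustrative example (sizes assumed): with disparity_gt of shape (B, 512, 1024) and a
        # prediction of shape (B, 128, 256), scale_factor = 1024 // 256 = 4; the ground truth is
        # pooled down to (B, 128, 256) with its values divided by 4, and (when stdmean_scaled is
        # False) each per-sample loss below is multiplied by 4 to compensate.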
90 | scale_factor = disparity_gt.shape[2] // disparity.shape[2] 91 | disparity_gt = downsample_disparity(disparity_gt, scale_factor) 92 | max_disparity = self.max_disparity / scale_factor 93 | if low_range_div is not None: 94 | max_disparity /= low_range_div 95 | 96 | # with torch.no_grad(): 97 | # valid_mask = get_disparity_valid_mask(disparity_gt, max_disparity, right) 98 | 99 | batch_size, _, _ = disparity.shape 100 | loss = torch.zeros(1, dtype=disparity.dtype, device=disparity.device) 101 | 102 | # Not all batch elements may have ground truth for disparity, so we compute the loss for each batch element 103 | # individually. 104 | valid_count = 0 105 | for batch_idx in range(batch_size): 106 | if torch.sum(disparity_gt[batch_idx, :, :]) < 1e-3: 107 | continue 108 | 109 | single_loss = self.loss( 110 | disparity[batch_idx, :, :], disparity_gt[batch_idx, :, :] 111 | ) 112 | valid_count += 1 113 | 114 | if self.stdmean_scaled: 115 | # Scale loss by standard deviation and mean of ground truth to reduce influence of very high 116 | # disparities. 117 | gt_std, gt_mean = torch.std_mean(disparity_gt[batch_idx, :, :]) 118 | loss += torch.mean(single_loss) / (gt_mean + 2.0 * gt_std) 119 | else: 120 | # Scale loss by scale factor due to difference of expected magnitude of disparity at different scales. 121 | loss += torch.mean(single_loss) * scale_factor 122 | # Avoid potential divide by 0. 123 | if valid_count > 0: 124 | return loss / batch_size 125 | else: 126 | return loss 127 | 128 | 129 | def downsample_disparity(disparity, factor): 130 | """Downsample disparity using a min-pool operation 131 | 132 | Input can be either a Numpy array or Torch tensor. 133 | """ 134 | with torch.no_grad(): 135 | # Convert input to tensor at the appropriate number of dimensions if needed. 136 | is_numpy = type(disparity) == np.ndarray 137 | if is_numpy: 138 | disparity = torch.from_numpy(disparity) 139 | new_dims = 4 - len(disparity.shape) 140 | for i in range(new_dims): 141 | disparity = disparity.unsqueeze(0) 142 | 143 | disparity = F.max_pool2d(disparity, kernel_size=factor, stride=factor) / factor 144 | 145 | # Convert output disparity back into same format and number of dimensions as input. 
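        # Shape walk-through (assumed input): a NumPy map of shape (480, 640) with constant value
        # 32.0 and factor=4 is lifted to (1, 1, 480, 640), pooled to (1, 1, 120, 160) with values
        # rescaled to 8.0, then squeezed back to (120, 160) and converted to NumPy again below.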
146 | for i in range(new_dims): 147 | disparity = disparity.squeeze(0) 148 | if is_numpy: 149 | disparity = disparity.numpy() 150 | return disparity 151 | 152 | 153 | def get_disparity_valid_mask(disparity, max_disparity, right=False): 154 | """Generate mask where disparity is valid based on the given max_disparity""" 155 | IGNORE_EDGE = False 156 | result = torch.logical_and(disparity > 1e-3, disparity < (max_disparity - 1 - 1e-3)) 157 | if IGNORE_EDGE: 158 | width = disparity.shape[-1] 159 | edge_mask = ( 160 | torch.arange(width, dtype=disparity.dtype, device=disparity.device) - 1 161 | ) 162 | if right: 163 | edge_mask = torch.flip(edge_mask, (0,)) 164 | edge_mask = edge_mask.expand_as(disparity) 165 | valid_edge = disparity < edge_mask 166 | result = torch.logical_and(result, valid_edge) 167 | return result 168 | 169 | 170 | def turbo_vis(heatmap, normalize=False, uint8_output=False): 171 | assert len(heatmap.shape) == 2 172 | if normalize: 173 | heatmap = heatmap.astype(np.float32) 174 | heatmap -= np.min(heatmap) 175 | heatmap /= np.max(heatmap) 176 | assert heatmap.dtype != np.uint8 177 | 178 | x = heatmap 179 | x = x.clip(0, 1) 180 | a = (x * 255).astype(int) 181 | b = (a + 1).clip(max=255) 182 | f = x * 255.0 - a 183 | turbo_map = datapoint.TURBO_COLORMAP_DATA_NP[::-1] 184 | pseudo_color = turbo_map[a] + (turbo_map[b] - turbo_map[a]) * f[..., np.newaxis] 185 | pseudo_color[heatmap < 0.0] = 0.0 186 | pseudo_color[heatmap > 1.0] = 1.0 187 | if uint8_output: 188 | pseudo_color = (pseudo_color * 255).astype(np.uint8) 189 | return pseudo_color 190 | 191 | 192 | def disp_map_visualize(x, max_disp): 193 | assert len(x.shape) == 2 194 | x = x.astype(np.float64) 195 | valid = (x < max_disp) & np.isfinite(x) 196 | if valid.sum() == 0: 197 | return np.zeros_like(x).astype(np.uint8) 198 | x -= np.min(x[valid]) 199 | x /= np.max(x[valid]) 200 | x = 1.0 - x 201 | x[~valid] = 0.0 202 | x = turbo_vis(x) 203 | x = (x * 255).astype(np.uint8) 204 | return x[:, :, ::-1] 205 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/verify_partnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Otherwise we can't use pyrender 4 | # os.environ["PYOPENGL_PLATFORM"] = "" 5 | # os.environ['DISPLAY'] = ':1' 6 | 7 | import multiprocessing 8 | from concurrent import futures 9 | import pathlib 10 | 11 | import tqdm 12 | import functools 13 | import gc 14 | import yaml 15 | 16 | import numpy as np 17 | 18 | from typing import Dict, Any, Callable, List 19 | import itertools 20 | import open3d as o3d 21 | 22 | # import pyrender 23 | import trimesh 24 | import urdfpy 25 | 26 | from CARTO.simnet.lib import partnet_mobility 27 | from CARTO.simnet.lib.datasets import PartNetMobilityV0, PartNetMobilityV0DB 28 | from CARTO.simnet.lib.datapoint import compress_datapoint, decompress_datapoint 29 | from CARTO.Decoder.visualizing import offscreen 30 | 31 | import uuid 32 | from CARTO.Decoder import utils, config 33 | from CARTO.Decoder.data import dataset 34 | import pyrender 35 | 36 | 37 | def process_object_id( 38 | object_id: str, 39 | joint_filter: Callable[[Dict[str, Any]], bool] = lambda _: True, 40 | joint_offset: float = 0.0, 41 | ): 42 | object_path = PartNetMobilityV0DB.get_object(object_id) 43 | object_meta = PartNetMobilityV0DB.get_object_meta(object_id) 44 | 45 | joints_of_interest: List[str] = [] 46 | # Artifact from preprocessing 47 | for joint_id, joint in object_meta["joints"].items(): 48 | if not 
joint_filter( 49 | joint, partnet_mobility.get_joint_name_exclusion_list(object_meta) 50 | ): 51 | continue 52 | joints_of_interest.append(joint_id) 53 | 54 | joint_config = {} 55 | for joint_id, joint in object_meta["joints"].items(): 56 | if joint_id in joints_of_interest: 57 | limits = partnet_mobility.get_canonical_joint_limits(object_meta, joint_id) 58 | # limits = np.array(object_meta["joints"][joint_id]["limit"]) 59 | # joint_config[joint_id] = limits[0] 60 | # joint_config[joint_id] = limits[1] 61 | joint_config[joint_id] = limits[0] + joint_offset 62 | else: 63 | joint_config[joint_id] = 0.0 64 | 65 | canonical_transform = np.array( 66 | PartNetMobilityV0DB.get_object_meta(object_id)["canonical_transformation"] 67 | ) 68 | 69 | # if PartNetMobilityV0DB.get_object_meta(object_id)["model_cat"] == "Scissors": 70 | # canonical_transform = trimesh.transformations.rotation_matrix( 71 | # np.pi / 2, np.array([0., 0.0, -1.]) 72 | # ) 73 | # print(canonical_transform) 74 | # canonical_transform = trimesh.transformations.random_rotation_matrix() 75 | 76 | urdf_object = urdfpy.URDF.load(str(object_path / "mobility.urdf")) 77 | 78 | # return utils.object_to_trimesh(urdf_object, joint_config, base_transform=canonical_transform) 79 | if len(joints_of_interest) == 1: 80 | # print(object_meta["joints"][joints_of_interest[0]]) 81 | obj_trimesh, _, _ = utils.object_to_trimesh( 82 | urdf_object, 83 | joint_config=joint_config, 84 | base_transform=canonical_transform, 85 | origin_frame=config.ObjectOrigin.CLOSED_STATE 86 | # origin_frame=config.ObjectOrigin.PARTNETMOBILITY 87 | ) 88 | return obj_trimesh 89 | else: 90 | return None 91 | 92 | 93 | def main(): 94 | object_filter, joint_filter = partnet_mobility.get_filter_function( 95 | # category_list=["Box"], 96 | # category_list=["Scissors"], 97 | # category_list=["Pliers"], 98 | # category_list=["Stapler"], 99 | # category_list=["Knife"], 100 | # category_list=["Dishwasher"], 101 | # category_list=["Microwave"], 102 | # category_list=["Oven"], 103 | # category_list=["Table"], 104 | # category_list=["WashingMachine"], 105 | # category_list=["Refrigerator"], 106 | category_list=["StorageFurniture"], 107 | # category_list=["Laptop"], 108 | # category_list=["Toilet"], 109 | # category_list=["Microwave", "Scissors"], 110 | # category_list=["Pliers", "Scissors", "Stapler"], 111 | # category_list=["Pliers", "Scissors"], 112 | # category_list=["Microwave", "Fridge", "Toilet", "WashingMachine", "Dishwasher", "Oven"], 113 | # category_list=[ 114 | # "Box", "Dishwasher", "Door", "Laptop", "Microwave", "Oven", "Refrigerator", "Safe", 115 | # "StorageFurniture", "Table", "Toilet", "TrashCan", "WashingMachine", "Window", "Stapler" 116 | # ], 117 | # category_list=[ 118 | # "Dishwasher", "Laptop", "Microwave", "Oven", "Refrigerator", "StorageFurniture", "Table", 119 | # "WashingMachine", "Stapler" 120 | # ], 121 | max_unique_parents=2, 122 | max_joints=1, 123 | no_limit_ok=False, 124 | min_prismatic=0.1, 125 | min_revolute=0.1, 126 | allowed_joints=["revolute"], 127 | # allowed_joints=["prismatic"] 128 | ) 129 | partnet_mobility_db = PartNetMobilityV0() 130 | partnet_mobility_db.set_filter(object_filter) 131 | print(f"Length of filtered dataset: {len(partnet_mobility_db)}") 132 | # exit(0) 133 | joint_offset = 0.5 134 | # joint_offset = 3.14159 135 | 136 | scene = pyrender.Scene() 137 | added_to_scene = 0 138 | object_ids = partnet_mobility_db.index_list 139 | for id_ in object_ids: 140 | print(id_) 141 | 142 | pcds = [] 143 | 144 | # Hardcode some 145 | 
object_ids = ["187d79cd04b2bdfddf3a1b0d597ce76e"]
146 |
147 | for object_id in tqdm.tqdm(object_ids):
148 | # for object_id in tqdm.tqdm(object_ids[:1]):
149 | trimesh_scene = process_object_id(
150 | object_id, joint_filter=joint_filter, joint_offset=joint_offset
151 | )
152 | if trimesh_scene is None:
153 | continue
154 | trimesh_single: trimesh.Trimesh = trimesh_scene.dump(concatenate=True)
155 |
156 | o3d_mesh: o3d.geometry.TriangleMesh = trimesh_single.as_open3d
157 | o3d_mesh.paint_uniform_color([1, 0.706, 0])
158 | o3d_mesh.compute_vertex_normals()
159 | pcds.append(o3d_mesh)
160 | # scene.add(pyrender.Mesh.from_trimesh(trimesh_single))
161 | added_to_scene += 1
162 | # print(object_id)
163 | # Single Scene
164 | # scene_local = offscreen.get_default_scene()
165 | # scene_local.add(pyrender.Mesh.from_trimesh(trimesh_single))
166 | # pyrender.Viewer(scene_local, use_raymond_lighting=True, show_world_axis=True)
167 | # pyrender.Viewer(scene, use_raymond_lighting=True, show_world_axis=True)
168 | print(f"{object_id}")
169 | o3d.visualization.draw_geometries([o3d_mesh])
170 | print(f"Objects in scene {added_to_scene}")
171 | # pyrender.Viewer(scene, use_raymond_lighting=True, show_world_axis=True)
172 |
173 | if True:
174 | points = np.array(
175 | [
176 | [-1.0, -1.0, -1.0],
177 | [1.0, -1.0, -1.0],
178 | [-1.0, 1.0, -1.0],
179 | [1.0, 1.0, -1.0],
180 | [-1.0, -1.0, 1.0],
181 | [1.0, -1.0, 1.0],
182 | [-1.0, 1.0, 1.0],
183 | [1.0, 1.0, 1.0],
184 | ],
185 | dtype=float,  # np.float alias is removed in recent NumPy; builtin float is equivalent
186 | )
187 | # points /= 2.
188 | lines = np.array(
189 | [
190 | [0, 1],
191 | [0, 2],
192 | [1, 3],
193 | [2, 3],
194 | [4, 5],
195 | [4, 6],
196 | [5, 7],
197 | [6, 7],
198 | [0, 4],
199 | [1, 5],
200 | [2, 6],
201 | [3, 7],
202 | ]
203 | )
204 | colors = [[1, 0, 0] for i in range(len(lines))]
205 | line_set = o3d.geometry.LineSet()
206 | line_set.points = o3d.utility.Vector3dVector(points)
207 | line_set.lines = o3d.utility.Vector2iVector(lines)
208 | line_set.colors = o3d.utility.Vector3dVector(colors)
209 | pcds.append(line_set)
210 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame())
211 | o3d.visualization.draw_geometries(pcds)
212 | # o3d.visualization.enable_indirect_light()
213 |
214 |
215 | if __name__ == "__main__":
216 | # TODO Use new tyro feature to parse function header?
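    # A possible shape for the TODO above (untested sketch, would need an `import tyro`):
    # tyro can also build the CLI directly from a function signature, e.g.
    #
    #   def main(category: str = "StorageFurniture", joint_offset: float = 0.5) -> None: ...
    #   tyro.cli(main)
    #
    # the argument names shown are illustrative, not the current signature of main().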
217 | main() 218 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/keypoint_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from skimage.feature import peak_local_max 7 | 8 | from CARTO.simnet.lib.net import losses 9 | from CARTO.simnet.lib.label import get_unique_colors, Keypoint 10 | 11 | KEYPOINT_COLORS = get_unique_colors(10) 12 | LOSS = nn.BCELoss() 13 | # LOSS = nn.MSELoss() 14 | 15 | 16 | class KeypointOutput: 17 | def __init__(self, heatmap, hparams, ignore_mask=None): 18 | self.heatmap = heatmap 19 | self.first_heatmap = None 20 | self.ignore_mask = ignore_mask 21 | self.is_numpy = False 22 | self.hparams = hparams 23 | self.loss = LOSS 24 | self.num_keypoints = hparams.num_keypoints 25 | self.all_keypoints = None 26 | 27 | # Converters for torch to numpy 28 | def convert_to_numpy_from_torch(self): 29 | self.heatmap = np.ascontiguousarray(self.heatmap.float().cpu().numpy()) 30 | self.is_numpy = True 31 | 32 | def convert_to_torch_from_numpy(self): 33 | self.heatmap = torch.from_numpy(np.ascontiguousarray(self.heatmap)).float() 34 | self.is_numpy = False 35 | 36 | def get_keypoints(self, min_distance=40, min_confidence=0.3): 37 | if not self.is_numpy: 38 | self.convert_to_numpy_from_torch() 39 | # If this keypoint network only predicts a single type of keypoint, we must add a 40 | # dimension that is missing in the output head for the visualization code. 41 | if self.num_keypoints == 1: 42 | self.first_heatmap = self.heatmap[:, None, :, :][0] 43 | else: 44 | if len(self.heatmap.shape) == 4: # network outputs 45 | self.first_heatmap = self.heatmap[0] 46 | else: 47 | self.first_heatmap = self.heatmap 48 | return extract_keypoints_from_heatmap( 49 | self.first_heatmap, min_distance, min_confidence 50 | ) 51 | 52 | def get_detections(self, left_img): 53 | if not self.is_numpy: 54 | self.convert_to_numpy_from_torch() 55 | # If this keypoint network only predicts a single type of keypoint, we must add a 56 | # dimension that is missing in the output head for the visualization code. 
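# Shape note: the raw heatmap tensor is (batch, num_keypoints, H, W). When the head
# predicts a single keypoint class the class dimension is squeezed away, so
# heatmap[:, None, :, :][0] re-inserts it and drops the batch index, giving (1, H, W);
# otherwise, if the tensor is still batched, heatmap[0] selects the first element,
# giving (num_keypoints, H, W). Downstream code always expects (num_keypoints, H, W).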
57 | if self.num_keypoints == 1: 58 | self.first_heatmap = self.heatmap[:, None, :, :][0] 59 | else: 60 | if len(self.heatmap.shape) == 4: # network outputs 61 | self.first_heatmap = self.heatmap[0] 62 | else: 63 | self.first_heatmap = self.heatmap 64 | img, all_keypoints = vis_keypoints_from_heatmap( 65 | self.first_heatmap, 66 | left_img.shape[0], 67 | left_img.shape[1], 68 | left_img, 69 | True, 70 | True, 71 | ) 72 | return img 73 | 74 | def evaluation_metrics(self, targ_kp_output): 75 | targ_keypoints = targ_kp_output.get_keypoints() 76 | results = {} 77 | for i in range(len(targ_keypoints)): 78 | results[i] = {} 79 | for confidence in np.linspace(0, 1.01, 10): 80 | all_keypoints = self.get_keypoints(min_confidence=confidence) 81 | for i, (pred_class, targ_class) in enumerate( 82 | zip(all_keypoints, targ_keypoints) 83 | ): 84 | tp, fp, fn = evaluate_keypoints(pred_class, targ_class) 85 | precision = tp / (tp + fp) if tp + fp > 0 else 1 86 | recall = tp / (tp + fn) if tp + fn > 0 else 1 87 | results[i][confidence] = (precision, recall) 88 | return results 89 | 90 | def get_visualization_img(self, left_img): 91 | if not self.is_numpy: 92 | self.convert_to_numpy_from_torch() 93 | if self.num_keypoints == 1: 94 | self.first_heatmap = self.heatmap[:, None, :, :][0] 95 | else: 96 | self.first_heatmap = self.heatmap[0] 97 | return vis_network_outputs(self.first_heatmap, left_img) 98 | 99 | def compute_loss(self, keypoint_targets, log, name): 100 | if self.is_numpy: 101 | raise ValueError("Output is not in torch mode") 102 | heatmap_target = torch.stack( 103 | [ 104 | torch.squeeze(keypoint_target.heatmap) 105 | for keypoint_target in keypoint_targets 106 | ] 107 | ) 108 | 109 | # Move to GPU 110 | heatmap_target = heatmap_target.to(torch.device("cuda:0")) 111 | 112 | heatmap_loss = self.loss(self.heatmap, heatmap_target) 113 | log[name] = heatmap_loss.item() 114 | return self.hparams.loss_keypoint_mult * heatmap_loss 115 | 116 | 117 | def vis_network_outputs(heatmaps, left_img, idx=0): 118 | heatmap_vis = [] 119 | gray_img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 120 | for heatmap in heatmaps: 121 | img = np.copy(gray_img) 122 | heatmap /= np.max(heatmap) 123 | img = cv2.addWeighted(heatmap.astype(float), 0.999, img.astype(float), 0.001, 0) 124 | img /= img.max() / 255 125 | heatmap_vis.append(img[:, :, np.newaxis].astype(np.float32)) 126 | return heatmap_vis 127 | 128 | 129 | def draw_keypoints_from_predictions(keypoints, left_img): 130 | for i, keypoint_group in enumerate(keypoints): 131 | color = KEYPOINT_COLORS[i] 132 | color = (int(color[0]), int(color[1]), int(color[2])) 133 | for keypoint in keypoint_group: 134 | for px in keypoint.pixels: 135 | left_img = cv2.circle( 136 | left_img, tuple(px.ravel().astype(int)), 5, color, 2 137 | ) 138 | return left_img 139 | 140 | 141 | def extract_peaks_from_heatmap(heatmap, min_distance=40, min_confidence=0.3): 142 | peaks = peak_local_max( 143 | heatmap, 144 | min_distance=min_distance, 145 | threshold_abs=min_confidence, 146 | exclude_border=False, 147 | num_peaks=2, 148 | ) 149 | 150 | return peaks 151 | 152 | 153 | def evaluate_keypoints(pred_kp, targ_kp, distance_threshold=20): 154 | true_positives = 0 155 | false_negatives = 0 156 | counted = [] 157 | all_distances = [] 158 | for px in pred_kp.pixels: 159 | distances = np.linalg.norm(targ_kp.pixels - px, axis=1) 160 | all_distances.append(distances) 161 | all_distances = np.array(all_distances) 162 | for i in range(len(targ_kp.pixels)): 163 | if len(all_distances) == 0: 164 | break 165 
| closest = all_distances[:, i].argmin() # prediction that is closest 166 | if all_distances[closest, i] < distance_threshold: 167 | true_positives += 1 # correct prediction 168 | all_distances[ 169 | closest 170 | ] = 1e10 # don't let this prediction be a positive for anything else 171 | else: 172 | false_negatives += 1 # no prediction was sufficiently close 173 | false_positives = len(pred_kp.pixels) - true_positives 174 | false_negatives = len(targ_kp.pixels) - true_positives 175 | return true_positives, false_positives, false_negatives 176 | 177 | 178 | def extract_keypoints_from_heatmap(heatmap, min_distance=40, min_confidence=0.5): 179 | all_keypoints = [] 180 | for idx in range(heatmap.shape[0]): 181 | keypoints = extract_peaks_from_heatmap( 182 | heatmap[idx], min_distance, min_confidence 183 | ) 184 | all_keypoints.append(Keypoint(pixels=[px for px in keypoints])) 185 | return all_keypoints 186 | 187 | 188 | def vis_keypoints_from_heatmap( 189 | heatmap, height, width, left_img, raw_keypoints=False, gray=False 190 | ): 191 | if gray: 192 | img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 193 | img = np.stack((img,) * 3, axis=-1) 194 | else: 195 | img = np.copy(left_img) 196 | img = img / img.max() * 255 197 | all_keypoints = [] 198 | for idx in range(heatmap.shape[0]): 199 | keypoints = extract_peaks_from_heatmap(heatmap[idx]) 200 | all_keypoints.append(Keypoint(pixels=[px for px in keypoints])) 201 | color = KEYPOINT_COLORS[idx] 202 | color = (int(color[0]), int(color[1]), int(color[2])) 203 | for px in keypoints: 204 | img = cv2.circle(img, tuple(px.ravel()[::-1].astype(int)), 5, color, 2) 205 | if raw_keypoints: 206 | return img, all_keypoints 207 | return img 208 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Toyota Research Institute. All rights reserved. 
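# Module note: create_anaglyph() below packs a rectified stereo pair into a single
# 6-channel float tensor in CHW order (channels 0-2: normalized left image,
# channels 3-5: normalized right image); extract_left_numpy_img() and
# extract_right_numpy_img() invert that packing for visualization.
# Rough shape walk-through (illustrative values only):
#
#     left_color, right_color: (H, W, 3) uint8
#     -> stacked               (H, W, 6) uint8, each half min-max normalized to [0, 255]
#     -> scaled to [0, 1] and transposed to (6, H, W)
#     -> float32 torch tensor consumed as the network's stereo input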
2 | 3 | import dataclasses 4 | import os 5 | import random 6 | import pathlib 7 | from typing import List, Tuple, Any 8 | 9 | import cv2 10 | import numpy as np 11 | import torch 12 | import IPython 13 | import torch.nn.functional as F 14 | from torch.utils.data import Dataset 15 | 16 | from CARTO.simnet.lib import datapoint 17 | from CARTO.simnet.lib.net.post_processing.segmentation_outputs import SegmentationOutput 18 | from CARTO.simnet.lib.net.post_processing.depth_outputs import DepthOutput 19 | from CARTO.simnet.lib.net.post_processing.pose_outputs import PoseOutput 20 | from CARTO.simnet.lib.net.post_processing.obb_outputs import OBBOutput 21 | 22 | 23 | def extract_left_numpy_img(anaglyph): 24 | anaglyph_np = np.ascontiguousarray(anaglyph.cpu().numpy()) 25 | anaglyph_np = anaglyph_np.transpose((1, 2, 0)) 26 | left_img = anaglyph_np[..., 0:3] * 255.0 27 | return left_img 28 | 29 | 30 | def extract_right_numpy_img(anaglyph): 31 | anaglyph_np = np.ascontiguousarray(anaglyph.cpu().numpy()) 32 | anaglyph_np = anaglyph_np.transpose((1, 2, 0)) 33 | left_img = anaglyph_np[..., 3:6] * 255.0 34 | return left_img 35 | 36 | 37 | def create_anaglyph(stereo_dp): 38 | height, width, _ = stereo_dp.left_color.shape 39 | image = np.zeros([height, width, 6], dtype=np.uint8) 40 | cv2.normalize(stereo_dp.left_color, stereo_dp.left_color, 0, 255, cv2.NORM_MINMAX) 41 | cv2.normalize(stereo_dp.right_color, stereo_dp.right_color, 0, 255, cv2.NORM_MINMAX) 42 | image[..., 0:3] = stereo_dp.left_color 43 | image[..., 3:6] = stereo_dp.right_color 44 | image = image * 1.0 / 255.0 45 | image = image.transpose((2, 0, 1)) 46 | return torch.from_numpy(np.ascontiguousarray(image)).float() 47 | 48 | 49 | # Struct for Panoptic Outputs 50 | @dataclasses.dataclass 51 | class PanopticOutputs: 52 | depth: list = dataclasses.field(default_factory=list) 53 | small_depth: list = dataclasses.field(default_factory=list) 54 | room_segmentation: List[SegmentationOutput] = dataclasses.field( 55 | default_factory=list 56 | ) 57 | cabinet_door_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 58 | handhold_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 59 | graspable_objects_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 60 | grasp_quality_scores: list = dataclasses.field(default_factory=list) 61 | val_data: List[datapoint.ValData] = dataclasses.field(default_factory=list) 62 | stereo_imgs: list = dataclasses.field(default_factory=list) 63 | 64 | 65 | def to_list(target): 66 | if target is None: 67 | return [] 68 | target.convert_to_torch_from_numpy() 69 | return [target] 70 | 71 | 72 | class Dataset(Dataset): 73 | def __init__( 74 | self, dataset_uri, hparams, preprocess_image_func=None, datapoint_dataset=None 75 | ): 76 | super().__init__() 77 | 78 | if datapoint_dataset is None: 79 | datapoint_dataset = datapoint.make_dataset(dataset_uri) 80 | 81 | self.datapoint_handles = datapoint_dataset.list() 82 | # No need to shuffle, already shufled based on random uids 83 | self.hparams = hparams 84 | 85 | if preprocess_image_func is None: 86 | self.preprocces_image_func = create_anaglyph 87 | else: 88 | self.preprocces_image_func = preprocess_image_func 89 | 90 | def __len__(self): 91 | return len(self.datapoint_handles) 92 | 93 | def __getitem__(self, idx): 94 | dp: datapoint.Panoptic = self.datapoint_handles[idx].read() 95 | 96 | # Process image 97 | anaglyph = self.preprocces_image_func(dp.stereo) 98 | if dp.val_data.scene_name == "unlabeled_data": 99 | return PanopticOutputs( 100 | 
depth=[]
101 | if dp.depth is None
102 | else [DepthOutput(torch.Tensor(dp.depth), self.hparams)],
103 | room_segmentation=[],
104 | cabinet_door_obbs=[],
105 | handhold_obbs=[],
106 | graspable_objects_obbs=[],
107 | grasp_quality_scores=[],
108 | small_depth=[],
109 | val_data=[dp.val_data],
110 | stereo_imgs=[anaglyph],
111 | )
112 | 
113 | 
114 | # Segmentation targets
115 | segmentation_target = to_list(SegmentationOutput(dp.segmentation, self.hparams))
116 | 
117 | # Ground truth disparity
118 | depth_target = to_list(DepthOutput(dp.depth, self.hparams))
119 | 
120 | # OBB output heads
121 | if dp.cabinet_door_obb:
122 | cabinet_door_obb_target = OBBOutput(
123 | dp.cabinet_door_obb.heat_map,
124 | dp.cabinet_door_obb.vertex_target,
125 | dp.cabinet_door_obb.z_centroid,
126 | dp.cabinet_door_obb.cov_matrices,
127 | self.hparams,
128 | class_field=dp.cabinet_door_obb.classes,
129 | )
130 | else:
131 | cabinet_door_obb_target = None
132 | cabinet_door_obb_target = to_list(cabinet_door_obb_target)
133 | 
134 | if dp.handhold_obb:
135 | handhold_obb_target = OBBOutput(
136 | dp.handhold_obb.heat_map,
137 | dp.handhold_obb.vertex_target,
138 | dp.handhold_obb.z_centroid,
139 | dp.handhold_obb.cov_matrices,
140 | self.hparams,
141 | )
142 | else:
143 | handhold_obb_target = None
144 | handhold_obb_target = to_list(handhold_obb_target)
145 | 
146 | if dp.graspable_objects_obb:
147 | graspable_objects_obb_target = OBBOutput(
148 | dp.graspable_objects_obb.heat_map,
149 | dp.graspable_objects_obb.vertex_target,
150 | dp.graspable_objects_obb.z_centroid,
151 | dp.graspable_objects_obb.cov_matrices,
152 | self.hparams,
153 | class_field=dp.graspable_objects_obb.classes,
154 | shape_emb=dp.graspable_objects_obb.shape_emb,
155 | arti_emb=dp.graspable_objects_obb.arti_emb,
156 | abs_pose_field=dp.graspable_objects_obb.abs_pose,
157 | )
158 | else:
159 | graspable_objects_obb_target = None
160 | graspable_objects_obb_target = to_list(graspable_objects_obb_target)
161 | 
162 | # Grasp quality
163 | # grasp_quality_scores_target = GraspOutput(
164 | # dp.grasps.heat_map, dp.grasps.grasp_success_target, self.hparams
165 | # )
166 | # Convert targets to pytorch
167 | # grasp_quality_scores_target.convert_to_torch_from_numpy()
168 | 
169 | # Collect all targets into the panoptic outputs struct
170 | return PanopticOutputs(
171 | depth=depth_target,
172 | room_segmentation=segmentation_target,
173 | cabinet_door_obbs=cabinet_door_obb_target,
174 | handhold_obbs=handhold_obb_target,
175 | graspable_objects_obbs=graspable_objects_obb_target,
176 | grasp_quality_scores=[],
177 | small_depth=[],
178 | val_data=[dp.val_data],
179 | stereo_imgs=[anaglyph],
180 | )
181 | 
182 | 
183 | def panoptic_collate(batch, rgbd=False) -> Tuple[torch.Tensor, PanopticOutputs]:
184 | # One entry per batch element; each PanopticOutputs carries the stereo images
185 | # and the per-head targets for that sample.
186 | panoptic_targets = PanopticOutputs()
187 | stereo_images_list = []
188 | 
189 | for ii in range(len(batch)):
190 | panoptic_targets.depth.extend(batch[ii].depth)
191 | panoptic_targets.room_segmentation.extend(batch[ii].room_segmentation)
192 | panoptic_targets.cabinet_door_obbs.extend(batch[ii].cabinet_door_obbs)
193 | panoptic_targets.handhold_obbs.extend(batch[ii].handhold_obbs)
194 | panoptic_targets.graspable_objects_obbs.extend(batch[ii].graspable_objects_obbs)
195 | panoptic_targets.grasp_quality_scores.extend(batch[ii].grasp_quality_scores)
196 | panoptic_targets.val_data.extend(batch[ii].val_data)
197 | stereo_images_list.extend(batch[ii].stereo_imgs)
198 | 
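# Batch assembly: torch.stack below produces a (B, 6, H, W) stereo batch. In RGB-D mode
# only the first three channels (left RGB) are kept and each sample's depth target
# (DepthOutput.depth_pred, typically (H, W)) is unsqueezed and concatenated as a fourth
# channel, yielding a (B, 4, H, W) network input.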
199 | stereo_images_torch = torch.stack(stereo_images_list) 200 | if rgbd: 201 | stereo_images_torch = torch.cat( 202 | ( 203 | stereo_images_torch[:, :3, ...], 204 | torch.stack( 205 | [po_target.depth_pred for po_target in panoptic_targets.depth] 206 | ).unsqueeze(1), 207 | ), 208 | dim=1, 209 | ) 210 | 211 | return stereo_images_torch, panoptic_targets 212 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/onnx_plugins.py: -------------------------------------------------------------------------------- 1 | """Plugins that can be used in an ONNX model.""" 2 | import struct 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.nn.modules.utils as utils 9 | 10 | from CARTO.simnet.lib.net.models.layers.cost_volume import ( 11 | dot_product_cost_volume, 12 | DotProductCostVolume, 13 | ) 14 | from CARTO.simnet.lib.net.models.layers.soft_argmin import SoftArgmin, soft_argmin 15 | 16 | # TODO(krishnashankar): Arguments of functions in modules below 17 | # differ from those of the base class(es) they inherit from, and 18 | # pylint complains. For now, disable here and consider disabling 19 | # globally. 20 | 21 | # pylint: disable=arguments-differ 22 | # pylint: disable=protected-access 23 | 24 | 25 | @torch.autograd.function.traceable 26 | class ExportableUpsampleFunction(torch.autograd.Function): 27 | """Upsample function that can be traced for ONNX export.""" 28 | 29 | @staticmethod 30 | def symbolic(g, inputs, scale_factor): 31 | assert scale_factor == 2, "Only 2x upsample implemented" 32 | return g.op( 33 | "TRT_PluginV2", 34 | inputs, 35 | version_s="0.0.1", 36 | namespace_s="", 37 | data_s="", 38 | name_s="UpsampleBilinearEvenSquare", 39 | ) 40 | 41 | @staticmethod 42 | def forward(ctx, inputs, scale_factor): 43 | return F.interpolate( 44 | inputs, scale_factor=scale_factor, mode="bilinear", align_corners=False 45 | ) 46 | 47 | @staticmethod 48 | def backward(_): 49 | raise RuntimeError("Backward not implemented") 50 | 51 | 52 | class ExportableUpsample(nn.Module): 53 | """Upsample module that can be used in an ONNX model.""" 54 | 55 | def __init__(self, scale_factor): 56 | super().__init__() 57 | self.scale_factor = scale_factor 58 | 59 | def forward(self, inputs): 60 | return ExportableUpsampleFunction.apply(inputs, self.scale_factor) 61 | 62 | 63 | class UpsampleWithConvTranspose(nn.Module): 64 | """Upsample model implemented with transposed convolution.""" 65 | 66 | def __init__(self, scale_factor): 67 | super(UpsampleWithConvTranspose, self).__init__() 68 | self.weights = None 69 | self.scale_factor = utils._pair(scale_factor) 70 | 71 | def check_scale_factor(scale_factor): 72 | assert scale_factor == 1 or scale_factor % 2 == 0 73 | 74 | check_scale_factor(self.scale_factor[0]) 75 | check_scale_factor(self.scale_factor[1]) 76 | 77 | def get_kernel_size(self, factor): 78 | return 2 * factor - factor % 2 79 | 80 | def bilinear_upsample_kernel(self, size): 81 | """Get a transpoed convolution kernel that implemented upsampling for the 82 | given size.""" 83 | 84 | def get_factor_and_center(size): 85 | factor = (size + 1) // 2 86 | if size % 2 == 1: 87 | center = factor - 1 88 | else: 89 | center = factor - 0.5 90 | return factor, center 91 | 92 | factor_h, center_h = get_factor_and_center(size[0]) 93 | factor_w, center_w = get_factor_and_center(size[1]) 94 | og = np.ogrid[: size[0], : size[1]] 95 | return (1 - abs((og[0] - center_h) / factor_h)) * ( 96 | 1 - abs((og[1] - 
center_w) / factor_w) 97 | ) 98 | 99 | def bilinear_upsample_weights(self, factor, nchannels): 100 | """Get transposed convolution weights for upsampling.""" 101 | filter_size_h = self.get_kernel_size(factor[0]) 102 | filter_size_w = self.get_kernel_size(factor[1]) 103 | 104 | weights = np.zeros( 105 | (filter_size_h, filter_size_w, nchannels, nchannels), dtype=np.float32 106 | ) 107 | 108 | kernel = self.bilinear_upsample_kernel((filter_size_h, filter_size_w)) 109 | 110 | for c in range(nchannels): 111 | weights[:, :, c, c] = kernel 112 | 113 | return weights 114 | 115 | def forward(self, inputs): 116 | in_channels = inputs.shape[1] 117 | if self.weights is None: 118 | weights = self.bilinear_upsample_weights(self.scale_factor, in_channels) 119 | # Order weights to be compatible with pytorch (in_channels, out_channels, height, width). 120 | self.weights = ( 121 | torch.from_numpy(weights.transpose(2, 3, 0, 1)) 122 | .to(inputs.device) 123 | .type(inputs.dtype) 124 | ) 125 | output = torch.nn.functional.conv_transpose2d( 126 | inputs, 127 | self.weights, 128 | stride=self.scale_factor, 129 | padding=(self.scale_factor[0] // 2, self.scale_factor[1] // 2), 130 | ) 131 | return output 132 | 133 | 134 | @torch.autograd.function.traceable 135 | class ExportableDotProductCostVolumeFunction(torch.autograd.Function): 136 | @staticmethod 137 | def symbolic(g, left, right, num_disparities, is_right): 138 | assert not is_right 139 | serialized_data = struct.pack(" 0: 207 | fix_module_train(child_module) 208 | 209 | 210 | def fix_module_onnx(module): 211 | """Replace all modules in the given module with ONNX-compatible modules.""" 212 | for child_module_name, child_module in module.named_children(): 213 | if isinstance(child_module, nn.Upsample): 214 | scale_factor = int(child_module.scale_factor) 215 | # TensorRT plugin can only load 2x upsample from ONNX currently, so 216 | # otherwise use transposed convolution. 
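# Note: with the `if False and ...` guard below, the TensorRT plugin path
# (ExportableUpsample) is effectively disabled, so every nn.Upsample is swapped for
# UpsampleWithConvTranspose, which reproduces bilinear upsampling with a fixed bilinear
# kernel in conv_transpose2d and therefore exports through standard ONNX ops.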
217 | if False and scale_factor == 2: 218 | module._modules[child_module_name] = ExportableUpsample(scale_factor) 219 | else: 220 | module._modules[child_module_name] = UpsampleWithConvTranspose( 221 | scale_factor 222 | ) 223 | elif isinstance(child_module, DotProductCostVolume): 224 | num_disparities = child_module.num_disparities 225 | is_right = child_module.is_right 226 | module._modules[child_module_name] = ExportableDotProductCostVolume( 227 | num_disparities, is_right 228 | ) 229 | elif isinstance(child_module, SoftArgmin): 230 | module._modules[child_module_name] = ExportableSoftArgmin() 231 | elif len(list(child_module.children())) > 0: 232 | fix_module_onnx(child_module) 233 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/box_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib.net.post_processing import pose_outputs 8 | from CARTO.simnet.lib.net.post_processing import nms 9 | from CARTO.simnet.lib.net.post_processing.eval2d import Detection 10 | from CARTO.simnet.lib.net import losses 11 | 12 | _mask_l1_loss = losses.MaskedL1Loss(downscale_factor=1) 13 | _mse_loss = losses.MaskedMSELoss() 14 | 15 | 16 | class BoxOutput: 17 | def __init__(self, heatmap, vertex_field, hparams, ignore_mask=None): 18 | self.heatmap = heatmap 19 | self.vertex_field = vertex_field 20 | self.ignore_mask = ignore_mask 21 | self.is_numpy = False 22 | self.hparams = hparams 23 | 24 | # Converters for torch to numpy 25 | def convert_to_numpy_from_torch(self): 26 | self.heatmap = np.ascontiguousarray(self.heatmap.float().cpu().numpy()) 27 | self.vertex_field = np.ascontiguousarray( 28 | self.vertex_field.float().cpu().numpy() 29 | ) 30 | self.vertex_field = self.vertex_field.transpose((0, 2, 3, 1)) 31 | self.vertex_field = self.vertex_field / 100.0 32 | self.is_numpy = True 33 | 34 | def convert_to_torch_from_numpy(self): 35 | self.vertex_field = self.vertex_field.transpose((2, 0, 1)) 36 | self.vertex_field = 100.0 * self.vertex_field 37 | self.vertex_field = torch.from_numpy( 38 | np.ascontiguousarray(self.vertex_field) 39 | ).float() 40 | self.ignore_mask = torch.from_numpy( 41 | np.ascontiguousarray(self.ignore_mask) 42 | ).bool() 43 | self.heatmap = torch.from_numpy(np.ascontiguousarray(self.heatmap)).float() 44 | self.is_numpy = False 45 | 46 | def get_detections(self, min_confidence=0.02, overlap_thresh=0.75): 47 | if not self.is_numpy: 48 | self.convert_to_numpy_from_torch() 49 | detections = create_detections_from_outputs( 50 | np.copy(self.heatmap[0]), 51 | np.copy(self.vertex_field[0]), 52 | min_confidence=min_confidence, 53 | ) 54 | detections = nms.run(detections, overlap_thresh=overlap_thresh) 55 | return detections 56 | 57 | def get_visualization_img(self, left_img, is_pretty=False): 58 | if not self.is_numpy: 59 | self.convert_to_numpy_from_torch() 60 | if is_pretty: 61 | return draw_pretty_detection_from_outputs( 62 | self.heatmap[0], self.vertex_field[0], left_img 63 | ) 64 | return draw_detection_from_outputs( 65 | self.heatmap[0], self.vertex_field[0], left_img 66 | ) 67 | 68 | def compute_loss(self, pose_targets, log, prefix): 69 | if self.is_numpy: 70 | raise ValueError("Output is not in torch mode") 71 | vertex_target = torch.stack( 72 | [pose_target.vertex_field for pose_target in pose_targets] 73 | ) 74 | heatmap_target = torch.stack( 75 | 
[pose_target.heatmap for pose_target in pose_targets] 76 | ) 77 | ignore_target = torch.stack( 78 | [pose_target.ignore_mask for pose_target in pose_targets] 79 | ) 80 | 81 | # Move to GPU 82 | heatmap_target = heatmap_target.to(torch.device("cuda:0")) 83 | vertex_target = vertex_target.to(torch.device("cuda:0")) 84 | ignore_target = ignore_target.to(torch.device("cuda:0")) 85 | 86 | vertex_loss = _mask_l1_loss(vertex_target, self.vertex_field, heatmap_target) 87 | log[f"{prefix}/vertex_loss"] = vertex_loss.item() 88 | heatmap_loss = _mse_loss(self.heatmap, heatmap_target, ignore_target) 89 | log[f"{prefix}/heatmap"] = heatmap_loss.item() 90 | return ( 91 | self.hparams.loss_vertex_mult * vertex_loss 92 | + self.hparams.loss_heatmap_mult * heatmap_loss 93 | ) 94 | 95 | 96 | def draw_detection_from_outputs( 97 | heatmap_output, vertex_output, c_img, min_confidence=0.4 98 | ): 99 | c_img_gray = np.zeros(c_img.shape) 100 | for i in range(3): 101 | c_img_gray[:, :, i] = cv2.cvtColor(c_img, cv2.COLOR_BGR2GRAY) 102 | 103 | peaks = pose_outputs.extract_peaks_from_centroid( 104 | heatmap_output, min_confidence=min_confidence 105 | ) 106 | peak_img = pose_outputs.draw_peaks(heatmap_output, peaks) 107 | bboxes_ext = extract_vertices_from_peaks(np.copy(peaks), vertex_output, c_img_gray) 108 | img = draw_2d_boxes(c_img_gray, bboxes_ext) 109 | img = cv2.addWeighted(img.astype(np.uint8), 0.9, peak_img.astype(np.uint8), 0.4, 0) 110 | return img 111 | 112 | 113 | def draw_pretty_detection_from_outputs( 114 | heatmap_output, vertex_output, c_img, min_confidence=0.4 115 | ): 116 | # c_img_gray = np.zeros(c_img.shape) 117 | # for i in range(3): 118 | # c_img_gray[:, :, i] = cv2.cvtColor(c_img, cv2.COLOR_BGR2GRAY) 119 | 120 | c_img = cv2.cvtColor(c_img, cv2.COLOR_BGR2RGB) 121 | 122 | peaks = pose_outputs.extract_peaks_from_centroid( 123 | heatmap_output, min_confidence=min_confidence 124 | ) 125 | bboxes_ext = extract_vertices_from_peaks(np.copy(peaks), vertex_output, c_img) 126 | img = draw_2d_boxes(c_img, bboxes_ext) 127 | return img 128 | 129 | 130 | def create_detections_from_outputs(heatmap_output, vertex_output, min_confidence=0.1): 131 | peaks = pose_outputs.extract_peaks_from_centroid( 132 | heatmap_output, min_confidence=min_confidence 133 | ) 134 | bboxes_ext = extract_vertices_from_peaks( 135 | np.copy(peaks), vertex_output, heatmap_output 136 | ) 137 | detections = [] 138 | for peak, bbox_ext in zip(peaks, bboxes_ext): 139 | score = heatmap_output[peak[0], peak[1]] 140 | bbox = [ 141 | np.array([bbox_ext[0][0], bbox_ext[0][1]]), 142 | np.array([bbox_ext[1][0], bbox_ext[1][1]]), 143 | ] 144 | detection = Detection(class_label="Car", bbox=bbox, score=score) 145 | detections.append(detection) 146 | return detections 147 | 148 | 149 | def extract_vertices_from_peaks(peaks, vertex_fields, c_img, scale_factor=1): 150 | assert peaks.shape[1] == 2 151 | assert vertex_fields.shape[2] == 4 152 | height = vertex_fields.shape[0] * scale_factor 153 | width = vertex_fields.shape[1] * scale_factor 154 | vertex_fields[:, :, ::2] = (1.0 - vertex_fields[:, :, ::2]) * (2 * height) - height 155 | vertex_fields[:, :, 1::2] = (1.0 - vertex_fields[:, :, 1::2]) * (2 * width) - width 156 | bboxes = [] 157 | for ii in range(peaks.shape[0]): 158 | bbox = get_bbox_from_vertex( 159 | vertex_fields, peaks[ii, :], scale_factor=scale_factor 160 | ) 161 | bboxes.append(bbox) 162 | return bboxes 163 | 164 | 165 | def get_bbox_from_vertex(vertex_fields, index, scale_factor=64): 166 | assert index.shape[0] == 2 167 | index[0] = 
int(index[0] / scale_factor) 168 | index[1] = int(index[1] / scale_factor) 169 | bbox = vertex_fields[index[0], index[1], :] 170 | bbox = [[bbox[0], bbox[1]], [bbox[2], bbox[3]]] 171 | bbox = scale_factor * (index) - bbox 172 | return bbox 173 | 174 | 175 | def draw_2d_boxes_with_colors(img, bboxes, colors): 176 | for bounding_box, color in zip(bboxes, colors): 177 | bbox = bounding_box.bounding_box 178 | pt1 = (int(bbox[0][1]), int(bbox[0][0])) 179 | pt2 = (int(bbox[1][1]), int(bbox[1][0])) 180 | img = cv2.rectangle(img, pt1, pt2, color, 2) 181 | return img 182 | 183 | 184 | def draw_2d_boxes(c_img, bboxes): 185 | c_img = cv2.cvtColor(np.array(c_img), cv2.COLOR_BGR2RGB) 186 | for bounding_box in bboxes: 187 | bbox = bounding_box.bbox 188 | pt1 = (int(bbox[0][0]), int(bbox[0][1])) 189 | pt2 = (int(bbox[1][0]), int(bbox[1][1])) 190 | c_img = cv2.rectangle(c_img, pt1, pt2, (255, 0, 0), 2) 191 | return c_img 192 | 193 | 194 | def draw_2d_boxes_with_labels(c_img, bboxes): 195 | c_img = cv2.cvtColor(np.array(c_img), cv2.COLOR_BGR2RGB) 196 | for bounding_box in bboxes: 197 | bbox = bounding_box.bbox 198 | pt1 = (int(bbox[0][0]), int(bbox[0][1])) 199 | pt2 = (int(bbox[1][0]), int(bbox[1][1])) 200 | c_img = cv2.rectangle(c_img, pt1, pt2, (255, 0, 0), 2) 201 | c_img = draw_class_label( 202 | c_img, pt1 + ((bbox[1] - bbox[0]) / 2.0), bounding_box.class_label 203 | ) 204 | 205 | return c_img 206 | 207 | 208 | def draw_class_label(c_img, pixel_center, class_label): 209 | color = (0, 255, 0) # green 210 | if class_label == "null": 211 | return c_img 212 | class_label = " ".join(class_label.split("_")) 213 | # TODO: add more metadata to class labels so we can have human friendly names and styles 214 | pixel_x = int(pixel_center[0]) 215 | pixel_y = int(pixel_center[1]) 216 | size = 0.75 217 | thickness = 2 218 | color = (0, 255, 0) 219 | c_img = cv2.putText( 220 | c_img.copy(), 221 | class_label, 222 | (pixel_x, pixel_y), 223 | cv2.FONT_HERSHEY_SIMPLEX, 224 | size, 225 | color, 226 | thickness, 227 | cv2.LINE_AA, 228 | ) 229 | return c_img 230 | --------------------------------------------------------------------------------
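A rough, self-contained sketch of the corner-offset arithmetic that extract_vertices_from_peaks() and get_bbox_from_vertex() in box_outputs.py implement (synthetic values chosen for illustration; this snippet is independent of the repository and only mirrors the un-normalization and peak-relative decoding shown above):

import numpy as np

height, width = 64, 96                      # heatmap resolution (illustrative)
peak = np.array([30, 40])                   # detected centroid peak (row, col)
corners = np.array([[20, 25], [42, 60]])    # true box corners (row, col)

# Encode: store peak-relative offsets, normalized the same way the vertex field is
# un-normalized in extract_vertices_from_peaks().
offsets = (peak - corners).reshape(-1).astype(np.float64)      # [dy0, dx0, dy1, dx1]
encoded = np.empty(4)
encoded[::2] = 1.0 - (offsets[::2] + height) / (2 * height)    # row components
encoded[1::2] = 1.0 - (offsets[1::2] + width) / (2 * width)    # col components

# Decode with the same arithmetic as the post-processing code.
decoded = np.empty(4)
decoded[::2] = (1.0 - encoded[::2]) * (2 * height) - height
decoded[1::2] = (1.0 - encoded[1::2]) * (2 * width) - width
recovered_corners = peak - decoded.reshape(2, 2)               # recovers the original corners
print(recovered_corners)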