├── CARTO ├── simnet │ └── lib │ │ ├── net │ │ ├── init │ │ │ ├── __init__.py │ │ │ └── default_init.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── learning_rate.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── soft_argmin.py │ │ │ │ ├── matchability.py │ │ │ │ ├── transition_blocks.py │ │ │ │ ├── hdc_functions.py │ │ │ │ ├── stochastic_depth.py │ │ │ │ ├── cost_volume.py │ │ │ │ ├── fpn_bilinear.py │ │ │ │ └── residual_blocks.py │ │ ├── post_processing │ │ │ ├── utils.py │ │ │ ├── surface_outputs.py │ │ │ ├── orochi_outputs.py │ │ │ ├── nms.py │ │ │ ├── segmentation_outputs.py │ │ │ ├── depth_outputs.py │ │ │ ├── keypoint_outputs.py │ │ │ └── box_outputs.py │ │ ├── data_module.py │ │ ├── pre_processing │ │ │ ├── grasp_inputs.py │ │ │ ├── keypoint_inputs.py │ │ │ ├── box_inputs.py │ │ │ ├── pose_inputs.py │ │ │ └── obb_inputs.py │ │ ├── losses.py │ │ ├── onnx_plugins.py │ │ └── dataset.py │ │ ├── primitive.py │ │ └── onnx_plugins.py ├── __init__.py ├── Encoder │ ├── inference_config.txt │ └── net_train.py ├── lib │ ├── rename_unpickler.py │ ├── compression.py │ └── real_data.py └── Decoder │ ├── models │ ├── lipschitz_norm.py │ ├── lr_schedules.py │ └── joint_state_decoder.py │ ├── multi_poly.py │ ├── visualizing │ ├── offscreen.py │ ├── visualize_sdf_values.py │ └── visualize_asdf_dataset.ipynb │ ├── data │ ├── verify_watertight.py │ ├── visualize_dataset_pytorch.py │ ├── visualize_dataset.py │ ├── asdf_dataset.py │ └── verify_partnet.py │ └── loss.py ├── figure_1.png ├── datasets └── decoder │ ├── split_files │ ├── StorageFurniture_prismatic │ │ └── object_ids.yaml │ ├── StorageFurniture_revolute │ │ └── object_ids.yaml │ ├── Oven_revolute │ │ └── object_ids.yaml │ ├── WashingMachine_revolute │ │ └── object_ids.yaml │ ├── Knife_prismatic │ │ └── object_ids.yaml │ ├── Microwave_revolute │ │ └── object_ids.yaml │ ├── Refrigerator_revolute │ │ └── object_ids.yaml │ ├── Knife_revolute │ │ └── object_ids.yaml │ ├── Dishwasher_revolute │ │ └── object_ids.yaml │ ├── Stapler_revolute │ │ └── object_ids.yaml │ ├── Table_prismatic │ │ └── object_ids.yaml │ └── Laptop_revolute │ │ └── object_ids.yaml │ └── id_lists │ └── All_Real_Categories.txt ├── .gitignore ├── setup.py ├── requirements.txt ├── download_archives.sh └── scripts ├── real_dataset_vis.ipynb ├── preprocess_partnetmobility.py └── full_inference.py /CARTO/simnet/lib/net/init/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robot-learning-freiburg/CARTO/HEAD/figure_1.png -------------------------------------------------------------------------------- /CARTO/__init__.py: 
-------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | ROOT_DIR = pathlib.Path(__file__).parent.resolve() 4 | -------------------------------------------------------------------------------- /CARTO/Encoder/inference_config.txt: -------------------------------------------------------------------------------- 1 | --train_batch_size=1 2 | --train_num_workers=1 3 | --test_path=datasets/synthetic 4 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/StorageFurniture_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 82c05fe4890a7f5112de5317fe5b354f 3 | - d69d9de0c79ac6a9c59350d819542ec7 4 | train: 5 | - 606d50b144d8ca164da5feafe6f1c8fc 6 | - 1af4a1dfa4f94cd44da5feafe6f1c8fc 7 | - 33ec57af7f648994da5feafe6f1c8fc 8 | - 2950d1baed4dbd78c59350d819542ec7 9 | - 21ae39cf6ba8557f4da5feafe6f1c8fc 10 | val: [] 11 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/StorageFurniture_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 170be0087472182bc59350d819542ec7 3 | - 17d25c26485edcf94da5feafe6f1c8fc 4 | train: 5 | - 10c14b0cb76f87584da5feafe6f1c8fc 6 | - 1548461b13adc0d0c59350d819542ec7 7 | - 1fc8231114fa42a7c59350d819542ec7 8 | - 19c79a42f68d7d444da5feafe6f1c8fc 9 | - 1caaaa5c1da4dd2dc59350d819542ec7 10 | - 198cbe57b01bad9dc59350d819542ec7 11 | val: [] 12 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Oven_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - a46e0c10f17f928ba2bc8d1e386113dc 3 | - 3ea1ace396f6ccae48407a54b1fbfda8 4 | train: 5 | - b296fbfbbe5dccf09c12d6260da9ac2b 6 | - bae2babb26dc352b20489998d734835a 7 | - eff23594cc0aed121b3e6b75a323070-0 8 | - bb5533538179f6c39209092a6c03f1bd 9 | - 8c2491e5245804d1ffc6e457221b9271 10 | - b8cf469bc1b42ab64a44340bf227e40 11 | - ef97ff5c1d6a00f2a760e402290727de 12 | val: [] 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | wandb/ 3 | datasets/decoder/generated_data/ 4 | datasets/partnet-mobility-v0/ 5 | simnet/lib/datasets/*/ 6 | *.json 7 | results/ 8 | *.ply 9 | *.pyc 10 | vis/* 11 | datasets/runs/* 12 | *.egg-info 13 | .vscode/ 14 | external_libs/ 15 | */eval/reconstruction/* 16 | __pycache__/ 17 | datasets/decoder/runs/* 18 | datasets/encoder/runs/* 19 | datasets/real/* 20 | datasets/synthetic/* 21 | *.tar.gz 22 | downloaded_archives/* 23 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/WashingMachine_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 5528ee64-7656-40e4-8db0-70fd39427b4d 3 | - 8b04de89-4f3f-45d8-8d7a-6bb5958e5340 4 | train: 5 | - 62e22f4d1846d8c1fdc6c1669e5c540 6 | - 0d31000f-e876-4751-876d-efa6a61fa9b2 7 | - 4163de2ce7f6f59aed1d8381d2c075c2-0 8 | - d87cf480-ba57-43b1-b1f2-bae2b8fe2fa4 9 | - u094c89ee-d9f6-4266-a9b3-c1f2549b1105 10 | - 04569f2f-3e07-4655-9337-bfa41a5ccbc0 11 | - ucfaedfea-c15a-495c-9037-21108eeeb006 12 | - 265d042dcfed6f15c357c21161963e89 13 | val: [] 14 | 
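The per-category object_ids.yaml files under datasets/decoder/split_files/ each map the keys test/train/val to lists of PartNet-Mobility object ids. A minimal loading sketch, assuming PyYAML is available and a source checkout laid out as in the tree above; the helper name load_split is illustrative and not part of the repo, only ROOT_DIR comes from CARTO/__init__.py:

import yaml

from CARTO import ROOT_DIR  # package directory, as defined in CARTO/__init__.py


def load_split(category: str, split: str = "train") -> list:
    # e.g. load_split("Oven_revolute", "test") -> list of object-id strings
    split_file = (
        ROOT_DIR.parent / "datasets" / "decoder" / "split_files" / category / "object_ids.yaml"
    )
    with open(split_file, "r") as fh:
        ids = yaml.safe_load(fh)
    return ids.get(split, [])
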
-------------------------------------------------------------------------------- /datasets/decoder/split_files/Knife_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 5d6201caa12611fe89f4664416242a41 3 | - e7523e396f8d4ae171e397fe45dce6b 4 | - 35233c0d786b5956d25d105fdf500c48 5 | train: 6 | - ba60dc6d-526e-4014-961a-5049df9079c6-0 7 | - fed0863a69b3744c44f6844c4f2ce888-0 8 | - fe95df61cc16452ccb3316c0fb4cfa01-0 9 | - 3181976321565dfee9027543872faef 10 | - 59481570acb7a0872d4ba5e1aa44cc40-0 11 | - 12f3efd9-f013-4aab-922c-0328502acd3f 12 | - 32036cc5-6e63-47cf-96ba-89ef2be3950e-0 13 | - 9f264c87-89e3-4b06-8f36-b618ec54694c 14 | - 35c3d7b9-7dec-4e66-a962-14ea0fde4cad 15 | val: [] 16 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Microwave_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - bdb10a17b04e2adbb7fb7f3ae74b618c 3 | - 4f956e259344d4a3599fb6902c958d23 4 | - 891f65c773939191c834958aed613724 5 | train: 6 | - b9f1eeea355194c19941e769880462e7 7 | - 87bae84777fe8b702bac1bcdfc2402d2 8 | - 95bc6fb98624ea3229d75ea275a1cb4e 9 | - df5bd51614d2fbdef114be17e2e7c4b5 10 | - c3bb5f3c842a6c2d178e7d331e641179 11 | - f9544effad178100be92f74d81ff60bf 12 | - dc5c91c8c01b1c8c506c648223cdabe9 13 | - c75ebd7c340649ba5ad304c2564ae1df 14 | - 6d83dea57df3c4a3500158c23c4c5a8e 15 | - 42aac49442bb9f8bb4e3935c6cee4b35 16 | val: [] 17 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Refrigerator_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - bc82358ed0ee28d41277c98ea0908b0 3 | - 5b81d7830eabb7547c6e1fb05e1b9037 4 | - ad6bd7e24e5bc25f3593835fe348a036 5 | train: 6 | - 1515a188cbc382fa84ad27a2f1142330 7 | - 6fb955194baf07a750a5eaedf6275e1b 8 | - 93d69af3c0034d3d9807c66948157e66 9 | - 7028b24b7d64efaf3194539af1047dcf 10 | - 6601ef650f03e000c49931aa7ca8fecb 11 | - 9e53ec8bedae98859807c66948157e66 12 | - 4d8d0cb708324170c98c13d6112727de 13 | - 58c878d494ecbbd62835d3f06aeb6e0 14 | - 3158fd17e409d38a732208e596b26ebc 15 | - 827c9a85df258dd8faf0b97ff18d3546 16 | val: [] 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Install script for setuptools.""" 2 | 3 | import setuptools 4 | from os import path 5 | 6 | # read the contents of your README file 7 | this_directory = path.abspath(path.dirname(__file__)) 8 | with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: 9 | long_description = f.read() 10 | 11 | # TODO 12 | # Add requirements.txt parsing 13 | 14 | setuptools.setup( 15 | name="carto", 16 | version="0.0.1", 17 | author="Nick Heppert", 18 | author_email="heppert@cs.uni-freiburg.de", 19 | packages=setuptools.find_packages(), 20 | python_requires=">=3.8", 21 | ) 22 | -------------------------------------------------------------------------------- /CARTO/lib/rename_unpickler.py: -------------------------------------------------------------------------------- 1 | import io 2 | import pickle 3 | 4 | 5 | class Unpickler(pickle.Unpickler): 6 | def find_class(self, module, name): 7 | renamed_module = module 8 | 9 | # Ensure old checkpoints can still be loaded 10 | renamed_module = renamed_module.replace( 11 | 
"simnet.shape_pretraining_articulated", "CARTO.Decoder" 12 | ) 13 | renamed_module = renamed_module.replace("simnet.lib", "CARTO.simnet.lib") 14 | return super(Unpickler, self).find_class(renamed_module, name) 15 | 16 | 17 | def renamed_load(file_obj): 18 | return Unpickler(file_obj).load() 19 | 20 | 21 | def renamed_loads(pickled_bytes): 22 | file_obj = io.BytesIO(pickled_bytes) 23 | return renamed_load(file_obj) 24 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/soft_argmin.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | @torch.jit.script 9 | def soft_argmin(input): 10 | _, channels, _, _ = input.shape 11 | 12 | softmin = F.softmin(input, dim=1) 13 | index_tensor = torch.arange( 14 | 0, channels, dtype=softmin.dtype, device=softmin.device 15 | ).view(1, channels, 1, 1) 16 | output = torch.sum(softmin * index_tensor, dim=1, keepdim=True) 17 | return output 18 | 19 | 20 | class SoftArgmin(nn.Module): 21 | """Compute soft argmin operation for given cost volume""" 22 | 23 | def forward(self, input): 24 | return soft_argmin(input) 25 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Knife_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - a683ed081504a35e4a9a3a0b87d50a92 3 | - b4f8a49abc400a775d6ddb389935ee57 4 | - u51509034-d4f7-4ef7-b014-6660f4df034d-0 5 | - 96a7c39f7eb90f65c90183d47cf3c337-1 6 | train: 7 | - 31f86223e3faaec3eae5cab1248d1ec6-0 8 | - 31f86223e3faaec3eae5cab1248d1ec6-1 9 | - 23fd9817d509fe472bf266a8f0187ce5-1 10 | - 737fd576f8eae54adfb1b24fd658f3b5-0 11 | - c7a96262d5dfc1ae72c447ef6e5cffc2 12 | - e9d3d9ef-57e3-4f0a-bbc7-e1cc75947ccd-7 13 | - 75bfa1045150e49fe177ccfa080b14b0-0 14 | - ceb3b39c9a035752b4fc059d1d10ec5d-0 15 | - 23fd9817d509fe472bf266a8f0187ce5-0 16 | - 581ad58ce8664d2d4ff0e6230d32c1e3 17 | - u2bba3644-e88e-4650-9124-e9964702f9ef-0 18 | - ud489e3ab-3fac-4753-8373-f5d4cebaeec5 19 | - fca703c2489237d51b44a9962207f944 20 | - 19dff8164764e2a259f37b6e82c5e93 21 | val: [] 22 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def to_numpy_from_torch(torch_tensor: torch.Tensor, multiplier: float = 100.0): 6 | numpy_array = np.ascontiguousarray(torch_tensor.float().cpu().numpy()) 7 | if numpy_array.ndim == 3: # Not batched 8 | # print(f"not batched {numpy_array.shape = }") 9 | numpy_array = np.expand_dims(numpy_array, 0) # Add one dimension 10 | numpy_array = numpy_array.transpose((0, 2, 3, 1)) 11 | return numpy_array / multiplier 12 | 13 | 14 | def to_torch_from_numpy(numpy_array: np.ndarray, multiplier: float = 100.0): 15 | numpy_array = numpy_array.transpose((2, 0, 1)) 16 | numpy_array = numpy_array * multiplier 17 | torch_tensor = torch.from_numpy(np.ascontiguousarray(numpy_array)).float() 18 | return torch_tensor 19 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/init/default_init.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | 
def default_init(module): 5 | """Initialize parameters of the module. 6 | 7 | For convolution, weights are initialized by Kaiming method and 8 | biases are initialized to zero. 9 | For batch normalization, scales and biases are set to 1 and 0, 10 | respectively. 11 | """ 12 | if isinstance(module, nn.Conv2d): 13 | nn.init.kaiming_normal_(module.weight.data) 14 | if module.bias is not None: 15 | module.bias.data.zero_() 16 | elif isinstance(module, nn.Conv3d): 17 | nn.init.kaiming_normal_(module.weight.data) 18 | if module.bias is not None: 19 | module.bias.data.zero_() 20 | elif isinstance(module, nn.BatchNorm2d): 21 | module.weight.data.fill_(1) 22 | module.bias.data.zero_() 23 | elif isinstance(module, nn.BatchNorm3d): 24 | module.weight.data.fill_(1) 25 | module.bias.data.zero_() 26 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Dishwasher_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - b1080bd937b04a44575f4e5007488531 3 | - 496dcf99-6e76-480c-8fab-a5579f16f2c7 4 | - 9112f0ee6b1cdf5082ec48ff3a4fe07c 5 | - a377f5af14ac6710a168e247bb97e471 6 | - cc8161b35f7bef958c88d30f502a452 7 | train: 8 | - a2caaa68364f6207f054969eeb39ff86 9 | - 187d79cd04b2bdfddf3a1b0d597ce76e 10 | - af913c310f1b978ae6488a574e8954a5 11 | - a62b6a19d2093bc91cbd656f2f1bc2ff 12 | - d95f6ea8-cda0-4d59-aa49-11309e3f0ce3 13 | - 6e51cc2c2da50c6a59c5c7ba83ec931a 14 | - a238b87f02c5de1edf3a1b0d597ce76e 15 | - aa4ad2f41efb815cb022c94235bc8601 16 | - 503b4dff71b404dabf195d81040cc60 17 | - c5f76c9a4137a3563862b05b9038dcc 18 | - 5d17e90f512a3dc7df3a1b0d597ce76e 19 | - c6090fb2806b2abfa5f4a1f264741b67 20 | - 55b0f47aea128c3b91d8be9599fbaa1f 21 | - 7d19e1db73ebfee26f893b5bc716a3fa 22 | - 4e9832bbbb077f9c5c5adfeaec1397f 23 | - 795af925dfc8897b035d20a1a3ca345 24 | - 93b7c0394cc309c8df3a1b0d597ce76e 25 | - 66725b8cad4355a03735baeeeb56a00 26 | val: [] 27 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Stapler_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 376eb047b40ef4f6a480e3d8fdbd4a92 3 | - 72a2bd9428f7179357fcd7a97096d25 4 | - 20c5096ea98cc955920de219c00d1c3b 5 | - d9378f9a4a7d6514602a101aa41a6f48 6 | - ue12a29d7-6d30-4159-ac11-3c6a058ad354 7 | train: 8 | - 58a427e5201aa43be00ace8e7c1a5eeb 9 | - 8f54f0bec8eb5d35d25169d37940fb64 10 | - f636f0aa2025ba3923c841f9d5051936 11 | - 3800d2ab6bc278bcd5a3e6010c55b78e 12 | - 453034dc-b04a-4415-8c43-16d6d23c47b2 13 | - 8c34afa29665356013b1d3e1528f0506-0 14 | - u26949e8f-8139-485b-99f9-694c026ed5a6 15 | - d01ff66659767d50cee19268a161fc4a 16 | - 88ac7b2b3050f1f861f7b52424be58ab 17 | - b3188e51216de8cce2e4961161b75547 18 | - 8d152be34b41785677937146265c551a 19 | - dc2cda7d-6fd5-48dd-8f7e-7524d7eb1c0a 20 | - 37b40b7e9290c0a330314ffb9bb887b5 21 | - c16cba81-714d-4b1a-94cd-7a148af83db0 22 | - 6a030b1836586b9f7e1c85c5c15da7fb 23 | - u9ea1219b-e360-4351-ae52-f589989c58e3-0 24 | - 98bc3afca001f433a1702a37604ec6f 25 | - f39912a4f0516fb897371d1e7cc637f3 26 | val: [] 27 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Table_prismatic/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 21227197948fd2857c2f94a943a8669b 3 | - e64f3681d7c76bb743638dabe1eb5336 4 | - 31c090b23f52bb61208c8c852ee795bc 5 | - 
299ff1bf2618a4b584b039efed4b32d7 6 | - 415d7746f792eb1de0445fc6d980dd5c 7 | train: 8 | - 949e39403ab4fab37ade8e3ca8db8db3 9 | - 29f110b8740bd8068c427edcde5d5e2b 10 | - 74b8222078ba776c661673811de66400 11 | - c9857deb88989a67b5851007eadc6f74 12 | - a19e6780182c72cf9bf8bea04806ba15 13 | - 4aab0e569f1dc3bc8d7e9f13fd8f661d 14 | - 48045af90c7959e5738e43095496b061 15 | - 70d0937e1d38a9c2a45b742ddc5add59 16 | - a95828fa4607295674c8eb7e4d6198a5 17 | - 9e42bbdbfe36680391e4d6c585a697a 18 | - a516711827a396085528d560ddea455 19 | - 20edff7e1500fc4ed45f502ecff9e44f 20 | - 7b5b7bfa8580e913e2580b23e60e4674 21 | - 712d2c844d61aa9cefead98a255f706f 22 | - 28001cb70c38f19cf32b6091d9628440 23 | - 78c4b505894342269299936b751bd77b 24 | - 4dc3e9e293450817d3dad974dc098fa1 25 | - 2dc57230d14506eacd6ce29440b718cf 26 | - 9dd80e356880c9deaf268f6180933aa3 27 | val: [] 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # git+https://github.com/kevinleestone/pyrender.git@bf4184ea1079ef328ce0f2a55e52f17520927067 2 | numpy==1.22.3 3 | tqdm 4 | trimesh 5 | zstandard 6 | tyro 7 | shortuuid 8 | # opencv-python==4.5.5.64 9 | # opencv-python==4.4.0.46 10 | opencv-python==4.3.0.36 11 | wandb 12 | matplotlib 13 | plyfile 14 | opentsne 15 | seaborn 16 | open3d 17 | roma 18 | pytorch_lightning==1.6.3 19 | QtPy==2.1.0 20 | labelcloud # for running the labeling processs 21 | git+https://github.com/heppert-tri/mesh_to_sdf.git 22 | git+https://github.com/heppert-tri/urdfpy.git 23 | git+https://github.com/facebookresearch/pytorch3d.git@stable 24 | 25 | # ipython 26 | # boto3 27 | # colour-demosaicing 28 | # scikit-image 29 | # lxml 30 | # # pytorch_lightning==1.4.0 31 | # setuptools==59.5.0 32 | # scikit-learn 33 | # fvcore 34 | # blake3 35 | # base58 36 | # rich 37 | # coloredlogs 38 | # rtree 39 | # py3ode==1.2.0.dev15 40 | # transformers 41 | # sentence-transformers 42 | # yapf 43 | # h5py 44 | # jupyter 45 | # plotly 46 | # zstd 47 | # torchviz 48 | -------------------------------------------------------------------------------- /datasets/decoder/split_files/Laptop_revolute/object_ids.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | - 125c93cbc6544bd1f9f50a550b8c1cce 3 | - 4fc3d56243d2d8801ef1ccfaf50f2048 4 | - 8d70fb6adc63e21eb7e0383b9609fa5 5 | - cc691d9e8e189ce47a381a112bfd785 6 | - 3b2db36aaa2546b99c7c402f274622c 7 | train: 8 | - 66e3b7c7f2e8e9297fd8853234f5e918 9 | - 4bacb1694e86005afb6e846333373df8 10 | - a4b410734514306ac401e233323032d6 11 | - afa49e97861c45e5e738f481f8560d58 12 | - f7c26b8c94ba8214397c35f585745a82 13 | - 7df09674bc991904c78df40cf2e9097a 14 | - cbcb79f534518dfbcfe78be5b7b99c8d 15 | - 5d544ee4b094c6606436916a86a90ed7 16 | - 6b78948484df58cdc664c3d4e2d59341 17 | - 241ec8a746dd1cfc78f71a335ebabfa5 18 | - 6489453e322cdb53f9f3c6290096f50f 19 | - aa92ecd31491bca87a88a2ad67bfd073 20 | - 5678a2173ff575d09cebe817bc1591b3 21 | - 1b67b4bfed6688ba5b22feddf58c05e1 22 | - 850673bcbce8f73ec8a6d87a62ac0341 23 | - f53ea19f871a80d420685b5a7e34b501 24 | - 1f507b26c31ae69be42930af58a36dce 25 | - 29f5cfcef7272f1f640578ae55230ebc 26 | - 97e94d800fd6dc07dbaa6d42a4980930 27 | - b5f6fd84a3f44ddb1aa47689117a61e1 28 | val: [] 29 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/functions/learning_rate.py: -------------------------------------------------------------------------------- 1 | # 
Copyright 2018 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | 7 | def lambda_learning_rate_poly(max_epochs, exponent): 8 | """Make a function for computing learning rate by "poly" policy. 9 | 10 | This policy does a polynomial decay of the learning rate over the epochs 11 | of training. 12 | 13 | Args: 14 | max_epochs (int): max numbers of epochs 15 | exponent (float): exponent value 16 | """ 17 | return lambda epoch: pow((1.0 - epoch / max_epochs), exponent) 18 | 19 | 20 | def lambda_warmup(warmup_period, warmup_factor, wrapped_lambda): 21 | def warmup(epoch, warmup_period, warmup_factor): 22 | if epoch > warmup_period: 23 | return 1.0 24 | else: 25 | return warmup_factor + (1.0 - warmup_factor) * (epoch / warmup_period) 26 | 27 | return lambda epoch: warmup(epoch, warmup_period, warmup_factor) * wrapped_lambda( 28 | epoch 29 | ) 30 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/data_module.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | 3 | from CARTO.simnet.lib.net import common 4 | 5 | 6 | class DataModule(pl.LightningDataModule): 7 | def __init__(self, hparams, train_dataset=None, preprocess_func=None): 8 | super().__init__() 9 | 10 | # Using the same hyperparmeter saving method as the model module 11 | # doesn't work, so just assign to some other variable for now. 12 | self.params = hparams 13 | self.train_dataset = train_dataset 14 | self.preprocess_func = preprocess_func 15 | 16 | def train_dataloader(self): 17 | return common.get_loader( 18 | self.params, 19 | "train", 20 | preprocess_func=self.preprocess_func, 21 | datapoint_dataset=self.train_dataset, 22 | ) 23 | 24 | def val_dataloader(self): 25 | return common.get_loader( 26 | self.params, "val", preprocess_func=self.preprocess_func 27 | ) 28 | 29 | def test_dataloader(self): 30 | return common.get_loader( 31 | self.params, "test", preprocess_func=self.preprocess_func 32 | ) 33 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/matchability.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # 8 | 9 | 10 | @torch.jit.script 11 | def matchability(input): 12 | softmin = F.softmin(input, dim=1) 13 | log_softmin = F.log_softmax(-input, dim=1) 14 | output = torch.sum(softmin * log_softmin, dim=1, keepdim=True) 15 | return output 16 | 17 | 18 | class Matchability(nn.Module): 19 | """Compute disparity matchability value from https://arxiv.org/abs/2008.04800""" 20 | 21 | def forward(self, input): 22 | if torch.jit.is_scripting(): 23 | # Torchscript generation can't handle mixed precision, so always compute at float32. 
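            # matchability() above sums softmin * log_softmin over the channel (disparity)
            # dimension, i.e. it returns the negative entropy of the softmin distribution:
            # values near 0 indicate a sharply peaked, confident match, while strongly
            # negative values indicate a flat, ambiguous cost distribution.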
24 | return matchability(input) 25 | else: 26 | return self.forward_with_amp(input) 27 | 28 | @torch.jit.unused 29 | def forward_with_amp(self, input): 30 | """This operation is unstable at float16, so compute at float32 even when using mixed precision""" 31 | with torch.cuda.amp.autocast(enabled=False): 32 | input = input.to(torch.float32) 33 | return matchability(input) 34 | -------------------------------------------------------------------------------- /CARTO/Decoder/models/lipschitz_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.functional import softplus 3 | 4 | 5 | class LipschitzNorm(torch.nn.Module): 6 | name: str 7 | dim: int 8 | 9 | def __init__(self, name, dim: int, weight) -> None: 10 | super().__init__() 11 | self.name = name 12 | self.dim = dim 13 | self.register_parameter( 14 | "lipschitz_constant", 15 | torch.nn.Parameter(torch.max(torch.sum(torch.abs(weight), dim))), 16 | ) 17 | 18 | def compute_weight(self, module): 19 | W = getattr(module, self.name) 20 | absrowsum = torch.sum(torch.abs(W), dim=self.dim) 21 | softplus_c = softplus(self.lipschitz_constant) 22 | scale = torch.minimum(torch.Tensor([1.0]).to(W.device), softplus_c / absrowsum) 23 | return torch.nn.Parameter(W * scale[:, None]) 24 | 25 | @staticmethod 26 | def apply(module, name: str, dim: int = -1) -> "LipschitzNorm": 27 | weight = getattr(module, name) 28 | fn = LipschitzNorm(name, dim, weight) 29 | setattr(module, name, fn.compute_weight(module)) 30 | module.register_forward_pre_hook(fn) 31 | return fn 32 | 33 | def __call__(self, module, inputs): 34 | setattr(module, self.name, self.compute_weight(module)) 35 | 36 | 37 | def lipschitz_norm(module, name: str = "weight", dim: int = 1): 38 | lipschitz_norm_instance = LipschitzNorm.apply(module, name, dim) 39 | return module, lipschitz_norm_instance 40 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/transition_blocks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class TransitionBlock(nn.Module): 10 | """Transition block for changing resolution or the number of channels.""" 11 | 12 | def __init__(self, in_channels, out_channels, stride): 13 | """ 14 | Args: 15 | in_channels (int): The number of input channels. 16 | out_channels (int): The number of output channels. 17 | stride (int): Stride (1 or 2). 18 | """ 19 | assert stride in (1, 2) 20 | assert not (in_channels == out_channels and stride == 1) 21 | super().__init__() 22 | 23 | if stride == 1: 24 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 25 | else: 26 | self.conv = nn.Conv2d( 27 | in_channels, 28 | out_channels, 29 | kernel_size=3, 30 | stride=2, 31 | padding=1, 32 | bias=False, 33 | ) 34 | self.bn = nn.BatchNorm2d(out_channels) 35 | self.relu = nn.ReLU(inplace=True) 36 | 37 | def forward(self, inputs): 38 | """Forward computation. 39 | 40 | Args: 41 | inputs (Tensor): Input tensor. 42 | 43 | Returns: 44 | Output tensor. 
45 | """ 46 | return self.relu(self.bn(self.conv(inputs))) 47 | -------------------------------------------------------------------------------- /CARTO/lib/compression.py: -------------------------------------------------------------------------------- 1 | import json 2 | import zstandard 3 | import collections 4 | import io 5 | import tarfile 6 | 7 | 8 | def write_compressed_json(x, path): 9 | cctx = zstandard.ZstdCompressor() 10 | with open(path, "wb") as raw_fh: 11 | with cctx.stream_writer(raw_fh) as zst_fh: 12 | zst_fh.write(json.dumps(x, sort_keys=True, indent=2).encode()) 13 | 14 | 15 | def read_compressed_json(path): 16 | cctx = zstandard.ZstdDecompressor() 17 | with open(path, "rb") as raw_fh: 18 | with cctx.stream_reader(raw_fh) as zst_fh: 19 | bytes_ = zst_fh.read() 20 | str_ = bytes_.decode() 21 | x = json.loads(str_, object_pairs_hook=collections.OrderedDict) 22 | return x 23 | 24 | 25 | def extract_compressed_tarfile(tarfile_path, dst_dir): 26 | cctx = zstandard.ZstdDecompressor() 27 | with open(tarfile_path, "rb") as raw_fh: 28 | with cctx.stream_reader(raw_fh) as zst_fh: 29 | tarfile_buf = zst_fh.read() 30 | 31 | with io.BytesIO(tarfile_buf) as raw_fh: 32 | with tarfile.TarFile(fileobj=raw_fh) as tar: 33 | members = tar.getmembers() 34 | for member in members: 35 | if not member.isfile(): 36 | continue 37 | data = tar.extractfile(member).read() 38 | assert member.name[0] != "/" 39 | member_path = dst_dir / member.path 40 | parent_dir = member_path.parent 41 | parent_dir.mkdir(parents=True, exist_ok=True) 42 | with open(member_path, "wb") as f: 43 | f.write(data) 44 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/hdc_functions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | def hdc_resnet_group( 10 | block_func, in_channels, base_channels, num_blocks, dilation_rates 11 | ): 12 | """Make a group of pre-activation residual blocks with Hybrid Dilated 13 | Convolution (HDC). 14 | 15 | "Understanding Convolution for Semantic Segmentation", 16 | https://arxiv.org/abs/1702.08502. 17 | 18 | Args: 19 | block_func (ResidualBlock): Function of a residual block. 20 | in_channels (int): The number of input channels. 21 | base_channels (int): The number of base channels of the residual block. 22 | num_blocks (int): The number of residual blocks. 23 | dilation_rates (list): List of dilation rates. 24 | 25 | Returns: 26 | Module of a group of residual blocks. 
27 | """ 28 | assert block_func.preact() 29 | 30 | num_rates = len(dilation_rates) 31 | residual_blocks = [ 32 | block_func( 33 | in_channels, 34 | base_channels, 35 | dilation_rate=dilation_rates[0], 36 | add_preact=False, 37 | ) 38 | ] 39 | in_channels = block_func.expansion() * base_channels 40 | for idx in range(1, num_blocks): 41 | residual_blocks.append( 42 | block_func( 43 | in_channels, 44 | base_channels, 45 | dilation_rate=dilation_rates[idx % num_rates], 46 | add_preact=True, 47 | add_last_norm=idx == num_blocks - 1, 48 | ) 49 | ) 50 | return nn.Sequential(*residual_blocks) 51 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/grasp_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.pre_processing import pose_inputs 5 | from CARTO.simnet.lib.non_convex_grasper import _NUM_GRASPS_PER_OBJECT 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | 13 | def compute_network_targets(grasps, masks, height, width): 14 | assert len(grasps) == len(masks) 15 | if len(grasps) == 0: 16 | height_d = int(height / _DOWNSCALE_VALUE) 17 | width_d = int(width / _DOWNSCALE_VALUE) 18 | return datapoint.Grasps( 19 | heat_map=np.zeros([height, width]), 20 | grasp_success_target=np.zeros([height_d, width_d, _NUM_GRASPS_PER_OBJECT]), 21 | ) 22 | heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 23 | grasp_success_target = compute_grasp_target(grasps, heatmaps) 24 | return datapoint.Grasps( 25 | heat_map=np.max(heatmaps, axis=0), 26 | grasp_success_target=grasp_success_target, 27 | ) 28 | 29 | 30 | def compute_grasp_target(grasps_per_objects, heat_maps, threshold=0.3): 31 | grasp_target = np.zeros( 32 | [ 33 | len(grasps_per_objects), 34 | heat_maps[0].shape[0], 35 | heat_maps[0].shape[1], 36 | _NUM_GRASPS_PER_OBJECT, 37 | ] 38 | ) 39 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 40 | for grasps_per_object, heat_map, ii in zip( 41 | grasps_per_objects, heat_maps, range(len(heat_maps)) 42 | ): 43 | grasp_values = np.zeros(_NUM_GRASPS_PER_OBJECT) 44 | mask = heatmap_indices == ii 45 | for jj, grasp in enumerate(grasps_per_object): 46 | grasp_values[jj] = grasp.success 47 | grasp_target[ii, mask] = grasp_values 48 | return np.sum(grasp_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 49 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/keypoint_inputs.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from scipy.stats import multivariate_normal 4 | 5 | from CARTO.simnet.lib import datapoint 6 | 7 | _KEYPOINT_VAR = 20 8 | 9 | 10 | def compute_network_targets(keypoints, height, width): 11 | coords = np.indices((height, width)) 12 | coords = coords.reshape([2, -1]).T 13 | all_targets = [] 14 | # for each type of keypoint 15 | for keypoint_group in keypoints: 16 | # for each keypoint in each keypoint group 17 | individual_heat_maps = [] 18 | for keypoint in keypoint_group: 19 | # for each instance of the keypoint in the image 20 | for px in keypoint.pixels: 21 | # place a Gaussian target distribution at the pixel location 22 | cur_heat_map = np.zeros([height, width]) 23 | cov = np.eye(2) * _KEYPOINT_VAR 24 | multi_var = 
multivariate_normal(mean=px[::-1], cov=cov) 25 | density = multi_var.pdf(coords) 26 | cur_heat_map[coords[:, 0], coords[:, 1]] = density 27 | individual_heat_maps.append(cur_heat_map) 28 | # take a max over all pixels for this keypoint group 29 | if len(individual_heat_maps): 30 | target = np.stack(individual_heat_maps).max(0) 31 | target /= target.max() 32 | else: 33 | target = np.zeros([height, width]) 34 | all_targets.append(datapoint.Keypoint(heat_map=target)) 35 | return all_targets 36 | 37 | 38 | def vis_network_targets(keypoints, height, width, left_img): 39 | target_images = [] 40 | all_targets = compute_network_targets(keypoints, height, width) 41 | for target in all_targets: 42 | heat_map = target.heat_map 43 | img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 44 | img = cv2.addWeighted(heat_map, 0.999, img.astype(float), 0.00005, 0) 45 | img /= img.max() / 255 46 | target_images.append(img.astype(np.uint8)) 47 | return target_images 48 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/losses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | import IPython 7 | 8 | 9 | class MaskedL1Loss(nn.Module): 10 | def __init__(self, centroid_threshold=0.3, downscale_factor=8): 11 | super().__init__() 12 | self.loss = nn.L1Loss(reduction="none") 13 | self.centroid_threshold = centroid_threshold 14 | self.downscale_factor = downscale_factor 15 | 16 | def forward(self, output, target, valid_mask): 17 | """ 18 | output: [N,16,H,W] 19 | target: [N,16,H,W] 20 | valid_mask: [N,H,W] 21 | """ 22 | valid_count = torch.sum( 23 | valid_mask[:, :: self.downscale_factor, :: self.downscale_factor] 24 | > self.centroid_threshold 25 | ) 26 | loss = self.loss(output, target) 27 | if len(output.shape) == 4: 28 | loss = torch.sum(loss, dim=1) 29 | loss[ 30 | valid_mask[:, :: self.downscale_factor, :: self.downscale_factor] 31 | < self.centroid_threshold 32 | ] = 0.0 33 | if valid_count == 0: 34 | return torch.sum(loss) 35 | return torch.sum(loss) / valid_count 36 | 37 | 38 | class MSELoss(nn.Module): 39 | def __init__(self): 40 | super().__init__() 41 | self.loss = nn.MSELoss(reduction="none") 42 | 43 | def forward(self, output, target): 44 | """ 45 | output: [N,H,W] 46 | target: [N,H,W] 47 | ignore_mask: [N,H,W] 48 | """ 49 | loss = self.loss(output, target) 50 | return torch.mean(loss) 51 | 52 | 53 | class MaskedMSELoss(nn.Module): 54 | def __init__(self): 55 | super().__init__() 56 | self.loss = nn.MSELoss(reduction="none") 57 | 58 | def forward(self, output, target, ignore_mask): 59 | """ 60 | output: [N,H,W] 61 | target: [N,H,W] 62 | ignore_mask: [N,H,W] 63 | """ 64 | valid_sum = torch.sum(torch.logical_not(ignore_mask)) 65 | loss = self.loss(output, target) 66 | loss[ignore_mask > 0] = 0.0 67 | return torch.sum(loss) / valid_sum 68 | -------------------------------------------------------------------------------- /download_archives.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Input Parameters 4 | ARCHIVE_NAME=$1 5 | if [ $ARCHIVE_NAME != "real" ] && [ $ARCHIVE_NAME != "synthetic" ] && [ $ARCHIVE_NAME != "A-SDF" ] 6 | then 7 | echo "Unknown archive name ${ARCHIVE_NAME}. 
Use ./download_archives.sh [real|synthetic|A-SDF]" 8 | exit 0 9 | fi 10 | 11 | 12 | # Create directory 13 | BASE_DIR="downloaded_archives" 14 | DIR="${BASE_DIR}/${ARCHIVE_NAME}_parts/" 15 | mkdir -p ${DIR} 16 | echo "Created ${DIR} for saving" 17 | 18 | PARTS=() 19 | if [ $ARCHIVE_NAME == "real" ] 20 | then 21 | for x in {a..r} 22 | do 23 | PARTS+=("a${x}") 24 | done 25 | elif [ $ARCHIVE_NAME == "synthetic" ] 26 | then 27 | for x in {a..c} 28 | do 29 | for y in {a..z} 30 | do 31 | PARTS+=("${x}${y}") 32 | done 33 | done 34 | for x in {a..y} 35 | do 36 | PARTS+=(".d${x}") 37 | done 38 | elif [ $ARCHIVE_NAME == "A-SDF" ] 39 | then 40 | for x in {a..q} 41 | do 42 | PARTS+=("a${x}") 43 | done 44 | fi 45 | 46 | EVERYTHING_OK=1 47 | # Download file 48 | for PART in "${PARTS[@]}" 49 | do 50 | echo "${DIR}" 51 | FILE_NAME="${ARCHIVE_NAME}.part.${PART}" 52 | URL="http://carto.cs.uni-freiburg.de/datasets/${ARCHIVE_NAME}_parts/${FILE_NAME}" 53 | #Check if file exists on the server 54 | if curl --output /dev/null --silent --head --fail "$URL" 55 | then 56 | echo "URL exists on server: $URL" 57 | # Download file 58 | if test -f "${DIR}/${FILE_NAME}" 59 | then 60 | echo "Skipping as file already exists locallly" 61 | else 62 | if wget -P ${DIR} ${URL} 63 | then 64 | echo "Successfully downloaded $URL" 65 | else 66 | echo "Error downloading $URL" 67 | EVERYTHING_OK = 0 68 | fi 69 | fi 70 | else 71 | echo "URL does not exist: $URL" 72 | EVERYTHING_OK = 0 73 | fi 74 | done 75 | 76 | # Unzip file 77 | if [ $EVERYTHING_OK -eq 0 ] 78 | then 79 | echo "Error downloading ${ARCHIVE_NAME} data" 80 | exit 1 81 | fi 82 | 83 | cat $DIR/* > "${BASE_DIR}/${ARCHIVE_NAME}.tar.gz" 84 | echo "Successfully downloaded ${ARCHIVE_NAME}" 85 | -------------------------------------------------------------------------------- /CARTO/Decoder/multi_poly.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | class MultiPoly: 7 | def __init__(self, x: np.ndarray, Y: np.ndarray, poly_dim: int = 1): 8 | assert x.ndim == 1 9 | assert Y.ndim == 2 10 | assert x.shape[0] == Y.shape[0] 11 | self.poly_fits: List[np.polynomial.Polynomial] = [] 12 | dim_amount = Y.shape[1] 13 | 14 | for lat_dim in range(dim_amount): 15 | poly = np.polynomial.Polynomial.fit(x, Y[:, lat_dim], poly_dim) 16 | self.poly_fits.append(poly) 17 | 18 | self.domain = np.array([np.min(x), np.max(x)]) 19 | 20 | def get_vals(self, X: np.ndarray): 21 | return self.__call__(X) 22 | 23 | def get_domain_mean(self): 24 | x = np.mean(self.domain) 25 | return self.get_vals(x) 26 | 27 | def linspace(self, n: int = 50, domain=None): 28 | X = np.linspace(*(domain if domain else self.domain), num=n) 29 | return self(X) 30 | 31 | def __call__(self, X: np.ndarray): 32 | Ys = [] 33 | for poly in self.poly_fits: 34 | Ys.append(poly(X)) 35 | return np.stack(Ys, axis=0).T 36 | 37 | def get_plot( 38 | self, 39 | x: np.ndarray, 40 | Y: np.ndarray, 41 | domain=None, 42 | n_samples: int = 50, 43 | types: List[str] = [], 44 | markers=["v", "P", "d"], 45 | ): 46 | plt_dim = int(np.ceil(np.sqrt(len(self.poly_fits)))) 47 | fig, axes = plt.subplots( 48 | plt_dim, plt_dim, figsize=(7, 7), sharex=True, sharey=True 49 | ) 50 | for i, poly in enumerate(self.poly_fits): 51 | xx, yy = poly.linspace(n_samples, domain=domain if domain else self.domain) 52 | ax = axes[i // plt_dim][i % plt_dim] 53 | for type, marker in zip(set(types), markers): 54 | mask = np.array(types) == type 55 
| ax.scatter( 56 | x[mask], 57 | Y[mask, i], 58 | label=type, 59 | marker=marker, 60 | c=x[mask], 61 | cmap="jet", 62 | ) 63 | ax.plot(xx, yy, color="orange") 64 | return fig, axes 65 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/surface_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib import datapoint 8 | from torch.nn import functional as F 9 | from CARTO.simnet.lib.net import losses 10 | 11 | _masked_l1_loss = losses.MaskedL1Loss() 12 | _MAX_DISP = 128 13 | 14 | 15 | class SurfaceOutput: 16 | def __init__(self, surface_pred, hparams): 17 | self.surface_pred = surface_pred 18 | self.is_numpy = False 19 | self.loss = nn.SmoothL1Loss(reduction="none") 20 | self.hparams = hparams 21 | 22 | # Converters for torch to numpy 23 | def convert_to_numpy_from_torch(self): 24 | self.surface_pred = np.ascontiguousarray(self.surface_pred.cpu().numpy()) 25 | self.surface_pred.transpose((1, 2, 0)) 26 | self.is_numpy = True 27 | 28 | def convert_to_torch_from_numpy(self): 29 | self.surface_pred.transpose((2, 0, 1)) 30 | self.surface_pred = torch.from_numpy( 31 | np.ascontiguousarray(self.surface_pred) 32 | ).float() 33 | self.is_numpy = False 34 | 35 | def get_visualization_img(self, left_img_np): 36 | if not self.is_numpy: 37 | self.convert_to_numpy_from_torch() 38 | 39 | surface = self.surface_pred[0] 40 | downscale_factor = int(left_img_np.shape[0] / disp.shape[0]) 41 | left_img = left_img_np[::downscale_factor, ::downscale_factor] 42 | viz_img = np.zeros([left_img.shape[0] * 2, left_img.shape[1], 3]) 43 | viz_img[0 : left_img.shape[0], :, :] = left_img 44 | viz_img[left_img.shape[0] : left_img.shape[0] + disp.shape[0], :, :] = surface 45 | return viz_img 46 | 47 | def compute_loss(self, surface_targets, log): 48 | if self.is_numpy: 49 | raise ValueError("Output is not in torch mode") 50 | surface_target_stacked = [] 51 | for surface_target in surface_targets: 52 | surface_target_stacked.append(surface_target.surface_pred) 53 | surface_target_batch = torch.stack(surface_target_stacked) 54 | surface_target_batch = surface_target_batch.to(torch.device("cuda:0")) 55 | mask = torch.sum(surface_target_batch, axis=1) > 0 56 | surface_loss = self.loss(surface_target_batch, self.surface_pred, mask) 57 | log["surface"] = surface_loss 58 | return self.hparams.loss_surface_mult * surface_loss 59 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/offscreen.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import pyrender 3 | import torch 4 | import numpy as np 5 | 6 | 7 | def look_at( 8 | center: np.ndarray, target: np.ndarray, up: np.ndarray = np.array([0.0, 1.0, 0.0]) 9 | ): 10 | """ 11 | params: 12 | center: Camera position 13 | target: Target to look at 14 | up: up axis of camera 15 | """ 16 | 17 | f = center - target 18 | f /= np.linalg.norm(f) 19 | up /= np.linalg.norm(up) 20 | r = np.cross(up, f) 21 | u = np.cross(f, r) 22 | 23 | m = np.zeros((4, 4)) 24 | m[0:3, 0] = r 25 | m[0:3, 1] = u 26 | m[0:3, 2] = f 27 | m[0:3, 3] = center 28 | m[3, 3] = 1.0 29 | return m 30 | 31 | 32 | def get_default_scene(): 33 | scene = pyrender.Scene() 34 | cam = pyrender.PerspectiveCamera(yfov=(np.pi / 3.0)) 35 | cam_pose = look_at( 36 | 
np.array([-1.0, -1.0, 1.0]), 37 | np.array([0.0, 0.0, 0.0]), 38 | up=np.array([0.0, 0.0, 1.0]), 39 | ) 40 | scene.add(cam, pose=cam_pose) 41 | 42 | light = pyrender.SpotLight( 43 | color=np.ones(3), 44 | intensity=3.0, 45 | innerConeAngle=np.pi / 16.0, 46 | outerConeAngle=np.pi / 6.0, 47 | ) 48 | scene.add(light, pose=cam_pose) 49 | return scene 50 | 51 | 52 | def get_point_cloud( 53 | points: np.ndarray, 54 | sdf: np.ndarray, 55 | color: np.ndarray = np.array([0.0, 0.0, 0.0]), 56 | threshold: float = 0e-3, 57 | ) -> pyrender.Mesh: 58 | if isinstance(points, torch.Tensor): 59 | points = points.cpu().numpy() 60 | if isinstance(sdf, torch.Tensor): 61 | sdf = sdf.cpu().numpy() 62 | if sdf.ndim == 2: 63 | sdf = sdf[:, 0] 64 | 65 | if not np.count_nonzero(sdf <= threshold): 66 | threshold = sdf.min() + 1e-5 67 | 68 | points = points[sdf <= threshold] 69 | # colors = np.ones(points.shape) * color 70 | colors = np.abs(points) / 2.0 71 | cloud = pyrender.Mesh.from_points(points, colors=colors) 72 | return cloud 73 | 74 | 75 | def render_offscreen( 76 | scene: Optional[pyrender.Scene] = None, meshes: Optional[List[pyrender.Mesh]] = [] 77 | ): 78 | if not scene: 79 | scene = get_default_scene() 80 | for mesh in meshes: 81 | scene.add(mesh) 82 | r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480) 83 | color, depth = r.render(scene) 84 | r.delete() 85 | return color 86 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/stochastic_depth.py: -------------------------------------------------------------------------------- 1 | # BACKPORT FROM TORCHVISION 0.11 2 | from torch import Tensor 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | def stochastic_depth( 8 | input: Tensor, p: float, mode: str, training: bool = True 9 | ) -> Tensor: 10 | """ 11 | Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth" 12 | `_ used for randomly dropping residual 13 | branches of residual architectures. 14 | 15 | Args: 16 | input (Tensor[N, ...]): The input tensor or arbitrary dimensions with the first one 17 | being its batch i.e. a batch with ``N`` rows. 18 | p (float): probability of the input to be zeroed. 19 | mode (str): ``"batch"`` or ``"row"``. 20 | ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes 21 | randomly selected rows from the batch. 22 | training: apply stochastic depth if is ``True``. Default: ``True`` 23 | 24 | Returns: 25 | Tensor[N, ...]: The randomly zeroed tensor. 26 | """ 27 | # if not torch.jit.is_scripting() and not torch.jit.is_tracing(): 28 | # _log_api_usage_once(stochastic_depth) 29 | if p < 0.0 or p > 1.0: 30 | raise ValueError(f"drop probability has to be between 0 and 1, but got {p}") 31 | if mode not in ["batch", "row"]: 32 | raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}") 33 | if not training or p == 0.0: 34 | return input 35 | 36 | survival_rate = 1.0 - p 37 | if mode == "row": 38 | size = [input.shape[0]] + [1] * (input.ndim - 1) 39 | else: 40 | size = [1] * input.ndim 41 | noise = torch.empty(size, dtype=input.dtype, device=input.device) 42 | noise = noise.bernoulli_(survival_rate) 43 | if survival_rate > 0.0: 44 | noise.div_(survival_rate) 45 | return input * noise 46 | 47 | 48 | # torch.fx.wrap("stochastic_depth") 49 | 50 | 51 | class StochasticDepth(nn.Module): 52 | """ 53 | See :func:`stochastic_depth`. 
54 | """ 55 | 56 | def __init__(self, p: float, mode: str) -> None: 57 | super().__init__() 58 | # _log_api_usage_once(self) 59 | self.p = p 60 | self.mode = mode 61 | 62 | def forward(self, input: Tensor) -> Tensor: 63 | return stochastic_depth(input, self.p, self.mode, self.training) 64 | 65 | def __repr__(self) -> str: 66 | s = f"{self.__class__.__name__}(p={self.p}, mode={self.mode})" 67 | return s 68 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/orochi_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | from CARTO.simnet.lib import color_stuff 9 | from CARTO.simnet.lib import datapoint 10 | from CARTO.simnet.lib.net.dataset import PanopticOutputs 11 | 12 | 13 | def visualize_img( 14 | panoptic_outputs: PanopticOutputs, 15 | c_img, 16 | camera_model, 17 | class_list, 18 | poses=False, 19 | prune_distance=False, 20 | is_target=False, 21 | ): 22 | c_img = np.copy(c_img) 23 | 24 | c_img = panoptic_outputs.room_segmentation[0].get_visualization_img( 25 | c_img, is_target=is_target 26 | ) 27 | 28 | if len(panoptic_outputs.handhold_obbs) > 0: 29 | c_img = panoptic_outputs.handhold_obbs[0].get_visualization_img( 30 | 0, c_img, camera_model=camera_model, poses=poses 31 | ) 32 | if len(panoptic_outputs.cabinet_door_obbs) > 0: 33 | c_img = panoptic_outputs.cabinet_door_obbs[0].get_visualization_img( 34 | 0, c_img, camera_model=camera_model, class_list=[], poses=poses 35 | ) 36 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 37 | c_img = panoptic_outputs.graspable_objects_obbs[0].get_visualization_img( 38 | 0, 39 | c_img, 40 | camera_model=camera_model, 41 | class_list=class_list, 42 | prune_distance=prune_distance, 43 | poses=poses, 44 | ) 45 | 46 | return c_img 47 | 48 | 49 | def visualize_heatmap(panoptic_outputs: PanopticOutputs, c_img): 50 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 51 | # print(panoptic_outputs.graspable_objects_obbs[0].heatmap.shape) 52 | # print(np.max(panoptic_outputs.graspable_objects_obbs[0])) 53 | # print(np.min(panoptic_outputs.graspable_objects_obbs[0].heatmap.shape)) 54 | heatmap = cv2.applyColorMap( 55 | ( 56 | np.clip( 57 | panoptic_outputs.graspable_objects_obbs[0].heatmap[0, ...], 0.0, 1.0 58 | ) 59 | * 255.0 60 | ).astype(np.uint8), 61 | cv2.COLORMAP_JET, 62 | ) 63 | gray = cv2.cvtColor(c_img.copy(), cv2.COLOR_RGB2GRAY).astype(np.uint8) 64 | gray_full = np.zeros_like(heatmap) 65 | gray_full[..., 0] = gray 66 | gray_full[..., 1] = gray 67 | gray_full[..., 2] = gray 68 | return cv2.addWeighted(gray_full, 0.9, heatmap.astype(np.uint8), 0.4, 0) 69 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/primitive.py: -------------------------------------------------------------------------------- 1 | from CARTO.simnet.lib import sg 2 | import numpy as np 3 | import trimesh 4 | 5 | DEFAULT_COLOR = np.array([60, 60, 60, 255], dtype=np.uint8) 6 | CORNER_1 = np.array([255, 255, 0, 255], dtype=np.uint8) 7 | CORNER_2 = np.array([0, 255, 255, 255], dtype=np.uint8) 8 | CORNER_3 = np.array([255, 0, 255, 255], dtype=np.uint8) 9 | CORNER_4 = np.array([0, 255, 0, 255], dtype=np.uint8) 10 | CORNER_5 = np.array([255, 0, 0, 255], dtype=np.uint8) 11 | CORNER_6 = np.array([0, 0, 255, 255], dtype=np.uint8) 12 | CORNER_7 = np.array([255, 255, 255, 255], 
dtype=np.uint8) 13 | CORNER_8 = np.array([255, 255, 0, 255], dtype=np.uint8) 14 | RED = np.array([255, 0, 0, 255], dtype=np.uint8) 15 | GREEN = np.array([0, 255, 0, 255], dtype=np.uint8) 16 | BLUE = np.array([0, 0, 255, 255], dtype=np.uint8) 17 | 18 | 19 | def make_coordinate_frame(scale=1.0, name="coord_frame_vis"): 20 | node = sg.Node() 21 | small = 0.2 * scale 22 | large = 1.0 * scale 23 | node.add_child(make_cube(large, small, small, color=RED, name=f"{name}_x")) 24 | node.add_child(make_cube(small, large, small, color=GREEN, name=f"{name}_y")) 25 | node.add_child(make_cube(small, small, large, color=BLUE, name=f"{name}_z")) 26 | return node 27 | 28 | 29 | def make_cube( 30 | x_width=1.0, y_depth=1.0, z_height=1.0, name="cube", color=None, disable_color=False 31 | ): 32 | if disable_color: 33 | vertex_colors = None 34 | else: 35 | vertex_colors = [ 36 | CORNER_1 if color is None else color, 37 | CORNER_2 if color is None else color, 38 | CORNER_3 if color is None else color, 39 | CORNER_4 if color is None else color, 40 | CORNER_5 if color is None else color, 41 | CORNER_6 if color is None else color, 42 | CORNER_7 if color is None else color, 43 | CORNER_8 if color is None else color, 44 | ] 45 | mesh = trimesh.Trimesh( 46 | vertices=[ 47 | [0, 0, 0], # 0 48 | [x_width, 0, 0], # 1 49 | [x_width, y_depth, 0], # 2 50 | [0, y_depth, 0], # 3 51 | [0, 0, z_height], # 4 52 | [x_width, 0, z_height], # 5 53 | [x_width, y_depth, z_height], # 6 54 | [0, y_depth, z_height], # 7 55 | ], 56 | faces=[ 57 | [0, 3, 2, 1], 58 | [1, 2, 6, 5], 59 | [2, 3, 7, 6], 60 | [4, 5, 6, 7], 61 | [0, 1, 5, 4], 62 | [0, 4, 7, 3], 63 | ], 64 | vertex_colors=vertex_colors, 65 | ) 66 | node = sg.Node(name=name) 67 | node.meshes = [mesh] 68 | node.meta.is_object = True 69 | return node 70 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/visualize_sdf_values.py: -------------------------------------------------------------------------------- 1 | ## Load training SDFs 2 | import argparse 3 | import colorsys 4 | import os 5 | import numpy as np 6 | import pathlib 7 | import tqdm 8 | import open3d as o3d 9 | import random 10 | 11 | from CARTO.simnet.lib.datapoint import decompress_datapoint 12 | from CARTO.Decoder import utils 13 | from CARTO.Decoder.data import dataset 14 | from CARTO.Decoder import config 15 | from CARTO.Decoder.visualizing import code_vis 16 | from PIL import Image 17 | 18 | import seaborn as sns 19 | 20 | 21 | def main(args): 22 | file_dir = pathlib.Path(args.file_dir) 23 | out_dir = pathlib.Path(args.out_dir) 24 | out_dir.mkdir(exist_ok=True, parents=True) 25 | dataset_cfg: config.GenerationConfig = utils.load_cfg( 26 | file_dir, cfg_class=config.GenerationConfig 27 | ) 28 | all_files = list(file_dir.glob("*.zstd")) 29 | if args.latest or args.earliest: 30 | all_files.sort(key=lambda x: os.path.getmtime(x), reverse=args.earliest) 31 | else: 32 | print("Shuffling object list") 33 | random.shuffle(all_files) 34 | 35 | counts = utils.AccumulatorDict() 36 | for file_name in all_files: 37 | counts.increment(str(file_name).split("_")[-2], 1) 38 | print(counts) 39 | 40 | render = code_vis.get_o3d_render(frame_width=600, frame_height=600) 41 | 42 | for i, file_path in tqdm.tqdm(enumerate(all_files[: args.n])): 43 | with open(file_path, "rb") as fh: 44 | buf = fh.read() 45 | data_point: dataset.DataPoint = decompress_datapoint(buf) 46 | 47 | # print(data_point.keys()) 48 | sdf = data_point.sdf_values[:, None] 49 | points = data_point.points 50 | # 
Assign inside/outside color 51 | colors = np.where( 52 | sdf < 0.0, 53 | np.ones_like(points) * sns.color_palette("tab10")[0], 54 | np.ones_like(points) * sns.color_palette("tab10")[1], 55 | ) 56 | 57 | if len(points) == 0: 58 | continue 59 | 60 | points /= dataset_cfg.max_extent 61 | 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(points) 64 | pcd.colors = o3d.utility.Vector3dVector(colors) 65 | 66 | img_np = code_vis.render_o3d_mesh(pcd, height_coloring=False, render=render) 67 | img_PIL = Image.fromarray(img_np) 68 | img_PIL.save(str(out_dir / f"{i}.png")) 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument("file_dir") 74 | parser.add_argument("out_dir") 75 | parser.add_argument("-n", type=int, default=100) 76 | parser.add_argument("-l", "--latest", action="store_true", default=False) 77 | parser.add_argument("-e", "--earliest", action="store_true", default=False) 78 | args = parser.parse_args() 79 | main(args) 80 | -------------------------------------------------------------------------------- /CARTO/Decoder/models/lr_schedules.py: -------------------------------------------------------------------------------- 1 | # from typing import List 2 | 3 | from CARTO.Decoder.config import LearningRateScheduleConfig, LearningRateScheduleType 4 | 5 | 6 | class LearningRateSchedule: 7 | def get_learning_rate(self, epoch): 8 | pass 9 | 10 | @staticmethod 11 | def get_from_config(cfg: LearningRateScheduleConfig): 12 | if cfg.type == LearningRateScheduleType.STEP: 13 | return StepLearningRateSchedule( 14 | cfg.initial, 15 | cfg.interval, 16 | cfg.factor, 17 | ) 18 | elif cfg.type == LearningRateScheduleType.WARMUP: 19 | return WarmupLearningRateSchedule( 20 | cfg.initial, 21 | cfg.final, 22 | cfg.length, 23 | ) 24 | 25 | elif cfg.type == LearningRateScheduleType.CONSTANT: 26 | return ConstantLearningRateSchedule(cfg.initial) 27 | elif cfg.type == LearningRateScheduleType.LEVEL_DECAY: 28 | return LevelDecayLearningRateSchedule(cfg.initial, cfg.factor) 29 | else: 30 | raise Exception( 31 | 'no known learning rate schedule of type "{}"'.format(cfg.type) 32 | ) 33 | 34 | 35 | class ConstantLearningRateSchedule(LearningRateSchedule): 36 | def __init__(self, value): 37 | self.value 38 | 39 | def get_learning_rate(self, epoch): 40 | return self.value 41 | 42 | 43 | class StepLearningRateSchedule(LearningRateSchedule): 44 | def __init__(self, initial, interval, factor): 45 | self.initial = initial 46 | self.interval = interval 47 | self.factor = factor 48 | 49 | def get_learning_rate(self, epoch): 50 | return self.initial * (self.factor ** (epoch // self.interval)) 51 | 52 | 53 | class WarmupLearningRateSchedule(LearningRateSchedule): 54 | def __init__(self, initial, warmed_up, length): 55 | self.initial = initial 56 | self.warmed_up = warmed_up 57 | self.length = length 58 | 59 | def get_learning_rate(self, epoch): 60 | if epoch > self.length: 61 | return self.warmed_up 62 | return self.initial + (self.warmed_up - self.initial) * epoch / self.length 63 | 64 | 65 | class LevelDecayLearningRateSchedule(LearningRateSchedule): 66 | def __init__(self, initial, decay): 67 | self.initial = initial 68 | self.decay = decay 69 | self.level = 0 70 | 71 | def inc_level(self, level=1): 72 | self.level += level 73 | 74 | def get_learning_rate(self, epoch): 75 | """ 76 | Epoch does not matter 77 | """ 78 | return self.initial * ((self.decay) ** self.level) 79 | 80 | 81 | # def get_learning_rate_schedules(schedulers: 
List[LearningRateSchedulerConfig]): 82 | # schedules = [] 83 | # for schedule in schedulers: 84 | 85 | # return schedules 86 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/verify_watertight.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | from concurrent import futures 3 | import pathlib 4 | 5 | import tqdm 6 | import functools 7 | import gc 8 | 9 | from typing import Dict, Any, Callable, List 10 | import itertools 11 | 12 | # import pyrender 13 | import trimesh 14 | import urdfpy 15 | 16 | from CARTO.simnet.lib import partnet_mobility 17 | from CARTO.simnet.lib.datasets import PartNetMobilityV0DB 18 | from CARTO.simnet.lib.datapoint import compress_datapoint, decompress_datapoint 19 | 20 | import uuid 21 | from CARTO.Decoder import utils, config 22 | from CARTO.Decoder.data import dataset 23 | import open3d as o3d 24 | import numpy as np 25 | 26 | 27 | def process_object_id( 28 | object_id: str, 29 | joint_filter: Callable[[Dict[str, Any]], bool] = lambda _: True, 30 | joint_offset: float = 0.0, 31 | ): 32 | # object_id = "187d79cd04b2bdfddf3a1b0d597ce76e" 33 | 34 | object_path = PartNetMobilityV0DB.get_object(object_id) 35 | object_meta = PartNetMobilityV0DB.get_object_meta(object_id) 36 | 37 | joints_of_interest: List[str] = [] 38 | # Artifact from preprocessing 39 | for joint_id, joint in object_meta["joints"].items(): 40 | if not joint_filter( 41 | joint, partnet_mobility.get_joint_name_exclusion_list(object_meta) 42 | ): 43 | continue 44 | joints_of_interest.append(joint_id) 45 | 46 | joint_config = {} 47 | for joint_id, joint in object_meta["joints"].items(): 48 | joint_config[joint_id] = joint["limit"][0] + ( 49 | joint_offset if joint_id in joints_of_interest else 0.0 50 | ) 51 | 52 | canonical_transform = np.array( 53 | PartNetMobilityV0DB.get_object_meta(object_id)["canonical_transformation"] 54 | ) 55 | urdf_object = urdfpy.URDF.load(str(object_path / "mobility.urdf")) 56 | trimesh_object, _, _ = utils.object_to_trimesh( 57 | urdf_object, joint_config=joint_config, base_transform=canonical_transform 58 | ) 59 | # points, sdf = utils.object_to_sdf(trimesh_object) 60 | # points = points[sdf <= 0] 61 | points, _ = utils.object_to_point_cloud(trimesh_object, number_samples=100000) 62 | color = utils.get_random_color() 63 | 64 | pcd = o3d.geometry.PointCloud() 65 | pcd.points = o3d.utility.Vector3dVector(points) 66 | # pcd.points = o3d.utility.Vector3dVector(s_pc.points) 67 | pcd.paint_uniform_color(color) 68 | return pcd 69 | 70 | 71 | def main(): 72 | object_filter, joint_filter = partnet_mobility.get_filter_function( 73 | # category_list=["Microwave", "Laptop"], 74 | category_list=["Laptop"], 75 | # category_list=["Microwave"], 76 | # category_list=["WashingMachine"], 77 | max_unique_parents=1, 78 | no_limit_ok=False, 79 | min_prismatic=0.1, 80 | min_revolute=0.1, 81 | ) 82 | PartNetMobilityV0DB.set_filter(object_filter) 83 | print(f"Length of filtered dataset: {len(PartNetMobilityV0DB)}") 84 | 85 | pcds = [] 86 | for object_id in tqdm.tqdm(PartNetMobilityV0DB.index_list): 87 | pcd: o3d.geometry.PointCloud = process_object_id( 88 | object_id, joint_filter=joint_filter, joint_offset=1.5 89 | ) 90 | pcds.append(pcd) 91 | pcd_local = [pcd] 92 | o3d.visualization.draw_geometries(pcd_local) 93 | 94 | o3d.visualization.draw_geometries(pcds) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | 
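A minimal usage sketch for the schedule factory defined in CARTO/Decoder/models/lr_schedules.py above (editor's illustration, not repository code; it assumes LearningRateScheduleConfig can be constructed directly with the fields that get_from_config reads):

from CARTO.Decoder.config import LearningRateScheduleConfig, LearningRateScheduleType
from CARTO.Decoder.models.lr_schedules import LearningRateSchedule

# Assumed constructor: a config carrying type / initial / interval / factor.
cfg = LearningRateScheduleConfig(
    type=LearningRateScheduleType.STEP, initial=1e-3, interval=10, factor=0.5
)
schedule = LearningRateSchedule.get_from_config(cfg)
# Step schedule: 1e-3 * 0.5 ** (epoch // 10), i.e. 2.5e-4 at epoch 25.
print(schedule.get_learning_rate(25))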
-------------------------------------------------------------------------------- /CARTO/Decoder/models/joint_state_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | from CARTO.Decoder import config 8 | 9 | 10 | class ClassificationHead(nn.Module): 11 | def __init__(self, in_dim, weight_normalizer=lambda x: x): 12 | super(ClassificationHead, self).__init__() 13 | 14 | lin_state = weight_normalizer(nn.Linear(in_dim, 1)) # Continous prediction 15 | lin_type = weight_normalizer( 16 | nn.Linear(in_dim, 2, bias=False) 17 | ) # 0: revolute, 1: prismatic 18 | setattr(self, "lin_state", lin_state) 19 | setattr(self, "lin_type", lin_type) 20 | 21 | def forward(self, input): 22 | lin_state = getattr(self, "lin_state") 23 | lin_type = getattr(self, "lin_type") 24 | 25 | state_pred = lin_state(input) 26 | type_pred = torch.sigmoid(lin_type(input)) 27 | return {"state": state_pred, "type": type_pred} 28 | 29 | 30 | class ZeroOneHead(nn.Module): 31 | def __init__(self, in_dim, weight_normalizer=lambda x: x): 32 | super(ZeroOneHead, self).__init__() 33 | lin_module = weight_normalizer(nn.Linear(in_dim, 1)) 34 | setattr(self, "lin_module", lin_module) 35 | 36 | def forward(self, input): 37 | lin_module = getattr(self, "lin_module") 38 | pred = torch.sigmoid(lin_module(input)) 39 | return {"state": pred} 40 | 41 | 42 | class JointStateDecoder(nn.Module): 43 | def __init__( 44 | self, 45 | cfg: config.JointStateDecoderModelConfig, 46 | joint_config_latent_code_dim: int = 16, 47 | ): 48 | super(JointStateDecoder, self).__init__() 49 | self.joint_config_latent_code_dim = joint_config_latent_code_dim 50 | 51 | dims = [joint_config_latent_code_dim] + cfg.dims 52 | self.num_layers = len(dims) 53 | 54 | weight_normalizer = config.get_weight_normalizer(cfg.weight_normalizer) 55 | 56 | for layer in range(0, self.num_layers - 1): 57 | out_dim = dims[layer + 1] 58 | linear_layer = weight_normalizer(nn.Linear(dims[layer], out_dim)) 59 | # linear_layer = nn.utils.weight_norm(linear_layer) 60 | setattr(self, "lin" + str(layer), linear_layer) 61 | 62 | if cfg.output_head == config.JointDecoderOutputHeadStyle.CLASSIFICATION: 63 | out_head_class = ClassificationHead 64 | elif cfg.output_head == config.JointDecoderOutputHeadStyle.ZERO_ONE_HEAD: 65 | out_head_class = ZeroOneHead 66 | else: 67 | raise ModuleNotFoundError(f"Unknown output head {cfg.output_head}") 68 | 69 | setattr( 70 | self, 71 | "output_head", 72 | out_head_class(dims[-1], weight_normalizer=weight_normalizer), 73 | ) 74 | 75 | self.relu = nn.ReLU() 76 | self.th = nn.Tanh() 77 | 78 | def forward(self, input): 79 | assert ( 80 | input.size()[-1] == self.joint_config_latent_code_dim 81 | ), f"{input.size()[-1]} == {self.joint_config_latent_code_dim}" 82 | 83 | x = input 84 | for layer in range(0, self.num_layers - 1): 85 | lin = getattr(self, "lin" + str(layer)) 86 | x = lin(x) 87 | x = self.relu(x) 88 | x = F.dropout(x, p=0.2, training=self.training) 89 | output_head = getattr(self, "output_head") 90 | return output_head(x) 91 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/visualize_dataset_pytorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | import numpy as np 4 | import open3d as o3d 5 | import random 6 | import tqdm 7 | 8 | from CARTO.Decoder.data import 
dataset 9 | from CARTO.Decoder import utils, config 10 | 11 | 12 | def main(args): 13 | split_dicts = dataset.get_dataset_split_dict( 14 | pathlib.Path(args.data_dir), args.split_name, file_name=args.split_file_name 15 | ) 16 | gen_cfg: config.GenerationConfig = utils.load_cfg( 17 | pathlib.Path(args.data_dir), cfg_class=config.GenerationConfig 18 | ) 19 | rescaler = dataset.Rescaler3D(scale=gen_cfg.max_extent) 20 | print(gen_cfg.max_extent) 21 | train_dataset = dataset.SDFDataset( 22 | split_dicts["train"], rescaler=rescaler, cache_in_ram=False, subsample=100000000 23 | ) 24 | val_dataset = dataset.SDFDataset( 25 | split_dicts["val"], rescaler=rescaler, cache_in_ram=False 26 | ) 27 | 28 | print(f"{len(train_dataset) = }") 29 | 30 | pcds = [] 31 | k = 100 32 | # k = len(train_dataset) 33 | indices = random.sample(range(len(train_dataset)), k) 34 | 35 | for i in tqdm.tqdm(indices): 36 | data_point: dataset.DataPoint = train_dataset[i] 37 | 38 | sdf = data_point.sdf_values 39 | points = data_point.points[sdf <= 0.0] 40 | color = utils.get_random_color() 41 | 42 | if np.abs(points).max() > 1: 43 | print(np.abs(points).max()) 44 | 45 | # print(points.shape) 46 | # print(color) 47 | 48 | pcd = o3d.geometry.PointCloud() 49 | pcd.points = o3d.utility.Vector3dVector(points) 50 | pcd.paint_uniform_color(color) 51 | pcds.append(pcd) 52 | 53 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame()) 54 | print(len(pcds)) 55 | 56 | if args.unit_cube: 57 | cube_points = np.array( 58 | [ 59 | [-1.0, -1.0, -1.0], 60 | [1.0, -1.0, -1.0], 61 | [-1.0, 1.0, -1.0], 62 | [1.0, 1.0, -1.0], 63 | [-1.0, -1.0, 1.0], 64 | [1.0, -1.0, 1.0], 65 | [-1.0, 1.0, 1.0], 66 | [1.0, 1.0, 1.0], 67 | ], 68 | dtype=np.float, 69 | ) 70 | # cube_points /= 2 71 | lines = np.array( 72 | [ 73 | [0, 1], 74 | [0, 2], 75 | [1, 3], 76 | [2, 3], 77 | [4, 5], 78 | [4, 6], 79 | [5, 7], 80 | [6, 7], 81 | [0, 4], 82 | [1, 5], 83 | [2, 6], 84 | [3, 7], 85 | ] 86 | ) 87 | colors = [[1, 0, 0] for i in range(len(lines))] 88 | line_set = o3d.geometry.LineSet() 89 | line_set.points = o3d.utility.Vector3dVector(cube_points) 90 | line_set.lines = o3d.utility.Vector2iVector(lines) 91 | line_set.colors = o3d.utility.Vector3dVector(colors) 92 | pcds.append(line_set) 93 | 94 | o3d.visualization.draw_geometries(pcds) 95 | 96 | 97 | if __name__ == "__main__": 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("data_dir") 100 | parser.add_argument("split_name") 101 | parser.add_argument("--unit-cube", action="store_true", default=True) 102 | args = parser.parse_args() 103 | main(args) 104 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/nms.py: -------------------------------------------------------------------------------- 1 | import IPython 2 | import numpy as np 3 | 4 | 5 | def run(detections, overlap_thresh=0.75, order_mode="confidence"): 6 | # initialize the list of picked detections 7 | pruned_detections = [] 8 | 9 | # sort the indexes 10 | if order_mode == "lower_y": 11 | idxs = create_order_by_lower_y(detections) 12 | elif order_mode == "confidence": 13 | idxs = create_order_by_score(detections) 14 | 15 | overlap_function = get_2d_one_way_iou 16 | 17 | # keep looping while some indexes still remain in the indexes list 18 | while len(idxs) > 0: 19 | # grab the last index in the indexes list and add the index value 20 | # to the list of picked indexes 21 | last = len(idxs) - 1 22 | ii = idxs[last] 23 | indices_to_suppress = [] 24 | for index, index_of_index 
in zip(idxs[:last], range(last)): 25 | detection_proposal = detections[index] 26 | overlap = overlap_function(detections[ii], detection_proposal) 27 | if overlap > overlap_thresh: 28 | indices_to_suppress.append(index_of_index) 29 | # Add the the pruned_detections. 30 | pruned_detections.append(detections[ii]) 31 | indices_to_suppress.append(last) 32 | idxs = np.delete(idxs, indices_to_suppress) 33 | 34 | # return only the bounding boxes that were picked 35 | return prune_by_min_height(pruned_detections) 36 | 37 | 38 | def prune_by_min_height(detections): 39 | pruned_detections = [] 40 | for detection in detections: 41 | if detection.bbox[1][0] - detection.bbox[0][0] < 12: 42 | continue 43 | pruned_detections.append(detection) 44 | return pruned_detections 45 | 46 | 47 | def create_order_by_lower_y(detections): 48 | idxs = [] 49 | for detection in detections: 50 | idxs.append(detection.bbox[1][1]) 51 | idxs = np.argsort(idxs) 52 | return idxs 53 | 54 | 55 | def create_order_by_score(detections): 56 | idxs = [] 57 | for detection in detections: 58 | idxs.append(detection.score) 59 | idxs = np.argsort(idxs) 60 | return idxs 61 | 62 | 63 | def get_2d_one_way_iou(detection_one, detection_two): 64 | box_one = np.array( 65 | [ 66 | detection_one.bbox[0][0], 67 | detection_one.bbox[0][1], 68 | detection_one.bbox[1][0], 69 | detection_one.bbox[1][1], 70 | ] 71 | ) 72 | box_two = np.array( 73 | [ 74 | detection_two.bbox[0][0], 75 | detection_two.bbox[0][1], 76 | detection_two.bbox[1][0], 77 | detection_two.bbox[1][1], 78 | ] 79 | ) 80 | # determine the (x, y)-coordinates of the intersection rectangle 81 | xA = max(box_one[0], box_two[0]) 82 | yA = max(box_one[1], box_two[1]) 83 | xB = min(box_one[2], box_two[2]) 84 | yB = min(box_one[3], box_two[3]) 85 | # compute the area of intersection rectangle 86 | inter_area = max(0, xB - xA + 1) * max(0, yB - yA + 1) 87 | # compute the area of both the prediction and ground-truth 88 | # rectangles 89 | box_one_area = (box_one[2] - box_one[0] + 1) * (box_one[3] - box_one[1] + 1) 90 | box_two_area = (box_two[2] - box_two[0] + 1) * (box_two[3] - box_two[1] + 1) 91 | # compute the intersection over union by taking the intersection 92 | # area and dividing it by the sum of prediction + ground-truth 93 | # areas - the interesection area 94 | if float(box_one_area) == 0.0: 95 | return 0 96 | return inter_area / float(box_one_area + box_two_area - inter_area) 97 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/cost_volume.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
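# Editor's note (added for clarity, not part of the original file): the functions
# below correlate left/right stereo feature maps of shape (B, C, H, W) over
# `num_disparities` horizontal shifts. cost_volume keeps the channel dimension and
# returns a (B, C, D, H, W) volume, while dot_product_cost_volume averages over
# channels and returns (B, D, H, W); pixels without a valid counterpart at a given
# shift are left at zero.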
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | 
7 | @torch.jit.script
8 | def cost_volume(left, right, num_disparities: int, is_right: bool):
9 |     batch_size, channels, height, width = left.shape
10 | 
11 |     output = torch.zeros(
12 |         (batch_size, channels, num_disparities, height, width),
13 |         dtype=left.dtype,
14 |         device=left.device,
15 |     )
16 | 
17 |     for i in range(num_disparities):
18 |         if not is_right:
19 |             output[:, :, i, :, i:] = left[:, :, :, i:] * right[:, :, :, : width - i]
20 |         else:
21 |             output[:, :, i, :, : width - i] = (
22 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
23 |             )
24 | 
25 |     return output
26 | 
27 | 
28 | class CostVolume(nn.Module):
29 |     """Compute cost volume using cross correlation of left and right feature maps"""
30 | 
31 |     def __init__(self, num_disparities, is_right=False):
32 |         super().__init__()
33 |         self.num_disparities = num_disparities
34 |         self.is_right = is_right
35 | 
36 |     def forward(self, left, right):
37 |         if torch.jit.is_scripting():
38 |             return cost_volume(left, right, self.num_disparities, self.is_right)
39 |         else:
40 |             return self.forward_with_amp(left, right)
41 | 
42 |     @torch.jit.unused
43 |     def forward_with_amp(self, left, right):
44 |         """This operation is unstable at float16, so compute at float32 even when using mixed precision"""
45 |         with torch.cuda.amp.autocast(enabled=False):
46 |             left = left.to(torch.float32)
47 |             right = right.to(torch.float32)
48 |             output = cost_volume(left, right, self.num_disparities, self.is_right)
49 |             output = torch.clamp(output, -1e3, 1e3)
50 |             return output
51 | 
52 | 
53 | @torch.jit.script
54 | def dot_product_cost_volume(left, right, num_disparities: int, is_right: bool):
55 |     batch_size, channels, height, width = left.shape
56 | 
57 |     output = torch.zeros(
58 |         (batch_size, num_disparities, height, width),
59 |         dtype=left.dtype,
60 |         device=left.device,
61 |     )
62 | 
63 |     for i in range(num_disparities):
64 |         if not is_right:
65 |             output[:, i, :, i:] = (
66 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
67 |             ).mean(dim=1)
68 |         else:
69 |             output[:, i, :, : width - i] = (
70 |                 left[:, :, :, i:] * right[:, :, :, : width - i]
71 |             ).mean(dim=1)
72 | 
73 |     return output
74 | 
75 | 
76 | class DotProductCostVolume(nn.Module):
77 |     """Compute cost volume using dot product of left and right feature maps"""
78 | 
79 |     def __init__(self, num_disparities, is_right=False):
80 |         super().__init__()
81 |         self.num_disparities = num_disparities
82 |         self.is_right = is_right
83 | 
84 |     def forward(self, left, right):
85 |         return dot_product_cost_volume(left, right, self.num_disparities, self.is_right)
86 | 
87 |     @torch.jit.unused
88 |     def forward_with_amp(self, left, right):
89 |         """This operation is unstable at float16, so compute at float32 even when using mixed precision"""
90 |         with torch.cuda.amp.autocast(enabled=False):
91 |             left = left.to(torch.float32)
92 |             right = right.to(torch.float32)
93 |             output = dot_product_cost_volume(
94 |                 left, right, self.num_disparities, self.is_right
95 |             )
96 |             output = torch.clamp(output, -1e3, 1e3)
97 |             return output
--------------------------------------------------------------------------------
/CARTO/simnet/lib/net/pre_processing/box_inputs.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import multivariate_normal
3 | 
4 | from CARTO.simnet.lib import datapoint
5 | 
6 | _DOWNSCALE_VALUE = 1
7 | _PEAK_CONCENTRATION = 0.8
8 | 
9 | 
10 | def compute_network_targets(boxes, masks, height, width):
11 |     if len(boxes) == 0:
12 |         return datapoint.Box(
13 |             heat_map=np.zeros([height, width]),
14 |             vertex_target=np.zeros([height, width, 4]),
15 |         )
16 |     heatmaps = compute_heatmaps_from_masks(masks)
17 |     vertex_target = compute_vertex_field(boxes, heatmaps)
18 |     return datapoint.Box(heat_map=np.max(heatmaps, axis=0), vertex_target=vertex_target)
19 | 
20 | 
21 | def compute_network_targets_from_detections(
22 |     detections, occ_threshold, min_height, truncation_level, height, width
23 | ):
24 |     detections_marked, detection_masks = mark_ignore_in_box_detections(detections)
25 |     ignore_mask, boxes, masks = np.zeros([height, width]), [], []
26 |     for detection, mask in zip(detections_marked, detection_masks):
27 |         if not detection.ignore:
28 |             boxes.append(detection)
29 |             masks.append(mask)
30 |         else:
31 |             ignore_mask[mask] = 1.0
32 |     if len(boxes) == 0:
33 |         return datapoint.Box(
34 |             heat_map=np.zeros([height, width]),
35 |             vertex_target=np.zeros([height, width, 4]),
36 |         )
37 |     heatmaps = compute_heatmaps_from_masks(masks)
38 |     vertex_target = compute_vertex_field(boxes, heatmaps)
39 |     return datapoint.Box(
40 |         heat_map=np.max(heatmaps, axis=0),
41 |         vertex_target=vertex_target,
42 |         ignore_mask=ignore_mask,
43 |     )
44 | 
45 | 
46 | def compute_heatmaps_from_masks(masks):
47 |     heatmaps = [compute_heatmap_from_mask(mask) for mask in masks]
48 |     return heatmaps
49 | 
50 | 
51 | def compute_heatmap_from_mask(mask):
52 |     if np.sum(mask) == 0:
53 |         raise ValueError("Mask is empty")
54 |     coords = np.indices(mask.shape)
55 |     coords = coords.reshape([2, -1]).T
56 |     mask_f = mask.flatten()
57 |     indices = coords[np.where(mask_f > 0)]
58 |     mean_value = np.floor(np.average(indices, axis=0))
59 |     cov = np.cov((indices - mean_value).T)
60 |     cov = cov * _PEAK_CONCENTRATION
61 |     multi_var = multivariate_normal(mean=mean_value, cov=cov)
62 |     density = multi_var.pdf(coords)
63 |     heat_map = np.zeros(mask.shape)
64 |     heat_map[coords[:, 0], coords[:, 1]] = density
65 |     return heat_map / np.max(heat_map)
66 | 
67 | 
68 | def compute_vertex_field(bboxes, heatmaps):
69 |     H, W = heatmaps[0].shape[0], heatmaps[0].shape[1]
70 |     # For each vertex compute the displacement field.
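    # Editor's note (added for clarity, not part of the original file): each pixel
    # is assigned to the box whose heatmap value is largest there, and the target
    # stores the offset from that pixel to each of the two box corners, mapped into
    # (0, 1) via 1 - (offset + H) / (2 * H) for rows (analogously with W for columns).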
71 | disp_fields = [] 72 | vertex_target = np.zeros( 73 | [len(bboxes), int(H / _DOWNSCALE_VALUE), int(W / _DOWNSCALE_VALUE), 4] 74 | ) 75 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 76 | for i in range(2): 77 | vertex_points = [] 78 | coords = np.indices([H, W]) 79 | coords = coords.transpose((1, 2, 0)) 80 | for box_idx, bbox, heatmap in zip(range(len(bboxes)), bboxes, heatmaps): 81 | disp_field = np.zeros([H, W, 2]) 82 | vertex_point = np.array([bbox[i][0], bbox[i][1]]) 83 | mask = heatmap_indices == box_idx 84 | disp_field[mask] = coords[mask] - vertex_point 85 | # Normalize by height and width 86 | disp_field[mask, 0] = 1.0 - (disp_field[mask, 0] + H) / (2 * H) 87 | disp_field[mask, 1] = 1.0 - (disp_field[mask, 1] + W) / (2 * W) 88 | vertex_target[box_idx, :, :, (2 * i) : (2 * i) + 2] = disp_field[ 89 | ::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE 90 | ] 91 | return np.max(vertex_target, axis=0) 92 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/pose_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.post_processing import epnp 5 | from CARTO.simnet.lib.label import Pose 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | 13 | def compute_network_targets(poses, masks, camera_model): 14 | heatmaps = compute_heatmaps_from_masks(masks) 15 | vertex_target = compute_vertex_field(poses, heatmaps, camera_model) 16 | z_centroid = compute_z_centroid_field(poses, heatmaps) 17 | return datapoint.Pose( 18 | heat_map=np.max(heatmaps, axis=0), 19 | vertex_target=vertex_target, 20 | z_centroid=z_centroid, 21 | ) 22 | 23 | 24 | def compute_heatmaps_from_masks(masks): 25 | heatmaps = [compute_heatmap_from_mask(mask) for mask in masks] 26 | return heatmaps 27 | 28 | 29 | def compute_heatmap_from_mask(mask): 30 | if np.sum(mask) == 0: 31 | raise ValueError("Mask is empty") 32 | coords = np.indices(mask.shape) 33 | coords = coords.reshape([2, -1]).T 34 | mask_f = mask.flatten() 35 | indices = coords[np.where(mask_f > 0)] 36 | mean_value = np.floor(np.average(indices, axis=0)) 37 | cov = np.cov((indices - mean_value).T) 38 | cov = cov * _PEAK_CONCENTRATION 39 | multi_var = multivariate_normal(mean=mean_value, cov=cov) 40 | density = multi_var.pdf(coords) 41 | heat_map = np.zeros(mask.shape) 42 | heat_map[coords[:, 0], coords[:, 1]] = density 43 | return heat_map / np.max(heat_map) 44 | 45 | 46 | def compute_vertex_field(poses, heatmaps, camera_model): 47 | H, W = heatmaps[0].shape[0], heatmaps[0].shape[1] 48 | # Compute the projected box pixels. 49 | boxes = [] 50 | for pose in poses: 51 | pose_no_rot = Pose( 52 | camera_T_object=pose.camera_T_no_rot_object, scale_matrix=pose.scale_matrix 53 | ) 54 | boxes.append(epnp.project_pose_onto_image(pose_no_rot, camera_model)) 55 | # For each vertex compute the displacement field. 
56 | disp_fields = [] 57 | vertex_target = np.zeros( 58 | [len(poses), int(H / _DOWNSCALE_VALUE), int(W / _DOWNSCALE_VALUE), 16] 59 | ) 60 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 61 | for i in range(8): 62 | vertex_points = [] 63 | coords = np.indices([H, W]) 64 | coords = coords.transpose((1, 2, 0)) 65 | for box_idx, bbox, heatmap in zip(range(len(boxes)), boxes, heatmaps): 66 | disp_field = np.zeros([H, W, 2]) 67 | vertex_point = np.array([bbox[i][0], bbox[i][1]]) 68 | mask = heatmap_indices == box_idx 69 | disp_field[mask] = coords[mask] - vertex_point 70 | # Normalize by height and width 71 | disp_field[mask, 0] = 1.0 - (disp_field[mask, 0] + H) / (2 * H) 72 | disp_field[mask, 1] = 1.0 - (disp_field[mask, 1] + W) / (2 * W) 73 | vertex_target[box_idx, :, :, (2 * i) : 2 * i + 2] = disp_field[ 74 | ::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE 75 | ] 76 | return np.max(vertex_target, axis=0) 77 | 78 | 79 | def compute_z_centroid_field(poses, heatmaps): 80 | z_centroid_target = np.zeros( 81 | [len(poses), heatmaps[0].shape[0], heatmaps[0].shape[1]] 82 | ) 83 | heatmap_indices = np.argmax(np.array(heatmaps), axis=0) 84 | for pose, heat_map, ii in zip(poses, heatmaps, range(len(heatmaps))): 85 | mask = heatmap_indices == ii 86 | z_centroid_target[ii, mask] = pose.camera_T_object[2, 3] 87 | # Normalize z_centroid by 1. and multiply by 10 to avoid tensorrt float precision issues. 88 | return np.sum(z_centroid_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 89 | -------------------------------------------------------------------------------- /scripts/real_dataset_vis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Jupyter environment detected. 
Enabling Open3D WebVisualizer.\n", 23 | "[Open3D INFO] WebRTC GUI backend enabled.\n", 24 | "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "from CARTO.lib.real_data import RealDataset\n", 30 | "import pprint\n", 31 | "import copy\n", 32 | "import open3d as o3d\n", 33 | "import numpy as np\n", 34 | "import matplotlib.pyplot as plt" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "load_pc = True\n", 44 | "real_dataset = RealDataset(\"datasets/real\", load_pc=load_pc)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "for sample_idx in range(len(real_dataset)):\n", 54 | " if load_pc:\n", 55 | " dp, labels, pointcloud = real_dataset[sample_idx]\n", 56 | " else:\n", 57 | " dp, labels = real_dataset[sample_idx]\n", 58 | " break" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "down_pcd = pointcloud.voxel_down_sample(voxel_size=0.02)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "object_dict[\"center\"] = array([-0.00375343, 0.0190836 , 0.55100996])\n", 80 | "object_dict[\"rotation\"] = array([[ 1. , 0. , 0. ],\n", 81 | " [ 0. , -0.2923717 , -0.95630476],\n", 82 | " [ 0. , 0.95630476, -0.2923717 ]])\n", 83 | "object_dict[\"extent\"] = array([0.33348947, 0.38 , 0.19401672])\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "object_obbs = []\n", 89 | "for object_dict in labels[\"objects\"]:\n", 90 | " print(f'{object_dict[\"center\"] = }\\n{object_dict[\"rotation\"] = }\\n{object_dict[\"extent\"] = }')\n", 91 | " object_obb = o3d.geometry.OrientedBoundingBox(\n", 92 | " object_dict[\"center\"], object_dict[\"rotation\"], object_dict[\"extent\"]\n", 93 | " ) \n", 94 | " object_obbs.append(object_obb)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# o3d.visualization.draw_plotly([downpcd] + object_obbs)\n", 104 | "# o3d.visualization.draw_plotly(object_obbs)" 105 | ] 106 | }, 107 | { 108 | "attachments": {}, 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Run Local" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 8, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "o3d.visualization.draw_geometries([down_pcd] + object_obbs)" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "CARTO", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.8.17" 142 | }, 143 | "orig_nbformat": 4 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 2 147 | } 148 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/visualize_dataset.py: -------------------------------------------------------------------------------- 1 | ## Load training SDFs 2 | import argparse 3 | import colorsys 4 | import os 5 | 
import numpy as np 6 | import pathlib 7 | import tqdm 8 | import open3d as o3d 9 | import random 10 | 11 | from traitlets import default 12 | 13 | from CARTO.simnet.lib.datapoint import decompress_datapoint 14 | from CARTO.Decoder import utils 15 | from CARTO.Decoder.data import dataset 16 | from CARTO.Decoder import config 17 | 18 | 19 | def main(args): 20 | file_dir = pathlib.Path(args.file_dir) 21 | dataset_cfg: config.GenerationConfig = utils.load_cfg( 22 | file_dir, cfg_class=config.GenerationConfig 23 | ) 24 | all_files = list(file_dir.glob("*.zstd")) 25 | if args.latest or args.earliest: 26 | all_files.sort(key=lambda x: os.path.getmtime(x), reverse=args.earliest) 27 | else: 28 | print("Shuffling object list") 29 | random.shuffle(all_files) 30 | 31 | counts = utils.AccumulatorDict() 32 | for file_name in all_files: 33 | counts.increment(str(file_name).split("_")[-2], 1) 34 | print(counts) 35 | 36 | pcds = [] 37 | object_ratios = [] 38 | all_max = 0.0 39 | for i, file_path in tqdm.tqdm(enumerate(all_files[: args.n])): 40 | with open(file_path, "rb") as fh: 41 | buf = fh.read() 42 | data_point: dataset.DataPoint = decompress_datapoint(buf) 43 | 44 | if args.sdf: 45 | # print(data_point.keys()) 46 | sdf = data_point.sdf_values 47 | points = data_point.points[sdf <= 0.0] 48 | color = utils.get_random_color() 49 | normals = None 50 | elif args.pc: 51 | points = data_point.full_pc 52 | normals = data_point.full_normals 53 | 54 | if len(points) == 0: 55 | continue 56 | 57 | all_max = max(all_max, np.max(points)) 58 | # if (np.max(points) < 1.0): 59 | # continue 60 | # print("Adding to Visualization") 61 | 62 | points /= dataset_cfg.max_extent 63 | 64 | pcd = o3d.geometry.PointCloud() 65 | pcd.points = o3d.utility.Vector3dVector(points) 66 | if args.pc: 67 | pcd.normals = o3d.utility.Vector3dVector(normals) 68 | if args.sdf: 69 | pcd.paint_uniform_color(color) 70 | pcds.append(pcd) 71 | 72 | if args.sdf: 73 | object_ratios.append(np.count_nonzero(sdf <= 0) / sdf.shape[0]) 74 | 75 | if args.unit_cube: 76 | cube_points = np.array( 77 | [ 78 | [-1.0, -1.0, -1.0], 79 | [1.0, -1.0, -1.0], 80 | [-1.0, 1.0, -1.0], 81 | [1.0, 1.0, -1.0], 82 | [-1.0, -1.0, 1.0], 83 | [1.0, -1.0, 1.0], 84 | [-1.0, 1.0, 1.0], 85 | [1.0, 1.0, 1.0], 86 | ], 87 | dtype=np.float, 88 | ) 89 | # cube_points /= 2 90 | lines = np.array( 91 | [ 92 | [0, 1], 93 | [0, 2], 94 | [1, 3], 95 | [2, 3], 96 | [4, 5], 97 | [4, 6], 98 | [5, 7], 99 | [6, 7], 100 | [0, 4], 101 | [1, 5], 102 | [2, 6], 103 | [3, 7], 104 | ] 105 | ) 106 | colors = [[1, 0, 0] for i in range(len(lines))] 107 | line_set = o3d.geometry.LineSet() 108 | line_set.points = o3d.utility.Vector3dVector(cube_points) 109 | line_set.lines = o3d.utility.Vector2iVector(lines) 110 | line_set.colors = o3d.utility.Vector3dVector(colors) 111 | pcds.append(line_set) 112 | 113 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame()) 114 | o3d.visualization.draw_geometries(pcds) 115 | 116 | print(f"{all_max = }") 117 | 118 | if args.sdf: 119 | print(f"{object_ratios = }\n\tw/ mean {np.array(object_ratios).mean()}") 120 | 121 | 122 | if __name__ == "__main__": 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument("file_dir") 125 | parser.add_argument("-n", type=int, default=100) 126 | parser.add_argument("-l", "--latest", action="store_true", default=False) 127 | parser.add_argument("-e", "--earliest", action="store_true", default=False) 128 | parser.add_argument("-sdf", action="store_true", default=False) 129 | parser.add_argument("-pc", action="store_true", 
default=False) 130 | parser.add_argument("--unit-cube", action="store_true", default=False) 131 | args = parser.parse_args() 132 | main(args) 133 | -------------------------------------------------------------------------------- /CARTO/lib/real_data.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import pathlib 4 | from collections import namedtuple 5 | from typing import Any, Tuple, Union 6 | 7 | import numpy as np 8 | import open3d as o3d 9 | import torch 10 | 11 | from CARTO import ROOT_DIR 12 | from CARTO.simnet.lib import datapoint 13 | from CARTO.simnet.lib.net.dataset import Dataset, PanopticOutputs, create_anaglyph 14 | from CARTO.simnet.lib.net.post_processing.depth_outputs import DepthOutput 15 | 16 | MISSING_LABELS_ID = [ 17 | "YgMop2xGR9QQg3tvtHV8LE", 18 | "7mcbjCWqCUbcmuDRys7pKb", 19 | "ftGQw2kjUAGi4EEKm2sath", 20 | "8GBGdsC7F636882P4rmEC8", 21 | "JLnmWMvp6pN6CuSLLWwoab", 22 | "kYpjYEACcccqbJZX7HrpXG", 23 | "9wRKEke2FaHzNg42rWDisS", 24 | "QLctV2dhgTprak4d9HCwzY", 25 | "S7Ty7vSH6YcyBuDpCmThtD", 26 | ] 27 | 28 | depth_hparams = namedtuple("depth_hparams", ["max_disparity"]) 29 | 30 | 31 | def convert_labels(old_labels): 32 | labels = copy.deepcopy(old_labels) 33 | labels["id"] = labels["filename"].split(".")[0] 34 | for object_idx in range(len(labels["objects"])): 35 | object_dict = labels["objects"][object_idx] 36 | 37 | center = np.array( 38 | [ 39 | object_dict["centroid"]["x"], 40 | object_dict["centroid"]["y"], 41 | object_dict["centroid"]["z"], 42 | ] 43 | ) 44 | zyx_array = np.array( 45 | [ 46 | object_dict["rotations"]["z"], 47 | object_dict["rotations"]["y"], 48 | object_dict["rotations"]["x"], 49 | ] 50 | ) 51 | zyx_array = zyx_array / 180 * np.pi 52 | R = o3d.geometry.get_rotation_matrix_from_zyx(zyx_array) 53 | extent = np.array( 54 | [ 55 | object_dict["dimensions"]["length"], 56 | object_dict["dimensions"]["width"], 57 | object_dict["dimensions"]["height"], 58 | ] 59 | ) 60 | object_dict["center"] = center 61 | object_dict["rotation"] = R 62 | object_dict["extent"] = extent 63 | 64 | del object_dict["centroid"] 65 | del object_dict["rotations"] 66 | del object_dict["dimensions"] 67 | 68 | labels["objects"][object_idx] = object_dict 69 | 70 | del labels["folder"] 71 | del labels["filename"] 72 | del labels["path"] 73 | return labels 74 | 75 | 76 | class RealDataset(Dataset): 77 | def __init__( 78 | self, 79 | dataset_path: Union[str, pathlib.Path], 80 | load_pc: bool = False, 81 | skip_without_labels=True, 82 | ): 83 | self.dataset_path = pathlib.Path(dataset_path) 84 | simnet_dataset = datapoint.make_dataset(str(self.dataset_path / "data")) 85 | self.datapoint_handles = simnet_dataset.list() 86 | if skip_without_labels: 87 | self.datapoint_handles = list( 88 | filter(lambda x: x.uid not in MISSING_LABELS_ID, self.datapoint_handles) 89 | ) 90 | self.load_pc = load_pc 91 | 92 | self.hparams = depth_hparams(max_disparity=180) 93 | 94 | def __len__(self): 95 | return len(self.datapoint_handles) 96 | 97 | def __getitem__(self, idx) -> Tuple[PanopticOutputs, Any, Any]: 98 | # TODO Update the Any! 
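        # Editor's note (added for clarity, not part of the original file): this
        # returns (PanopticOutputs, labels) by default, or (PanopticOutputs, labels,
        # point cloud) when the dataset was built with load_pc=True; labels is the
        # converted JSON dict carrying per-object center / rotation / extent.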
99 | local_handle: datapoint.LocalReadHandle = self.datapoint_handles[idx] 100 | 101 | dp: datapoint.Panoptic = local_handle.read() 102 | anaglyph = create_anaglyph(dp.stereo) 103 | 104 | panoptic_out = PanopticOutputs( 105 | depth=[] 106 | if dp.depth is None 107 | else [DepthOutput(torch.Tensor(dp.depth), self.hparams)], 108 | room_segmentation=[], 109 | cabinet_door_obbs=[], 110 | handhold_obbs=[], 111 | graspable_objects_obbs=[], 112 | grasp_quality_scores=[], 113 | small_depth=[], 114 | val_data=[dp.val_data], 115 | stereo_imgs=[anaglyph], 116 | ) 117 | 118 | # Load labels 119 | with ( 120 | ROOT_DIR / ".." / self.dataset_path / "labels" / f"{local_handle.uid}.json" 121 | ).open() as label_file: 122 | labels = json.load(label_file) 123 | labels = convert_labels(labels) 124 | 125 | if not self.load_pc: 126 | return panoptic_out, labels 127 | 128 | pointcloud_loc = ( 129 | ROOT_DIR 130 | / ".." 131 | / self.dataset_path 132 | / "pointclouds" 133 | / f"{local_handle.uid}.ply" 134 | ) 135 | 136 | pc = o3d.io.read_point_cloud(str(pointcloud_loc)) 137 | 138 | return panoptic_out, labels, pc 139 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/segmentation_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | 8 | from CARTO.simnet.lib import color_stuff 9 | from CARTO.simnet.lib import datapoint 10 | 11 | # Panoptic Segmentation Colors 12 | 13 | 14 | class SegmentationOutput: 15 | def __init__(self, seg_pred, hparams): 16 | self.seg_pred = seg_pred 17 | self.is_numpy = False 18 | self.hparams = hparams 19 | 20 | # Converters for torch to numpy 21 | def convert_to_numpy_from_torch(self): 22 | self.seg_pred = np.ascontiguousarray(self.seg_pred.float().cpu().numpy()) 23 | self.is_numpy = True 24 | 25 | def convert_to_torch_from_numpy(self): 26 | self.seg_pred = torch.from_numpy(np.ascontiguousarray(self.seg_pred)).long() 27 | self.is_numpy = False 28 | 29 | def get_visualization_img(self, left_image, is_target=False): 30 | if not self.is_numpy: 31 | self.convert_to_numpy_from_torch() 32 | if is_target: 33 | seg_mask = self.seg_pred 34 | max_number = int(seg_mask.max()) + 1 35 | else: 36 | seg_mask = np.argmax(self.seg_pred, axis=1)[0] 37 | assert self.seg_pred.ndim == 4 # 1 x L x H x W 38 | max_number = int(self.seg_pred.shape[1]) 39 | return draw_segmentation_mask(left_image, seg_mask, num_classes=max_number) 40 | 41 | def get_visualization_img_with_categories( 42 | self, left_image, detections, class_list, is_target=False 43 | ): 44 | if not self.is_numpy: 45 | self.convert_to_numpy_from_torch() 46 | if is_target: 47 | seg_mask_predictions = self.seg_pred 48 | else: 49 | seg_mask_predictions = np.argmax(self.seg_pred[0], axis=0) 50 | 51 | return draw_segmentation_mask_with_categories( 52 | left_image, seg_mask_predictions, detections, class_list 53 | ) 54 | 55 | def get_prediction(self): 56 | if not self.is_numpy: 57 | self.convert_to_numpy_from_torch() 58 | return self.seg_pred[0] 59 | 60 | def compute_loss(self, seg_targets, log, name): 61 | if self.is_numpy: 62 | raise ValueError("Output is not in torch mode") 63 | seg_target_stacked = [] 64 | for seg_target in seg_targets: 65 | seg_target_stacked.append(seg_target.seg_pred) 66 | seg_target_batch = torch.stack(seg_target_stacked) 67 | seg_target_batch = 
seg_target_batch.to(torch.device("cuda:0")) 68 | if len(seg_target_batch.shape) == 4: 69 | seg_target_batch = torch.argmax(seg_target_batch, dim=1) 70 | seg_loss = F.cross_entropy( 71 | self.seg_pred, seg_target_batch, reduction="mean", ignore_index=-100 72 | ) 73 | log[name] = seg_loss.item() 74 | return self.hparams.loss_seg_mult * seg_loss 75 | 76 | 77 | def draw_segmentation_mask(color_img, seg_mask, num_classes=7): 78 | assert len(seg_mask.shape) == 2 79 | seg_mask = seg_mask.astype(np.uint8) 80 | # TODO(mike.laskey) Replace this with a set list. 81 | if num_classes == 7: 82 | colors = color_stuff.get_panoptic_colors() 83 | else: 84 | colors = color_stuff.get_colors(num_classes) 85 | 86 | color_img = color_img_to_gray(color_img) 87 | for ii, color in zip(range(num_classes), colors): 88 | if ii == 0: # ignore background class 89 | continue 90 | 91 | colored_mask = np.zeros([seg_mask.shape[0], seg_mask.shape[1], 3]) 92 | colored_mask[seg_mask == ii, :] = color 93 | color_img = cv2.addWeighted( 94 | color_img.astype(np.uint8), 0.9, colored_mask.astype(np.uint8), 0.4, 0 95 | ) 96 | return cv2.cvtColor(color_img.astype(np.uint8), cv2.COLOR_BGR2RGB) 97 | 98 | 99 | def color_img_to_gray(image): 100 | gray_scale_img = np.zeros(image.shape) 101 | img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 102 | for i in range(3): 103 | gray_scale_img[:, :, i] = img 104 | gray_scale_img[:, :, i] = img 105 | return gray_scale_img 106 | 107 | 108 | def draw_segmentation_mask_with_categories(color_img, seg_mask, detections, class_list): 109 | assert len(seg_mask.shape) == 2 110 | seg_mask = seg_mask.astype(np.int) 111 | seg_mask_vis = draw_segmentation_mask( 112 | color_img, seg_mask, num_classes=len(class_list) 113 | ) 114 | for detection in detections: 115 | pixel_x = int(detection[0]) 116 | pixel_y = int(detection[1]) 117 | 118 | category_id = seg_mask[pixel_x, pixel_y] 119 | category = class_list[category_id] 120 | 121 | if category.name == "background": 122 | color = (255, 0, 0) # dark blue 123 | else: 124 | color = (255, 128, 128) # light blue 125 | 126 | seg_mask_vis = cv2.putText( 127 | seg_mask_vis, 128 | category.name, 129 | (pixel_y, pixel_x), 130 | cv2.FONT_HERSHEY_SIMPLEX, 131 | 1, 132 | color, 133 | 2, 134 | cv2.LINE_AA, 135 | ) 136 | return seg_mask_vis 137 | -------------------------------------------------------------------------------- /scripts/preprocess_partnetmobility.py: -------------------------------------------------------------------------------- 1 | #!/opt/mmt/python_venv/bin/python 2 | 3 | import argparse 4 | import json 5 | import logging 6 | import pathlib 7 | import random 8 | import subprocess 9 | from collections import defaultdict 10 | from concurrent import futures 11 | import tarfile 12 | 13 | import numpy as np 14 | import tqdm 15 | import trimesh 16 | import urdfpy 17 | import zstandard as zstd 18 | from CARTO.lib.partnet_mobility import get_joint_dict 19 | from CARTO.lib.compression import write_compressed_json 20 | 21 | 22 | PARALLEL = True 23 | 24 | 25 | def identity_matrix(): 26 | return np.eye(4) 27 | 28 | 29 | around_z_neg_90 = trimesh.transformations.rotation_matrix( 30 | np.pi / 2, np.array([0.0, 0.0, -1.0]) 31 | ) 32 | 33 | # Dictionaries for transformations of objects that might be not in a canonical way! 
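# Editor's note (added for clarity, not part of the original file): both lookups
# below default to the identity matrix, so only the categories/instances listed
# explicitly (e.g. "Pliers") receive an extra rotation; process_model later stores
# trans_cat @ trans_ins as the object's canonical_transformation.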
34 | canonical_transformations_cat = defaultdict( 35 | identity_matrix, 36 | { 37 | "Pliers": around_z_neg_90, 38 | # "Scissors": around_z_neg_90 39 | }, 40 | ) 41 | canonical_transformations_instance = defaultdict( 42 | identity_matrix, {"d01ff66659767d50cee19268a161fc4a": around_z_neg_90} 43 | ) 44 | 45 | 46 | def main(top_dir=pathlib.Path("datasets/partnet-mobility-v0/raw_dataset")): 47 | model_dirs = (top_dir).glob("*") 48 | 49 | (top_dir / ".." / "tarfiles").mkdir(exist_ok=True, parents=True) 50 | 51 | full_index = [] 52 | if PARALLEL: 53 | all_futures = [] 54 | with futures.ProcessPoolExecutor() as executor: 55 | for model_dir in model_dirs: 56 | all_futures.append(executor.submit(process_model, model_dir)) 57 | with tqdm.tqdm(total=len(all_futures)) as pbar: 58 | for future in futures.as_completed(all_futures): 59 | pbar.update(1) 60 | full_index.append(future.result()) 61 | else: 62 | for model_dir in tqdm.tqdm(model_dirs, total=len(model_dirs)): 63 | full_index.append(process_model(model_dir)) 64 | 65 | index = [meta for (meta, safe) in full_index if safe] 66 | print(f"Found {len(full_index)} models but only {len(index)} are safe") 67 | 68 | print("Writing index") 69 | index = sorted(index, key=lambda x: x["model_id"]) 70 | index_path = top_dir / ".." / "index.json.zst" 71 | write_compressed_json(index, index_path) 72 | 73 | 74 | def load_semantics(semantics_file): 75 | joint_meta_info = {} 76 | for line in semantics_file.readlines(): 77 | line_entries = line.rstrip("\n").split(" ") 78 | joint_meta_info[f"joint_{int(line_entries[0].split('_')[1])}"] = { 79 | "sem_type": line_entries[1], 80 | "sem_name": line_entries[2], 81 | } 82 | return joint_meta_info 83 | 84 | 85 | def process_model(model_dir: pathlib.Path): 86 | with open(model_dir / "meta.json") as fh: 87 | meta = json.load(fh) 88 | assert "model_id" in meta 89 | model_id = meta["model_id"] 90 | 91 | # Create tar-ball 92 | all_paths = model_dir.glob("**/*") 93 | tar_path = model_dir / ".." / ".." / "tarfiles" / (model_id + ".tar.zst") 94 | cctx = zstd.ZstdCompressor() 95 | with open(tar_path, "wb") as raw_fh: 96 | with cctx.stream_writer(raw_fh) as zst_fh: 97 | with tarfile.open(fileobj=zst_fh, mode="w") as tar: 98 | for path in all_paths: 99 | rel_path = path.relative_to(model_dir) 100 | tar.add(str(path), arcname=str(rel_path), recursive=False) 101 | 102 | tar_bytes = tar_path.stat().st_size 103 | meta["num_bytes"] = tar_bytes 104 | 105 | with open(model_dir / "semantics.txt") as fh: 106 | joint_semantics = load_semantics(fh) 107 | 108 | # Try loading the URDF 109 | # This step is important as PartNetMobility might miss some .obj! 
110 | try: 111 | urdf = urdfpy.URDF.load(str(model_dir / "mobility.urdf")) 112 | except ValueError as e: 113 | logging.warning(f"urdfpy could not load model at {model_dir} with error\n{e}") 114 | return None, False 115 | 116 | # Manually parse relevant joint informations for saving in index 117 | joint: urdfpy.Joint 118 | for joint in urdf.joints: 119 | try: 120 | joint_dict = get_joint_dict(joint) 121 | joint_semantics[joint_dict["id"]].update(joint_dict) 122 | except: 123 | has_slider_plus = False 124 | for joint_semants in joint_semantics.values(): 125 | has_slider_plus |= joint_semants["sem_type"] == "slider+" 126 | if not has_slider_plus: 127 | print(f"--- {model_id} @ {model_dir} ---") 128 | print(f"{joint_semantics = }") 129 | print(f"{joint_dict = }") 130 | else: 131 | print(f"-- Found 'slider+'-type") 132 | meta["joints"] = joint_semantics 133 | 134 | trans_cat = canonical_transformations_cat[meta["model_cat"]] 135 | trans_ins = canonical_transformations_instance[model_id] 136 | meta["canonical_transformation"] = (trans_cat @ trans_ins).tolist() 137 | 138 | return meta, True 139 | 140 | 141 | if __name__ == "__main__": 142 | parser = argparse.ArgumentParser("Create index for PartnetMobility V0") 143 | args = parser.parse_args() 144 | main() 145 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/onnx_plugins.py: -------------------------------------------------------------------------------- 1 | """Plugins that can be used in an ONNX model.""" 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.nn.modules.utils as utils 7 | 8 | # TODO(krishnashankar): Arguments of functions in modules below 9 | # differ from those of the base class(es) they inherit from, and 10 | # pylint complains. For now, disable here and consider disabling 11 | # globally. 
12 | 13 | # pylint: disable=arguments-differ 14 | # pylint: disable=protected-access 15 | 16 | 17 | @torch.autograd.function.traceable 18 | class ExportableUpsampleFunction(torch.autograd.Function): 19 | """Upsample function that can be traced for ONNX export.""" 20 | 21 | @staticmethod 22 | def symbolic(g, inputs, scale_factor): 23 | assert scale_factor == 2, "Only 2x upsample implemented" 24 | return g.op( 25 | "TRT_PluginV2", 26 | inputs, 27 | version_s="0.0.1", 28 | namespace_s="", 29 | data_s="", 30 | name_s="UpsampleBilinearEvenSquare", 31 | ) 32 | 33 | @staticmethod 34 | def forward(ctx, inputs, scale_factor): 35 | return F.interpolate( 36 | inputs, scale_factor=scale_factor, mode="bilinear", align_corners=False 37 | ) 38 | 39 | @staticmethod 40 | def backward(_): 41 | raise RuntimeError("Backward not implemented") 42 | 43 | 44 | class ExportableUpsample(nn.Module): 45 | """Upsample module that can be used in an ONNX model.""" 46 | 47 | def __init__(self, scale_factor): 48 | super().__init__() 49 | self.scale_factor = scale_factor 50 | 51 | def forward(self, inputs): 52 | return ExportableUpsampleFunction.apply(inputs, self.scale_factor) 53 | 54 | 55 | class UpsampleWithConvTranspose(nn.Module): 56 | """Upsample model implemented with transposed convolution.""" 57 | 58 | def __init__(self, scale_factor): 59 | super(UpsampleWithConvTranspose, self).__init__() 60 | self.weights = None 61 | self.scale_factor = utils._pair(scale_factor) 62 | 63 | def check_scale_factor(scale_factor): 64 | assert scale_factor == 1 or scale_factor % 2 == 0 65 | 66 | check_scale_factor(self.scale_factor[0]) 67 | check_scale_factor(self.scale_factor[1]) 68 | 69 | def get_kernel_size(self, factor): 70 | return 2 * factor - factor % 2 71 | 72 | def bilinear_upsample_kernel(self, size): 73 | """Get a transpoed convolution kernel that implemented upsampling for the 74 | given size.""" 75 | 76 | def get_factor_and_center(size): 77 | factor = (size + 1) // 2 78 | if size % 2 == 1: 79 | center = factor - 1 80 | else: 81 | center = factor - 0.5 82 | return factor, center 83 | 84 | factor_h, center_h = get_factor_and_center(size[0]) 85 | factor_w, center_w = get_factor_and_center(size[1]) 86 | og = np.ogrid[: size[0], : size[1]] 87 | return (1 - abs((og[0] - center_h) / factor_h)) * ( 88 | 1 - abs((og[1] - center_w) / factor_w) 89 | ) 90 | 91 | def bilinear_upsample_weights(self, factor, nchannels): 92 | """Get transposed convolution weights for upsampling.""" 93 | filter_size_h = self.get_kernel_size(factor[0]) 94 | filter_size_w = self.get_kernel_size(factor[1]) 95 | 96 | weights = np.zeros( 97 | (filter_size_h, filter_size_w, nchannels, nchannels), dtype=np.float32 98 | ) 99 | 100 | kernel = self.bilinear_upsample_kernel((filter_size_h, filter_size_w)) 101 | 102 | for c in range(nchannels): 103 | weights[:, :, c, c] = kernel 104 | 105 | return weights 106 | 107 | def forward(self, inputs): 108 | in_channels = inputs.shape[1] 109 | if self.weights is None: 110 | weights = self.bilinear_upsample_weights(self.scale_factor, in_channels) 111 | # Order weights to be compatible with pytorch (in_channels, out_channels, height, width). 
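            # Editor's note (added for clarity, not part of the original file): for
            # scale_factor 2 the 1-D bilinear profile is [0.25, 0.75, 0.75, 0.25]; the
            # 4x4 kernel is its outer product, placed on the (c, c) diagonal so every
            # channel is upsampled independently.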
112 | self.weights = ( 113 | torch.from_numpy(weights.transpose(2, 3, 0, 1)) 114 | .to(inputs.device) 115 | .type(inputs.dtype) 116 | ) 117 | output = torch.nn.functional.conv_transpose2d( 118 | inputs, 119 | self.weights, 120 | stride=self.scale_factor, 121 | padding=(self.scale_factor[0] // 2, self.scale_factor[1] // 2), 122 | ) 123 | return output 124 | 125 | 126 | def fix_module(module): 127 | """Replace all modules in the given module with ONNX-compatible modules.""" 128 | for child_module_name, child_module in module.named_children(): 129 | if isinstance(child_module, nn.Upsample): 130 | scale_factor = int(child_module.scale_factor) 131 | # TensorRT plugin can only load 2x upsample from ONNX currently, so 132 | # otherwise use transposed convolution. 133 | if False and scale_factor == 2: 134 | module._modules[child_module_name] = ExportableUpsample(scale_factor) 135 | else: 136 | module._modules[child_module_name] = UpsampleWithConvTranspose( 137 | scale_factor 138 | ) 139 | elif len(list(child_module.children())) > 0: 140 | fix_module(child_module) 141 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/asdf_dataset.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import torch 3 | import sys 4 | import json 5 | import numpy as np 6 | import trimesh 7 | import logging 8 | 9 | from CARTO.Decoder.data.dataset import DataPoint 10 | 11 | from typing import List, Dict 12 | 13 | ## Adding ASDF to our search path 14 | # For code release: should or maybe could be a submodule 15 | ASDF_BASE_PATH = "external_libs/A-SDF" 16 | sys.path.append(ASDF_BASE_PATH) 17 | 18 | try: 19 | from asdf.data import SDFSamples 20 | except Exception as e: 21 | logging.critical(e, exc_info=True) # log exception info at CRITICAL log level 22 | 23 | 24 | class ASDFDataset(torch.utils.data.Dataset): 25 | def __init__( 26 | self, 27 | all_file_ids: List[pathlib.Path], 28 | subsample_amount: int = 12500, 29 | load_ram: bool = True, 30 | train: bool = True, 31 | load_gt: bool = False, 32 | ): 33 | ### Create A-SDF datasets.. 
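        # Editor's note (added for clarity, not part of the original file): the wrapped
        # A-SDF splits are concatenated; self.stops holds cumulative dataset lengths
        # (starting at 0), and __getitem__ uses them to map a global index to the
        # right sub-dataset and a local index within it.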
34 | 35 | all_file_ids = [ 36 | f"{file_ids}_{'train' if train else 'test'}.json" 37 | for file_ids in all_file_ids 38 | ] 39 | self.all_file_splits = [] 40 | self.dataset_categories = [] 41 | 42 | for file_ids in all_file_ids: 43 | with open(pathlib.Path(ASDF_BASE_PATH) / file_ids, "r") as f: 44 | json_split = json.load(f) 45 | self.all_file_splits.append(json_split) 46 | self.dataset_categories.append(list(json_split["shape2motion"].keys())[0]) 47 | 48 | self.ASDF_datasets = [] 49 | self.stops = [0] 50 | 51 | for file_split in self.all_file_splits: 52 | asdf_set = SDFSamples( 53 | pathlib.Path(ASDF_BASE_PATH) / "data", 54 | file_split, 55 | subsample_amount, 56 | load_ram=load_ram, 57 | articulation=True, 58 | ) 59 | 60 | self.ASDF_datasets.append(asdf_set) 61 | self.stops.append(self.stops[-1] + len(asdf_set)) 62 | 63 | self.stops = np.array(self.stops) 64 | self.load_gt = load_gt 65 | 66 | def __len__(self) -> int: 67 | return self.stops[-1] 68 | 69 | def __getitem__(self, idx: int) -> DataPoint: 70 | dataset_idx = len(self.stops) - np.count_nonzero(idx < self.stops) - 1 71 | category = self.dataset_categories[dataset_idx] 72 | 73 | # dataset_idx = 74 | if category == "laptop": 75 | limits = [-1.5708, 0.0] # Upper limit does not matter 76 | else: 77 | limits = [0.0, 0.0] 78 | 79 | local_idx = idx - self.stops[dataset_idx] 80 | asdf_data = self.ASDF_datasets[dataset_idx][local_idx] 81 | (tensor, joint_state, instance_id), i = asdf_data 82 | points = tensor[:, :3] 83 | sdf = tensor[:, 3] 84 | parts = tensor[:, 4] 85 | 86 | datapoint = DataPoint( 87 | object_id=f"{category}_{instance_id}", 88 | joint_config_id=str(idx), 89 | joint_config={"joint": float(joint_state / 180 * np.pi)}, 90 | points=points.float().cpu(), 91 | sdf_values=sdf.float().cpu(), 92 | ) 93 | datapoint.joint_def = { 94 | "joint": { 95 | "type": "revolute", # All ASDFs objects are revolute 96 | "limit": limits, 97 | } 98 | } 99 | if not self.load_gt: 100 | return datapoint 101 | 102 | corresponding_split = self.all_file_splits[dataset_idx] 103 | instance_name = f"{corresponding_split['shape2motion'][self.dataset_categories[dataset_idx]][local_idx]}" 104 | 105 | ground_truth_samples_filename = ( 106 | pathlib.Path(ASDF_BASE_PATH) 107 | / "data" 108 | / "SurfaceSamples" 109 | / "shape2motion" 110 | / category 111 | / (instance_name + ".obj") 112 | ) 113 | normalization_params_filename = ( 114 | pathlib.Path(ASDF_BASE_PATH) 115 | / "data" 116 | / "NormalizationParameters" 117 | / "shape2motion" 118 | / category 119 | / (instance_name + ".npz") 120 | ) 121 | 122 | gt_mesh = trimesh.load(ground_truth_samples_filename) 123 | gt_points = gt_mesh.vertices 124 | 125 | # Apply the inverse normalization 126 | normalization_params = np.load(normalization_params_filename) 127 | offset = normalization_params["offset"] 128 | scale = normalization_params["scale"] 129 | gt_points = (gt_points + offset) * scale 130 | datapoint.full_pc = np.copy(gt_points) 131 | 132 | return datapoint 133 | 134 | # Same as for our-SDF 135 | @staticmethod 136 | def collate_fn(datapoints: List[DataPoint]) -> Dict: 137 | return { 138 | "object_id": [datapoint.object_id for datapoint in datapoints], 139 | "joint_config_id": [ 140 | str(datapoint.joint_config_id) for datapoint in datapoints 141 | ], 142 | "joint_config": [datapoint.joint_config for datapoint in datapoints], 143 | "zero_joint_config": [ 144 | datapoint.zero_joint_config for datapoint in datapoints 145 | ], 146 | "joint_definition": [datapoint.joint_def for datapoint in datapoints], 147 | "sdf": 
torch.stack( 148 | [torch.FloatTensor(datapoint.sdf_values) for datapoint in datapoints] 149 | ), 150 | "points": torch.stack( 151 | [torch.FloatTensor(datapoint.points) for datapoint in datapoints] 152 | ), 153 | } 154 | -------------------------------------------------------------------------------- /scripts/full_inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | torch.cuda.set_device(0) 4 | import dataclasses 5 | import pathlib 6 | import random 7 | from typing import Optional 8 | 9 | # Ensure mesh_to_sdf is imported first 10 | import numpy as np 11 | import open3d as o3d 12 | import seaborn as sns 13 | import torch 14 | import tyro 15 | 16 | from CARTO.Encoder.inference import CARTO, CARTOPrediction 17 | from CARTO.lib.real_data import RealDataset 18 | from CARTO.simnet.lib.net.dataset import Dataset, PanopticOutputs 19 | 20 | sns.set() 21 | 22 | import pickle 23 | 24 | import matplotlib.pyplot as plt 25 | import seaborn as sns 26 | import tqdm 27 | 28 | 29 | def save_image(data, file_path: pathlib.Path, FIG_DPI: int = 400): 30 | fig = plt.figure( 31 | dpi=FIG_DPI, figsize=(data.shape[1] / FIG_DPI, data.shape[0] / FIG_DPI) 32 | ) 33 | ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0]) 34 | ax.set_axis_off() 35 | fig.add_axes(ax) 36 | ax.imshow(data) 37 | # plt.tight_layout() 38 | fig.savefig(str(file_path)) 39 | plt.close(fig) 40 | 41 | 42 | @dataclasses.dataclass 43 | class InferenceConfig: 44 | model_id: str = "14i8yfym" 45 | checkpoint_id: Optional[str] = None 46 | seed: int = 12345 47 | start_lod: int = 4 48 | end_lod: int = 7 49 | dataset_path: Optional[str] = None 50 | real_data: bool = ( 51 | False # Real data has a larger image size than synthetic/simulated data 52 | ) 53 | single_sample: int = -1 54 | max_samples: int = 1000 55 | 56 | 57 | def main(cfg: InferenceConfig): 58 | carto = CARTO(cfg.model_id, checkpoint_id=cfg.checkpoint_id) 59 | 60 | if cfg.real_data: 61 | dataset = RealDataset("datasets/real", load_pc=False) 62 | dataset_name = "real" 63 | else: 64 | if cfg.dataset_path is None: 65 | cfg.dataset_path = carto.hparams.test_path 66 | 67 | dataset_name = "_".join(cfg.dataset_path.split("/")[1:]) 68 | dataset = Dataset(cfg.dataset_path, carto.hparams) 69 | print(f"{len(dataset)} samples @ {cfg.dataset_path}") 70 | 71 | iterator = ( 72 | tqdm.tqdm(range(cfg.max_samples)) 73 | if cfg.single_sample < 0 74 | else [cfg.single_sample] 75 | ) 76 | for sample_id in iterator: 77 | vis_dir = carto.model_dir / "vis" / dataset_name / f"full_scene_{sample_id}" 78 | 79 | sample: PanopticOutputs 80 | if cfg.real_data: 81 | sample, _ = dataset[sample_id] 82 | sample.stereo_imgs[0] = sample.stereo_imgs[0][:, ::2, ::2] 83 | if len(sample.depth) > 0: 84 | sample.depth[0].depth_pred = sample.depth[0].depth_pred[::2, ::2] 85 | else: 86 | sample = dataset[sample_id] 87 | 88 | carto_prediction: CARTOPrediction = carto(sample) 89 | carto_prediction.set_vis_dir(vis_dir) 90 | 91 | carto_prediction.save_rgb() 92 | carto_prediction.save_segmentation() 93 | if not carto.hparams.model_rgbd: 94 | carto_prediction.save_depth() 95 | carto_prediction.save_bbox() 96 | 97 | carto_prediction.save_heatmap() 98 | carto_prediction.save_poses() 99 | 100 | ply_objects = carto_prediction.get_canonical_objects(ply=True) 101 | for idx, ply_object in enumerate(ply_objects): 102 | o3d.io.write_point_cloud( 103 | str(vis_dir / f"predicted_pc_{idx:03d}.ply"), ply_object 104 | ) 105 | 106 | #### TODO Add Function in carto_prediction? 
107 | # for shape_id in range(len(latent_embeddings_shape)): 108 | # artciulated_vis_dir = vis_dir / "articulated" / str(shape_id) 109 | # artciulated_vis_dir.mkdir(exist_ok=True, parents=True) 110 | # shape_code = latent_embeddings_shape[shape_id] 111 | # # Overwrite a single joint state 112 | # joint_dict_result = joint_decoder(torch.Tensor(latent_embeddings_arti).cuda()) 113 | # pred_joint_state = joint_dict_result["state"][0].detach().cpu() 114 | # pred_joint_type = utils.get_joint_type_batch(joint_dict_result["type"])[0] 115 | 116 | # latent_embeddings_arti = joint_embedding.poly_fits[pred_joint_type].linspace(60) 117 | # latent_embeddings_shapes = np.tile(shape_code, (latent_embeddings_arti.shape[0], 1)) 118 | 119 | # ply_objects = decoder.get_ply_meshes( 120 | # latent_embeddings_shapes, 121 | # latent_embeddings_arti, 122 | # distance_threshold=1e-2, 123 | # lod_start=4, 124 | # lod_current=8, 125 | # estimate_normals=False, 126 | # chunk_size=5e5 127 | # ) 128 | 129 | # for idx, ply_object in enumerate(ply_objects): 130 | # o3d.io.write_point_cloud(str(artciulated_vis_dir / f"{idx:03d}.ply"), ply_object) 131 | #### TODO Add Function 132 | 133 | carto_prediction.save_2d_points() 134 | carto_prediction.save_pred_obb() 135 | 136 | pose_dicts = { 137 | "abs_pose_output": carto_prediction.get_poses(), 138 | "root_T_camera": sample.val_data[0].root_T_camera, 139 | } 140 | 141 | save_name = str(vis_dir / f"abs_pose_output.pkl") 142 | with open(save_name, "wb") as output: 143 | pickle.dump(pose_dicts, output) 144 | 145 | #### TODO Add function to plot in embedding 146 | # carto_prediction.save_in_embeddings(...) 147 | #### 148 | 149 | 150 | if __name__ == "__main__": 151 | cfg: InferenceConfig = tyro.parse(InferenceConfig) 152 | torch.random.manual_seed(cfg.seed) 153 | random.seed(cfg.seed) 154 | np.random.seed(cfg.seed) 155 | main(cfg) 156 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/fpn_bilinear.py: -------------------------------------------------------------------------------- 1 | # MODIFIED FROM TORCHVISION 0.11.3 TO USE BILINEAR UPSAMPLE 2 | 3 | from collections import OrderedDict 4 | from typing import List, Dict, Optional 5 | 6 | from torch import nn, Tensor 7 | import torchvision 8 | from torchvision.ops.feature_pyramid_network import ExtraFPNBlock 9 | 10 | 11 | class FeaturePyramidNetworkBilinear(nn.Module): 12 | """ 13 | Module that adds a FPN from on top of a set of feature maps. This is based on 14 | `"Feature Pyramid Network for Object Detection" `_. 15 | 16 | The feature maps are currently supposed to be in increasing depth 17 | order. 18 | 19 | The input to the model is expected to be an OrderedDict[Tensor], containing 20 | the feature maps on top of which the FPN will be added. 21 | 22 | Args: 23 | in_channels_list (list[int]): number of channels for each feature map that 24 | is passed to the module 25 | out_channels (int): number of channels of the FPN representation 26 | extra_blocks (ExtraFPNBlock or None): if provided, extra operations will 27 | be performed. 
It is expected to take the fpn features, the original 28 | features and the names of the original features as input, and returns 29 | a new list of feature maps and their corresponding names 30 | 31 | Examples:: 32 | 33 | >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) 34 | >>> # get some dummy data 35 | >>> x = OrderedDict() 36 | >>> x['feat0'] = torch.rand(1, 10, 64, 64) 37 | >>> x['feat2'] = torch.rand(1, 20, 16, 16) 38 | >>> x['feat3'] = torch.rand(1, 30, 8, 8) 39 | >>> # compute the FPN on top of x 40 | >>> output = m(x) 41 | >>> print([(k, v.shape) for k, v in output.items()]) 42 | >>> # returns 43 | >>> [('feat0', torch.Size([1, 5, 64, 64])), 44 | >>> ('feat2', torch.Size([1, 5, 16, 16])), 45 | >>> ('feat3', torch.Size([1, 5, 8, 8]))] 46 | 47 | """ 48 | 49 | def __init__( 50 | self, 51 | in_channels_list: List[int], 52 | out_channels: int, 53 | extra_blocks: Optional[ExtraFPNBlock] = None, 54 | ): 55 | super().__init__() 56 | self.inner_blocks = nn.ModuleList() 57 | self.layer_blocks = nn.ModuleList() 58 | for idx, in_channels in enumerate(in_channels_list): 59 | if in_channels == 0: 60 | raise ValueError("in_channels=0 is currently not supported") 61 | inner_block_module = nn.Conv2d(in_channels, out_channels, 1) 62 | layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1) 63 | self.inner_blocks.append(inner_block_module) 64 | self.layer_blocks.append(layer_block_module) 65 | 66 | # initialize parameters now to avoid modifying the initialization of top_blocks 67 | for m in self.modules(): 68 | if isinstance(m, nn.Conv2d): 69 | nn.init.kaiming_uniform_(m.weight, a=1) 70 | nn.init.constant_(m.bias, 0) 71 | 72 | if extra_blocks is not None: 73 | assert isinstance(extra_blocks, ExtraFPNBlock) 74 | self.extra_blocks = extra_blocks 75 | 76 | self.upsample2 = nn.Upsample( 77 | scale_factor=2, mode="bilinear", align_corners=False 78 | ) 79 | 80 | def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: 81 | """ 82 | This is equivalent to self.inner_blocks[idx](x), 83 | but torchscript doesn't support this yet 84 | """ 85 | num_blocks = len(self.inner_blocks) 86 | if idx < 0: 87 | idx += num_blocks 88 | out = x 89 | for i, module in enumerate(self.inner_blocks): 90 | if i == idx: 91 | out = module(x) 92 | return out 93 | 94 | def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: 95 | """ 96 | This is equivalent to self.layer_blocks[idx](x), 97 | but torchscript doesn't support this yet 98 | """ 99 | num_blocks = len(self.layer_blocks) 100 | if idx < 0: 101 | idx += num_blocks 102 | out = x 103 | for i, module in enumerate(self.layer_blocks): 104 | if i == idx: 105 | out = module(x) 106 | return out 107 | 108 | def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: 109 | """ 110 | Computes the FPN for a set of feature maps. 111 | 112 | Args: 113 | x (OrderedDict[Tensor]): feature maps for each feature level. 114 | 115 | Returns: 116 | results (OrderedDict[Tensor]): feature maps after FPN layers. 117 | They are ordered from highest resolution first. 
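        Top-down merge, with illustrative shapes (assuming adjacent levels differ by
        exactly a factor of 2, which the fixed scale_factor=2 upsampling requires):

            last_inner = inner_blocks[-1](x['feat3'])                          # (1, out, 8, 8)
            merged     = inner_blocks[-2](x['feat2']) + upsample2(last_inner)  # (1, out, 16, 16)

        i.e. the nearest-neighbour upsampling of the stock torchvision FPN is replaced
        by the bilinear `upsample2` defined in __init__.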
118 | """ 119 | # unpack OrderedDict into two lists for easier handling 120 | names = list(x.keys()) 121 | x = list(x.values()) 122 | 123 | last_inner = self.get_result_from_inner_blocks(x[-1], -1) 124 | results = [] 125 | results.append(self.get_result_from_layer_blocks(last_inner, -1)) 126 | 127 | for idx in range(len(x) - 2, -1, -1): 128 | inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) 129 | inner_top_down = self.upsample2(last_inner) 130 | last_inner = inner_lateral + inner_top_down 131 | results.insert(0, self.get_result_from_layer_blocks(last_inner, idx)) 132 | 133 | if self.extra_blocks is not None: 134 | results, names = self.extra_blocks(results, x, names) 135 | 136 | # make it back an OrderedDict 137 | out = OrderedDict([(k, v) for k, v in zip(names, results)]) 138 | 139 | return out 140 | -------------------------------------------------------------------------------- /CARTO/Decoder/visualizing/visualize_asdf_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from CARTO.Decoder.data import asdf_dataset\n", 20 | "import open3d as o3d\n", 21 | "import pathlib\n", 22 | "import random \n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "\n", 33 | "def get_colors(n): \n", 34 | " ret = [] \n", 35 | " r = int(random.random() * 256) \n", 36 | " g = int(random.random() * 256) \n", 37 | " b = int(random.random() * 256) \n", 38 | " step = 256 / n \n", 39 | " for i in range(n): \n", 40 | " r += step \n", 41 | " g += step \n", 42 | " b += step \n", 43 | " r = int(r) % 256 \n", 44 | " g = int(g) % 256 \n", 45 | " b = int(b) % 256 \n", 46 | " ret.append(np.array([r,g,b])/255) \n", 47 | " return ret " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 7, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "category = \"oven\"\n", 57 | "\n", 58 | "local_dataset = asdf_dataset.ASDFDataset(\n", 59 | " [f\"examples/splits/sm_{category}_6_angle\"],\n", 60 | " subsample_amount=1e12, # Very big\n", 61 | " load_ram=False,\n", 62 | " train=True,\n", 63 | " load_gt=True\n", 64 | ")\n", 65 | "\n", 66 | "all_categories = asdf_dataset.ASDFDataset(\n", 67 | " [\n", 68 | " \"examples/splits/sm_door_6_angle\", \"examples/splits/sm_laptop_6_angle\",\n", 69 | " \"examples/splits/sm_oven_6_angle\", \"examples/splits/sm_stapler_6_angle\",\n", 70 | " \"examples/splits/sm_washing_machine_6_angle\"\n", 71 | " ],\n", 72 | " subsample_amount=1e12, # Very big\n", 73 | " load_ram=False,\n", 74 | " train=True\n", 75 | ")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "out_path= pathlib.Path(\"vis/asdf_testing_full_pc\") / category\n", 85 | "out_path.mkdir(exist_ok=True, parents=True)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "all_pcds = []\n", 95 | "N = len(local_dataset) // 6\n", 96 | "colors = get_colors(N)\n", 97 | "\n", 98 | "# for idx in range(0, len(local_dataset), 6):\n", 99 | "for idx in range(0, 
6, 1):\n", 100 | " dp = local_dataset[idx]\n", 101 | " pcd = o3d.geometry.PointCloud()\n", 102 | " # pcd.points = o3d.utility.Vector3dVector(dp.points[dp.sdf_values.abs() < 1e-2])\n", 103 | " pcd.points = o3d.utility.Vector3dVector(dp.full_pc[::3, :])\n", 104 | " pcd.paint_uniform_color(colors[idx // 6])\n", 105 | " print(dp.zero_joint_config)\n", 106 | " o3d.io.write_point_cloud(str(out_path / f\"{idx}.ply\"), pcd)\n", 107 | " print(pcd.get_min_bound())\n", 108 | " print(pcd.get_max_bound())" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 10, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# o3d.visualization.draw_plotly(all_pcds)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 3.8.13", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.8.13" 145 | }, 146 | "orig_nbformat": 4, 147 | "vscode": { 148 | "interpreter": { 149 | "hash": "d94be9d2cbc472181826ec82d481f764c09292b938a47daff0fd759a1975e02c" 150 | } 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/models/layers/residual_blocks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018-2020 Toyota Research Institute. All rights reserved. 2 | # 3 | # Originally from Koichiro Yamaguchi's pixwislab repo mirrored at: 4 | # https://github.awsinternal.tri.global/driving/pixwislab 5 | 6 | import torch.nn as nn 7 | 8 | 9 | def resnet_shortcut(in_channels, out_channels, stride, preact=False): 10 | """Shortcut layer for residual block. 11 | 12 | When the numbers of input and output channels are the same and stride is 13 | equal to 1, no layer is made. 14 | 15 | Args: 16 | in_channels (int): The number of input channels. 17 | out_channels (int): The number of output channels. 18 | stride (int): Stride of the residual block. 19 | preact (bool, optional): If True, make a shortcut for pre-activation 20 | residual block. 21 | 22 | Returns: 23 | Module of shortcut layers. 
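    Example (illustrative channel counts):
        resnet_shortcut(64, 64, 1)                -> None (identity shortcut)
        resnet_shortcut(64, 128, 2)               -> 1x1 strided Conv2d followed by BatchNorm2d
        resnet_shortcut(64, 128, 2, preact=True)  -> 1x1 strided Conv2d only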
24 | """ 25 | if stride == 1 and in_channels == out_channels: 26 | return None 27 | 28 | if preact: 29 | return nn.Conv2d( 30 | in_channels, out_channels, kernel_size=1, stride=stride, bias=False 31 | ) 32 | else: 33 | return nn.Sequential( 34 | nn.Conv2d( 35 | in_channels, out_channels, kernel_size=1, stride=stride, bias=False 36 | ), 37 | nn.BatchNorm2d(out_channels), 38 | ) 39 | 40 | 41 | class ResidualBlock(nn.Module): 42 | """Base class for residual block.""" 43 | 44 | @classmethod 45 | def expansion(cls): 46 | """Expansion rate.""" 47 | raise NotImplementedError 48 | 49 | @classmethod 50 | def preact(cls): 51 | """Pre-activation flag.""" 52 | raise NotImplementedError 53 | 54 | 55 | class PreactBasicResidualBlock(ResidualBlock): 56 | """Pre-activation basic residual block.""" 57 | 58 | def __init__( 59 | self, 60 | in_channels, 61 | base_channels, 62 | stride=1, 63 | dilation_rate=1, 64 | add_preact=True, 65 | add_last_norm=False, 66 | ): 67 | """ 68 | Args: 69 | in_channels (int): The number of input channels. 70 | base_channels (int): The number of output channels. 71 | stride (int, optional): Stride of the residual block. 72 | dilation_rate (int, optional): Dilation rate of the residual block. 73 | add_preact (bool, optional): If True, add pre-activation. 74 | add_last_norm (bool, optional): If True, add batch normalization 75 | after the last convolution. 76 | """ 77 | super().__init__() 78 | if add_preact: 79 | self.preact_bn = nn.BatchNorm2d(in_channels) 80 | else: 81 | self.preact_bn = None 82 | self.conv_shortcut = resnet_shortcut( 83 | in_channels, base_channels, stride, preact=True 84 | ) 85 | self.conv1 = nn.Conv2d( 86 | in_channels, 87 | base_channels, 88 | kernel_size=3, 89 | stride=stride, 90 | padding=dilation_rate, 91 | dilation=dilation_rate, 92 | bias=False, 93 | ) 94 | self.bn1 = nn.BatchNorm2d(base_channels) 95 | self.relu = nn.ReLU(inplace=True) 96 | self.conv2 = nn.Conv2d( 97 | base_channels, 98 | base_channels, 99 | kernel_size=3, 100 | padding=dilation_rate, 101 | dilation=dilation_rate, 102 | bias=False, 103 | ) 104 | self.bn_last = nn.BatchNorm2d(base_channels) if add_last_norm else None 105 | 106 | @classmethod 107 | def expansion(cls): 108 | """Expansion rate, which is a ratio of the number of the output 109 | channels to the number of the base channels in the residual block. 110 | 111 | Returns: 112 | Expansion rate (= 1). 113 | """ 114 | return 1 115 | 116 | @classmethod 117 | def preact(cls): 118 | """Pre-activation flag. 119 | 120 | Returns: 121 | Flag (= True). 122 | """ 123 | return True 124 | 125 | def forward(self, inputs): 126 | """Forward computation. 127 | 128 | Args: 129 | inputs (Tensor): Input tensor. 130 | 131 | Returns: 132 | Output tensor. 133 | """ 134 | if self.conv_shortcut is None: 135 | shortcut_inputs = inputs 136 | else: 137 | shortcut_inputs = self.conv_shortcut(inputs) 138 | 139 | if self.preact_bn is not None: 140 | inputs = self.relu(self.preact_bn(inputs)) 141 | outputs = self.relu(self.bn1(self.conv1(inputs))) 142 | outputs = self.conv2(outputs) 143 | 144 | outputs += shortcut_inputs 145 | 146 | if self.bn_last is not None: 147 | outputs = self.relu(self.bn_last(outputs)) 148 | return outputs 149 | 150 | 151 | def preact_resnet_group( 152 | block_func, in_channels, base_channels, num_blocks, stride=1, dilation_rate=1 153 | ): 154 | """Make a group of pre-activation residual blocks. 155 | 156 | Args: 157 | block_func (ResidualBlock): Function of a residual block. 158 | in_channels (int): The number of input channels. 
159 | base_channels (int): The number of base channels of the residual block. 160 | num_blocks (int): The number of residual blocks. 161 | stride (int, optional): Stride of the first residual block. 162 | dilation_rate (int, optional): Dilation rate of residual blocks. 163 | 164 | Returns: 165 | Module of a group of residual blocks. 166 | """ 167 | assert block_func.preact() 168 | 169 | residual_blocks = [ 170 | block_func( 171 | in_channels, 172 | base_channels, 173 | stride=stride, 174 | dilation_rate=dilation_rate, 175 | add_preact=False, 176 | ) 177 | ] 178 | in_channels = block_func.expansion() * base_channels 179 | for idx in range(1, num_blocks): 180 | residual_blocks.append( 181 | block_func( 182 | in_channels, 183 | base_channels, 184 | dilation_rate=dilation_rate, 185 | add_preact=True, 186 | add_last_norm=idx == num_blocks - 1, 187 | ) 188 | ) 189 | return nn.Sequential(*residual_blocks) 190 | -------------------------------------------------------------------------------- /CARTO/Decoder/loss.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Dict, List, Union, Any 3 | import itertools 4 | import numpy as np 5 | 6 | import urdfpy 7 | import torch 8 | from CARTO.Decoder import utils 9 | 10 | 11 | def articulation_similarity( 12 | A: Dict[str, float], 13 | A_def: Dict[str, Any], 14 | B: Dict[str, float], 15 | B_def: Dict[str, Any], 16 | max_values: Dict[str, float] = defaultdict(lambda: 1), 17 | ) -> float: 18 | """ 19 | Calculates the similarity between two two-level kinematic trees including their joint state 20 | Make sure the joint states are in a canonical state! 21 | """ 22 | # TODO Nick: For now it's very simple 23 | # --> Only one joint 24 | assert len(A) == 1 and len(B) == 1 25 | 26 | joint_id_A = list(A.keys())[0] 27 | joint_id_B = list(B.keys())[0] 28 | 29 | sim: float 30 | if A_def[joint_id_A]["type"] == B_def[joint_id_B]["type"]: 31 | joint_state_A = A[joint_id_A] 32 | joint_state_B = B[joint_id_B] 33 | max_joint_state = max_values[A_def[joint_id_A]["type"]] 34 | # print(max_joint_state) 35 | # L1 36 | # dist = np.abs(joint_state_A - joint_state_B) 37 | # L2 38 | dist = ((joint_state_A - joint_state_B) / max_joint_state) ** 2 39 | sim = utils.exp_kernel(dist) 40 | # sim = utils.distance_to_sim(dist) 41 | # sim = utils.gauss_kernel(dist) 42 | else: 43 | sim = 0.0 44 | 45 | return sim 46 | 47 | 48 | def get_articulation_similarity_matrix( 49 | joint_configs: List[Dict[str, float]], joint_definitions: List[Dict[str, Any]] 50 | ): 51 | """ 52 | Returns a matrix of size NxN given a list of N joint config dicts 53 | """ 54 | # Get max values 55 | # max_values = utils.AccumulatorDict(accumulator=max) 56 | # for joint_config, joint_def in zip(joint_configs, joint_definitions): 57 | # joint_id = list(joint_config.keys())[0] 58 | # max_values.increment(joint_def[joint_id]["type"], joint_config[joint_id]) 59 | max_values = {"prismatic": 0.5, "revolute": 3 / 2 * np.pi} 60 | 61 | sim_matrix = torch.tensor( 62 | [ 63 | [ 64 | articulation_similarity( 65 | joint_config_i, 66 | joint_def_i, 67 | joint_config_j, 68 | joint_def_j, 69 | max_values=max_values, 70 | ) 71 | for joint_config_i, joint_def_i in zip(joint_configs, joint_definitions) 72 | ] 73 | for joint_config_j, joint_def_j in zip(joint_configs, joint_definitions) 74 | ] 75 | ) 76 | return sim_matrix 77 | 78 | 79 | class JointSimLoss(torch.nn.Module): 80 | def __init__(self, joint_config_sim_matrix): 81 | super(JointSimLoss, 
self).__init__() 82 | self.joint_config_sim_matrix = joint_config_sim_matrix 83 | 84 | def forward(self, embedding_matrix): 85 | """ 86 | Calculate the distance loss 87 | """ 88 | # Different sim/distance metrics 89 | # https://elar.urfu.ru/bitstream/10995/3713/2/RuSSIR_2011_07.pdf 90 | # http://dep805.ru/about/sologub/russir2011poster.pdf 91 | # embedding_sim = utils.self_cosine_similarity(joint_config_embedding.weight) 92 | # Not working great --> Bug? 93 | 94 | # embedding_sim = utils.distance_to_sim( 95 | # utils.self_manhattan_distance(joint_config_embedding.weight) 96 | # ) 97 | 98 | # embedding_sim = utils.distance_to_sim( 99 | # utils.self_euclidean_distance(joint_config_embedding.weight) 100 | # ) 101 | 102 | # embedding_sim = torch.exp(-utils.self_manhattan_distance(joint_config_embedding.weight)) 103 | 104 | embedding_sim = torch.exp(-utils.self_euclidean_distance(embedding_matrix)) 105 | joint_config_embedding_loss = torch.nn.functional.l1_loss( 106 | embedding_sim, self.joint_config_sim_matrix 107 | ) 108 | # joint_config_embedding_loss = joint_config_embedding_loss / (embedding_matrix.size()[0]**2) 109 | return joint_config_embedding_loss 110 | 111 | 112 | class JointClassificationLoss(torch.nn.Module): 113 | def __init__(self, multi_class: float = 1.0, multi_state: float = 1.0): 114 | super(JointClassificationLoss, self).__init__() 115 | self.multi_class = multi_class 116 | self.multi_state = multi_state 117 | 118 | def forward( 119 | self, 120 | gt_joint_configs: List[Dict[str, float]], 121 | gt_joint_definitions: List[Dict[str, Any]], 122 | pred_vector: Dict[str, torch.Tensor], 123 | ): 124 | """ 125 | Assumes gt_joint_configs is in zerod state! 126 | """ 127 | assert len(gt_joint_configs[0]) == 1 128 | 129 | # Extract pred 130 | pred_types_one_hot: torch.Tensor = pred_vector["type"] 131 | pred_joint_states: torch.Tensor = pred_vector["state"] 132 | 133 | # Extract GT from batch 134 | gt_types, joint_values = utils.extract_type_and_value( 135 | gt_joint_definitions, gt_joint_configs 136 | ) 137 | gt_types_index = utils.encode_joint_types(gt_types).to( 138 | pred_types_one_hot.device 139 | ) 140 | gt_joint_states = ( 141 | torch.Tensor(joint_values).to(pred_joint_states.device).unsqueeze(-1) 142 | ) 143 | 144 | class_loss = torch.nn.functional.cross_entropy( 145 | pred_types_one_hot, gt_types_index 146 | ) 147 | state_loss = torch.nn.functional.mse_loss(pred_joint_states, gt_joint_states) 148 | 149 | return self.multi_class * class_loss + self.multi_state * state_loss, { 150 | "class": class_loss.item(), 151 | "state": state_loss.item(), 152 | } 153 | 154 | 155 | class JointZeroOneLoss(torch.nn.Module): 156 | def __init__(self): 157 | super(JointZeroOneLoss, self).__init__() 158 | 159 | def forward( 160 | self, 161 | gt_joint_configs: List[Dict[str, float]], 162 | gt_joint_definitions: List[Dict[str, Any]], 163 | pred_vector: Dict[str, torch.Tensor], 164 | ): 165 | """ 166 | Assumes gt_joint_configs is in zerod state! 
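        The regression target is assumed to be the joint state normalised into [0, 1] by its
        limits, roughly (q - lower) / (upper - lower) for a joint value q; this is a sketch of
        what utils.extract_zero_one_in_limits is expected to return, using the per-joint
        "limit" entries of gt_joint_definitions.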
167 | """ 168 | assert len(gt_joint_configs[0]) == 1 169 | 170 | pred_zero_one = pred_vector["state"] 171 | gt_zero_one = ( 172 | torch.Tensor( 173 | utils.extract_zero_one_in_limits(gt_joint_definitions, gt_joint_configs) 174 | ) 175 | .to(pred_zero_one.device) 176 | .unsqueeze(-1) 177 | ) 178 | 179 | loss = torch.nn.functional.mse_loss(pred_zero_one, gt_zero_one) 180 | # print(f"{pred_zero_one}\n{gt_zero_one}") 181 | return loss, {"zero_one_loss": loss.item()} 182 | -------------------------------------------------------------------------------- /datasets/decoder/id_lists/All_Real_Categories.txt: -------------------------------------------------------------------------------- 1 | 187d79cd04b2bdfddf3a1b0d597ce76e 2 | 496dcf99-6e76-480c-8fab-a5579f16f2c7 3 | 4e9832bbbb077f9c5c5adfeaec1397f 4 | 503b4dff71b404dabf195d81040cc60 5 | 55b0f47aea128c3b91d8be9599fbaa1f 6 | 5d17e90f512a3dc7df3a1b0d597ce76e 7 | 66725b8cad4355a03735baeeeb56a00 8 | 6e51cc2c2da50c6a59c5c7ba83ec931a 9 | 795af925dfc8897b035d20a1a3ca345 10 | 7d19e1db73ebfee26f893b5bc716a3fa 11 | 9112f0ee6b1cdf5082ec48ff3a4fe07c 12 | 93b7c0394cc309c8df3a1b0d597ce76e 13 | a238b87f02c5de1edf3a1b0d597ce76e 14 | a2caaa68364f6207f054969eeb39ff86 15 | a377f5af14ac6710a168e247bb97e471 16 | a62b6a19d2093bc91cbd656f2f1bc2ff 17 | aa4ad2f41efb815cb022c94235bc8601 18 | af913c310f1b978ae6488a574e8954a5 19 | b1080bd937b04a44575f4e5007488531 20 | c5f76c9a4137a3563862b05b9038dcc 21 | c6090fb2806b2abfa5f4a1f264741b67 22 | cc8161b35f7bef958c88d30f502a452 23 | d95f6ea8-cda0-4d59-aa49-11309e3f0ce3 24 | 125c93cbc6544bd1f9f50a550b8c1cce 25 | 1b67b4bfed6688ba5b22feddf58c05e1 26 | 1f507b26c31ae69be42930af58a36dce 27 | 241ec8a746dd1cfc78f71a335ebabfa5 28 | 29f5cfcef7272f1f640578ae55230ebc 29 | 3b2db36aaa2546b99c7c402f274622c 30 | 4bacb1694e86005afb6e846333373df8 31 | 4fc3d56243d2d8801ef1ccfaf50f2048 32 | 5678a2173ff575d09cebe817bc1591b3 33 | 5d544ee4b094c6606436916a86a90ed7 34 | 6489453e322cdb53f9f3c6290096f50f 35 | 66e3b7c7f2e8e9297fd8853234f5e918 36 | 6b78948484df58cdc664c3d4e2d59341 37 | 7df09674bc991904c78df40cf2e9097a 38 | 850673bcbce8f73ec8a6d87a62ac0341 39 | 8d70fb6adc63e21eb7e0383b9609fa5 40 | 97e94d800fd6dc07dbaa6d42a4980930 41 | a4b410734514306ac401e233323032d6 42 | aa92ecd31491bca87a88a2ad67bfd073 43 | afa49e97861c45e5e738f481f8560d58 44 | b5f6fd84a3f44ddb1aa47689117a61e1 45 | cbcb79f534518dfbcfe78be5b7b99c8d 46 | cc691d9e8e189ce47a381a112bfd785 47 | f53ea19f871a80d420685b5a7e34b501 48 | f7c26b8c94ba8214397c35f585745a82 49 | 42aac49442bb9f8bb4e3935c6cee4b35 50 | 4f956e259344d4a3599fb6902c958d23 51 | 6d83dea57df3c4a3500158c23c4c5a8e 52 | 87bae84777fe8b702bac1bcdfc2402d2 53 | 891f65c773939191c834958aed613724 54 | 95bc6fb98624ea3229d75ea275a1cb4e 55 | bdb10a17b04e2adbb7fb7f3ae74b618c 56 | c3bb5f3c842a6c2d178e7d331e641179 57 | c75ebd7c340649ba5ad304c2564ae1df 58 | dc5c91c8c01b1c8c506c648223cdabe9 59 | df5bd51614d2fbdef114be17e2e7c4b5 60 | f9544effad178100be92f74d81ff60bf 61 | b9f1eeea355194c19941e769880462e7 62 | 3ea1ace396f6ccae48407a54b1fbfda8 63 | 8c2491e5245804d1ffc6e457221b9271 64 | a46e0c10f17f928ba2bc8d1e386113dc 65 | b296fbfbbe5dccf09c12d6260da9ac2b 66 | b8cf469bc1b42ab64a44340bf227e40 67 | bae2babb26dc352b20489998d734835a 68 | bb5533538179f6c39209092a6c03f1bd 69 | ef97ff5c1d6a00f2a760e402290727de 70 | eff23594cc0aed121b3e6b75a323070-0 71 | 1515a188cbc382fa84ad27a2f1142330 72 | 3158fd17e409d38a732208e596b26ebc 73 | 4d8d0cb708324170c98c13d6112727de 74 | 58c878d494ecbbd62835d3f06aeb6e0 75 | 5b81d7830eabb7547c6e1fb05e1b9037 76 | 
6601ef650f03e000c49931aa7ca8fecb 77 | 6fb955194baf07a750a5eaedf6275e1b 78 | 7028b24b7d64efaf3194539af1047dcf 79 | 827c9a85df258dd8faf0b97ff18d3546 80 | 93d69af3c0034d3d9807c66948157e66 81 | 9e53ec8bedae98859807c66948157e66 82 | ad6bd7e24e5bc25f3593835fe348a036 83 | bc82358ed0ee28d41277c98ea0908b0 84 | 20edff7e1500fc4ed45f502ecff9e44f 85 | 21227197948fd2857c2f94a943a8669b 86 | 28001cb70c38f19cf32b6091d9628440 87 | 299ff1bf2618a4b584b039efed4b32d7 88 | 29f110b8740bd8068c427edcde5d5e2b 89 | 2dc57230d14506eacd6ce29440b718cf 90 | 31c090b23f52bb61208c8c852ee795bc 91 | 415d7746f792eb1de0445fc6d980dd5c 92 | 48045af90c7959e5738e43095496b061 93 | 4aab0e569f1dc3bc8d7e9f13fd8f661d 94 | 4dc3e9e293450817d3dad974dc098fa1 95 | 70d0937e1d38a9c2a45b742ddc5add59 96 | 712d2c844d61aa9cefead98a255f706f 97 | 74b8222078ba776c661673811de66400 98 | 78c4b505894342269299936b751bd77b 99 | 7b5b7bfa8580e913e2580b23e60e4674 100 | 949e39403ab4fab37ade8e3ca8db8db3 101 | 9dd80e356880c9deaf268f6180933aa3 102 | 9e42bbdbfe36680391e4d6c585a697a 103 | a19e6780182c72cf9bf8bea04806ba15 104 | a516711827a396085528d560ddea455 105 | a95828fa4607295674c8eb7e4d6198a5 106 | c9857deb88989a67b5851007eadc6f74 107 | e64f3681d7c76bb743638dabe1eb5336 108 | 04569f2f-3e07-4655-9337-bfa41a5ccbc0 109 | 0d31000f-e876-4751-876d-efa6a61fa9b2 110 | 265d042dcfed6f15c357c21161963e89 111 | 4163de2ce7f6f59aed1d8381d2c075c2-0 112 | 5528ee64-7656-40e4-8db0-70fd39427b4d 113 | 62e22f4d1846d8c1fdc6c1669e5c540 114 | 8b04de89-4f3f-45d8-8d7a-6bb5958e5340 115 | d87cf480-ba57-43b1-b1f2-bae2b8fe2fa4 116 | u094c89ee-d9f6-4266-a9b3-c1f2549b1105 117 | ucfaedfea-c15a-495c-9037-21108eeeb006 118 | 12f3efd9-f013-4aab-922c-0328502acd3f 119 | 19dff8164764e2a259f37b6e82c5e93 120 | 23fd9817d509fe472bf266a8f0187ce5-0 121 | 23fd9817d509fe472bf266a8f0187ce5-1 122 | 3181976321565dfee9027543872faef 123 | 31f86223e3faaec3eae5cab1248d1ec6-0 124 | 31f86223e3faaec3eae5cab1248d1ec6-1 125 | 32036cc5-6e63-47cf-96ba-89ef2be3950e-0 126 | 35233c0d786b5956d25d105fdf500c48 127 | 35c3d7b9-7dec-4e66-a962-14ea0fde4cad 128 | 581ad58ce8664d2d4ff0e6230d32c1e3 129 | 59481570acb7a0872d4ba5e1aa44cc40-0 130 | 5d6201caa12611fe89f4664416242a41 131 | 737fd576f8eae54adfb1b24fd658f3b5-0 132 | 75bfa1045150e49fe177ccfa080b14b0-0 133 | 96a7c39f7eb90f65c90183d47cf3c337-1 134 | 9f264c87-89e3-4b06-8f36-b618ec54694c 135 | a683ed081504a35e4a9a3a0b87d50a92 136 | b4f8a49abc400a775d6ddb389935ee57 137 | ba60dc6d-526e-4014-961a-5049df9079c6-0 138 | c7a96262d5dfc1ae72c447ef6e5cffc2 139 | ceb3b39c9a035752b4fc059d1d10ec5d-0 140 | e7523e396f8d4ae171e397fe45dce6b 141 | e9d3d9ef-57e3-4f0a-bbc7-e1cc75947ccd-7 142 | fca703c2489237d51b44a9962207f944 143 | fe95df61cc16452ccb3316c0fb4cfa01-0 144 | fed0863a69b3744c44f6844c4f2ce888-0 145 | u2bba3644-e88e-4650-9124-e9964702f9ef-0 146 | u51509034-d4f7-4ef7-b014-6660f4df034d-0 147 | ud489e3ab-3fac-4753-8373-f5d4cebaeec5 148 | 20c5096ea98cc955920de219c00d1c3b 149 | 376eb047b40ef4f6a480e3d8fdbd4a92 150 | 37b40b7e9290c0a330314ffb9bb887b5 151 | 3800d2ab6bc278bcd5a3e6010c55b78e 152 | 453034dc-b04a-4415-8c43-16d6d23c47b2 153 | 58a427e5201aa43be00ace8e7c1a5eeb 154 | 6a030b1836586b9f7e1c85c5c15da7fb 155 | 72a2bd9428f7179357fcd7a97096d25 156 | 88ac7b2b3050f1f861f7b52424be58ab 157 | 8c34afa29665356013b1d3e1528f0506-0 158 | 8d152be34b41785677937146265c551a 159 | 8f54f0bec8eb5d35d25169d37940fb64 160 | 98bc3afca001f433a1702a37604ec6f 161 | b3188e51216de8cce2e4961161b75547 162 | c16cba81-714d-4b1a-94cd-7a148af83db0 163 | d01ff66659767d50cee19268a161fc4a 164 | d9378f9a4a7d6514602a101aa41a6f48 165 | 
dc2cda7d-6fd5-48dd-8f7e-7524d7eb1c0a 166 | f39912a4f0516fb897371d1e7cc637f3 167 | f636f0aa2025ba3923c841f9d5051936 168 | u26949e8f-8139-485b-99f9-694c026ed5a6 169 | u9ea1219b-e360-4351-ae52-f589989c58e3-0 170 | ue12a29d7-6d30-4159-ac11-3c6a058ad354 171 | 1af4a1dfa4f94cd44da5feafe6f1c8fc 172 | 21ae39cf6ba8557f4da5feafe6f1c8fc 173 | 2950d1baed4dbd78c59350d819542ec7 174 | 33ec57af7f648994da5feafe6f1c8fc 175 | 606d50b144d8ca164da5feafe6f1c8fc 176 | 82c05fe4890a7f5112de5317fe5b354f 177 | d69d9de0c79ac6a9c59350d819542ec7 178 | 10c14b0cb76f87584da5feafe6f1c8fc 179 | 1548461b13adc0d0c59350d819542ec7 180 | 170be0087472182bc59350d819542ec7 181 | 17d25c26485edcf94da5feafe6f1c8fc 182 | 198cbe57b01bad9dc59350d819542ec7 183 | 19c79a42f68d7d444da5feafe6f1c8fc 184 | 1caaaa5c1da4dd2dc59350d819542ec7 185 | 1fc8231114fa42a7c59350d819542ec7 186 | 2862558059dd584c59350d819542ec7 187 | -------------------------------------------------------------------------------- /CARTO/Encoder/net_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["PYTHONHASHSEED"] = str(1) 4 | 5 | # Allow the sentence tokenizer to be run in parallel. 6 | os.environ["TOKENIZERS_PARALLELISM"] = "true" 7 | 8 | import argparse 9 | import json 10 | import pathlib 11 | import random 12 | import sys 13 | from importlib.machinery import SourceFileLoader 14 | 15 | import cv2 16 | import IPython 17 | 18 | # To ensure mesh_to_sdf is imported before pyrender 19 | import mesh_to_sdf 20 | import numpy as np 21 | import pytorch_lightning as pl 22 | import torch 23 | import wandb 24 | from typing import Optional 25 | from pytorch_lightning import loggers 26 | from pytorch_lightning.callbacks import ModelCheckpoint 27 | from CARTO.app.panoptic_tidying import tidy_classes 28 | from CARTO.lib import camera, datapoint 29 | from CARTO.lib.datapoint import Panoptic 30 | from CARTO.lib.net import common 31 | from CARTO.lib.net.data_module import DataModule 32 | from CARTO.lib.net.dataset import PanopticOutputs 33 | from CARTO.lib.net.panoptic_trainer import PanopticModel 34 | from CARTO.lib.net.post_processing.eval3d import Eval3d 35 | from CARTO.lib.shapenet_utils import NOCS_CATEGORIES 36 | from CARTO.lib import partnet_mobility 37 | 38 | # ./runner.sh simnet/app/panoptic_category_reconstruction/net_train.py @simnet/app/panoptic_category_reconstruction/net_config_overfit.txt 39 | 40 | _GPU_TO_USE = 0 41 | 42 | 43 | def set_seed(seed: Optional[int]): 44 | if seed is None: 45 | return 46 | 47 | random.seed(seed) 48 | np.random.seed(seed) 49 | torch.manual_seed(seed) 50 | 51 | 52 | class EvalMethod: 53 | def __init__(self, hparams, log_prefix="val"): 54 | assert log_prefix == "val" or log_prefix == "test" 55 | 56 | self.objects_eval_3d = Eval3d() 57 | self.doors_eval_3d = Eval3d() 58 | self.handholds_eval_3d = Eval3d() 59 | self.camera_model = camera.ZED2Camera1080p() 60 | self.log_prefix = log_prefix 61 | 62 | def process_sample( 63 | self, panoptic_outputs: PanopticOutputs, panoptic_targets: Panoptic 64 | ): 65 | batch_size = len(panoptic_targets.val_data) 66 | 67 | for i in range(batch_size): 68 | val_data = panoptic_targets.val_data[i] 69 | if val_data.scene_name == "unlabeled_data": 70 | continue 71 | 72 | ## Compute detections 73 | if len(panoptic_outputs.cabinet_door_obbs) > 0: 74 | door_detections = panoptic_outputs.cabinet_door_obbs[0].get_detections( 75 | i, 76 | camera_model=self.camera_model, 77 | class_list=val_data.door_class_ids, 78 | ) 79 | self.doors_eval_3d.process_sample( 
80 | door_detections, val_data.door_detections, val_data.scene_name 81 | ) 82 | 83 | if len(panoptic_outputs.graspable_objects_obbs) > 0: 84 | objects_detections = panoptic_outputs.graspable_objects_obbs[ 85 | 0 86 | ].get_detections( 87 | i, 88 | camera_model=self.camera_model, 89 | class_list=val_data.object_class_ids, 90 | ) 91 | self.objects_eval_3d.process_sample( 92 | objects_detections, val_data.object_detections, val_data.scene_name 93 | ) 94 | 95 | if len(panoptic_outputs.handhold_obbs) > 0: 96 | handhold_detections = panoptic_outputs.handhold_obbs[0].get_detections( 97 | i, camera_model=self.camera_model 98 | ) 99 | self.handholds_eval_3d.process_sample( 100 | handhold_detections, 101 | val_data.handhold_detections, 102 | val_data.scene_name, 103 | ) 104 | 105 | def process_all_dataset(self, log): 106 | log[ 107 | self.log_prefix + "/objects 3Dmap" 108 | ] = self.objects_eval_3d.process_all_3D_dataset() 109 | log[ 110 | self.log_prefix + "/cabinet 3Dmap" 111 | ] = self.doors_eval_3d.process_all_3D_dataset() 112 | log[ 113 | self.log_prefix + "/handhold 3Dmap" 114 | ] = self.handholds_eval_3d.process_all_3D_dataset() 115 | log[ 116 | self.log_prefix + "/object_class_accuracy" 117 | ] = self.objects_eval_3d.process_category_accuracy() 118 | log[ 119 | self.log_prefix + "/door_class_accuracy" 120 | ] = self.doors_eval_3d.process_category_accuracy() 121 | 122 | def reset(self): 123 | self.objects_eval_3d = Eval3d() 124 | self.doors_eval_3d = Eval3d() 125 | self.handholds_eval_3d = Eval3d() 126 | 127 | 128 | if __name__ == "__main__": 129 | print("WARNING -- This was not tested for the code release -- WARNING") 130 | parser = argparse.ArgumentParser(fromfile_prefix_chars="@") 131 | common.add_train_args(parser) 132 | hparams = parser.parse_args() 133 | set_seed(hparams.seed) 134 | categories = [ 135 | "Dishwasher", 136 | "Knife", 137 | "Laptop", 138 | "Microwave", 139 | "Oven", 140 | "Refrigerator", 141 | "Stapler", 142 | "StorageFurniture", 143 | "Table", 144 | "WashingMachine", 145 | ] 146 | hparams.object_categories = [ 147 | partnet_mobility.partnet_mobility_db[object_cat] for object_cat in categories 148 | ] 149 | 150 | train_ds = datapoint.make_dataset(hparams.train_path) 151 | samples_per_epoch = len(train_ds.list()) 152 | samples_per_step = hparams.train_batch_size 153 | steps = hparams.max_steps 154 | # max to allow overfitting for a single example 155 | steps_per_epoch = max(samples_per_epoch // samples_per_step, 1) 156 | epochs = int(np.ceil(steps / steps_per_epoch)) 157 | actual_steps = epochs * steps_per_epoch 158 | print(f"{epochs = } {samples_per_epoch = } {actual_steps = }") 159 | model = PanopticModel( 160 | hparams, epochs, EvalMethod(hparams, "val"), EvalMethod(hparams, "test") 161 | ) 162 | data_module = DataModule(hparams, train_ds) 163 | model_checkpoint = ModelCheckpoint( 164 | # save_top_k=-1, # -1 Saves all models --> deactivate to save some space 165 | every_n_epochs=1, 166 | mode="max", # Does not do anything as we do not have monitor= set (--> saves latest) 167 | ) 168 | if hparams.wandb_name is not None: 169 | logger = loggers.WandbLogger(name=hparams.wandb_name, project="arti2real") 170 | else: 171 | logger = loggers.TensorBoardLogger(save_dir=hparams.output) 172 | # Mixed precision training uses 16-bit precision floats, otherwise use 32-bit floats. 
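    # (Worked example for the schedule computed above, with illustrative numbers:
    #  samples_per_epoch=10_000 and train_batch_size=16 give steps_per_epoch = 625;
    #  with max_steps=5_000 this yields epochs = ceil(5_000 / 625) = 8 and
    #  actual_steps = 8 * 625 = 5_000.)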
173 | precision = 16 if hparams.use_amp else 32 174 | 175 | trainer = pl.Trainer( 176 | max_epochs=epochs, 177 | gpus=[_GPU_TO_USE], 178 | callbacks=[model_checkpoint], 179 | val_check_interval=hparams.val_check_interval, 180 | limit_val_batches=hparams.limit_val_batches, 181 | limit_test_batches=hparams.limit_test_batches, 182 | logger=logger, 183 | default_root_dir=hparams.output, 184 | precision=precision, 185 | ) 186 | trainer.fit(model, data_module) 187 | if hparams.test_path is not None: 188 | trainer.test(model, data_module) 189 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/pre_processing/obb_inputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import multivariate_normal 3 | 4 | from CARTO.simnet.lib.net.post_processing import epnp 5 | from CARTO.simnet.lib.net.pre_processing import pose_inputs 6 | from CARTO.simnet.lib import datapoint 7 | 8 | _HEATMAP_THRESHOLD = 0.3 9 | _DOWNSCALE_VALUE = 8 10 | _PEAK_CONCENTRATION = 0.8 11 | 12 | # def compute_network_targets(obbs, masks, height, width, camera_model, class_index_list=None): 13 | # assert len(obbs) == len(masks) 14 | # if len(obbs) == 0: 15 | # height_d = int(height / _DOWNSCALE_VALUE) 16 | # width_d = int(width / _DOWNSCALE_VALUE) 17 | # return datapoint.OBB( 18 | # heat_map=np.zeros([height, width]), 19 | # vertex_target=np.zeros([height_d, width_d, 16]), 20 | # cov_matrices=np.zeros([height_d, width_d, 6]), 21 | # z_centroid=np.zeros([height_d, width_d]), 22 | # classes=np.zeros([height_d, width_d]) 23 | # ) 24 | # heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 25 | # vertex_target = pose_inputs.compute_vertex_field(obbs, heatmaps, camera_model) 26 | # z_centroid = pose_inputs.compute_z_centroid_field(obbs, heatmaps) 27 | # cov_matrix = compute_rotation_field(obbs, heatmaps) 28 | # class_target = None 29 | # if class_index_list is not None: 30 | # class_target = compute_class_field(obbs, class_index_list, heatmaps) 31 | # return datapoint.OBB( 32 | # heat_map=np.max(heatmaps, axis=0), 33 | # vertex_target=vertex_target, 34 | # cov_matrices=cov_matrix, 35 | # z_centroid=z_centroid, 36 | # classes=class_target 37 | # ) 38 | 39 | 40 | ## Extended Targers to include the pose + latent emb 41 | def compute_network_targets( 42 | obbs, 43 | masks, 44 | shape_code, 45 | arti_code, 46 | poses, 47 | height, 48 | width, 49 | camera_model, 50 | class_index_list=None, 51 | shape_emb_size=32, 52 | arti_emb_size=16, 53 | ): 54 | assert len(obbs) == len(masks) 55 | if len(obbs) == 0: 56 | height_d = int(height / _DOWNSCALE_VALUE) 57 | width_d = int(width / _DOWNSCALE_VALUE) 58 | return datapoint.OBB( 59 | heat_map=np.zeros([height, width]), 60 | vertex_target=np.zeros([height_d, width_d, 16]), 61 | cov_matrices=np.zeros([height_d, width_d, 6]), 62 | z_centroid=np.zeros([height_d, width_d]), 63 | shape_emb=np.zeros([height_d, width_d, shape_emb_size]), 64 | arti_emb=np.zeros([height_d, width_d, arti_emb_size]), 65 | abs_pose=np.zeros([height_d, width_d, 13]), 66 | ) 67 | heatmaps = pose_inputs.compute_heatmaps_from_masks(masks) 68 | vertex_target = pose_inputs.compute_vertex_field(obbs, heatmaps, camera_model) 69 | z_centroid = pose_inputs.compute_z_centroid_field(obbs, heatmaps) 70 | cov_matrix = compute_rotation_field(obbs, heatmaps) 71 | shape_emb_target = compute_latent_emb( 72 | obbs, shape_code, heatmaps, embedding_size=shape_emb_size 73 | ) 74 | arti_emb_target = compute_latent_emb( 75 | 
obbs, arti_code, heatmaps, embedding_size=arti_emb_size 76 | ) 77 | abs_pose_target = compute_abspose_field(poses, heatmaps, camera_model) 78 | return datapoint.OBB( 79 | heat_map=np.max(heatmaps, axis=0), 80 | vertex_target=vertex_target, 81 | cov_matrices=cov_matrix, 82 | z_centroid=z_centroid, 83 | shape_emb=shape_emb_target, 84 | arti_emb=arti_emb_target, 85 | abs_pose=abs_pose_target, 86 | ) 87 | 88 | 89 | #### 90 | # How does it work? 91 | # The first dimension represents a layer for each obbs. 92 | # Data will be set in the according channels (last dimension) 93 | # As the remainders entries for this layer stay zero, we can sum over the 94 | # first dimension to get rid of it. 95 | # TODO Nick maybe refactor and use direct indexing to save time 96 | # class_target[mask] = class_values 97 | # etc.. 98 | #### 99 | def compute_class_field(obbs, class_index_list, heat_maps, threshold=0.3): 100 | class_target = np.zeros([len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1]]) 101 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 102 | for obb, heat_map, ii in zip(obbs, heat_maps, range(len(heat_maps))): 103 | mask = heatmap_indices == ii 104 | class_values = class_index_list.index(obb.category_name) 105 | class_target[ii, mask] = class_values 106 | return np.sum(class_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 107 | 108 | 109 | def compute_rotation_field(obbs, heat_maps, threshold=0.3): 110 | cov_target = np.zeros([len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1], 6]) 111 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 112 | for obb, heat_map, ii in zip(obbs, heat_maps, range(len(heat_maps))): 113 | mask = heatmap_indices == ii 114 | cov_matrix = obb.cov_matrix 115 | cov_mat_values = np.array( 116 | [ 117 | cov_matrix[0, 0], 118 | cov_matrix[1, 1], 119 | cov_matrix[2, 2], 120 | cov_matrix[0, 1], 121 | cov_matrix[0, 2], 122 | cov_matrix[1, 2], 123 | ] 124 | ) 125 | cov_target[ii, mask] = cov_mat_values 126 | return np.sum(cov_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 127 | 128 | 129 | def compute_latent_emb(obbs, embeddings, heat_maps, embedding_size=1): 130 | """ 131 | Fills each pixel with the closest embedding code according to the heatmap 132 | """ 133 | latent_emb_target = np.zeros( 134 | [len(obbs), heat_maps[0].shape[0], heat_maps[0].shape[1], embedding_size] 135 | ) 136 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 137 | for emb, ii in zip(embeddings, range(len(heat_maps))): 138 | mask = heatmap_indices == ii 139 | latent_emb_target[ii, mask] = emb 140 | return np.sum(latent_emb_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 141 | 142 | 143 | def compute_abspose_field(poses, heat_maps, camera_model, threshold=0.3): 144 | abs_pose_target = np.zeros( 145 | [len(poses), heat_maps[0].shape[0], heat_maps[0].shape[1], 13] 146 | ) 147 | heatmap_indices = np.argmax(np.array(heat_maps), axis=0) 148 | for pose, ii in zip(poses, range(len(heat_maps))): 149 | mask = heatmap_indices == ii 150 | actual_abs_pose = camera_model.RT_matrix @ pose.camera_T_object 151 | rotation_matrix = actual_abs_pose[:3, :3] 152 | translation_vector = actual_abs_pose[:3, 3] 153 | scale = pose.scale_matrix[0, 0] 154 | abs_pose_values = np.array( 155 | [ 156 | rotation_matrix[0, 0], 157 | rotation_matrix[0, 1], 158 | rotation_matrix[0, 2], 159 | rotation_matrix[1, 0], 160 | rotation_matrix[1, 1], 161 | rotation_matrix[1, 2], 162 | rotation_matrix[2, 0], 163 | rotation_matrix[2, 1], 164 | rotation_matrix[2, 2], 165 | translation_vector[0], 
166 | translation_vector[1], 167 | translation_vector[2], 168 | scale, 169 | ] 170 | ) 171 | abs_pose_target[ii, mask] = abs_pose_values 172 | return np.sum(abs_pose_target, axis=0)[::_DOWNSCALE_VALUE, ::_DOWNSCALE_VALUE] 173 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/depth_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib import datapoint 8 | from torch.nn import functional as F 9 | from CARTO.simnet.lib.net import losses 10 | 11 | _mse_loss = losses.MSELoss() 12 | 13 | 14 | class DepthOutput: 15 | def __init__(self, depth_pred, hparams): 16 | self.depth_pred = depth_pred 17 | self.is_numpy = False 18 | self.disp_loss = DisparityLoss(hparams.max_disparity, False) 19 | self.loss = nn.SmoothL1Loss(reduction="none") 20 | self.hparams = hparams 21 | 22 | # Converters for torch to numpy 23 | def convert_to_numpy_from_torch(self): 24 | self.depth_pred = np.ascontiguousarray(self.depth_pred.float().cpu().numpy()) 25 | self.is_numpy = True 26 | return self.depth_pred 27 | 28 | def convert_to_torch_from_numpy(self): 29 | self.depth_pred[self.depth_pred > self.hparams.max_disparity] = ( 30 | self.hparams.max_disparity - 1 31 | ) 32 | self.depth_pred = torch.from_numpy( 33 | np.ascontiguousarray(self.depth_pred) 34 | ).float() 35 | self.is_numpy = False 36 | return self.depth_pred 37 | 38 | def get_prediction(self, is_target: bool = False): 39 | if not self.is_numpy: 40 | self.convert_to_numpy_from_torch() 41 | if is_target: 42 | return self.depth_pred 43 | else: 44 | return self.depth_pred[0] 45 | 46 | def get_visualization_img( 47 | self, left_img_np, corner_scale=1, raw_disp=True, is_target: bool = False 48 | ): 49 | if not self.is_numpy: 50 | self.convert_to_numpy_from_torch() 51 | 52 | if is_target: 53 | disp = self.depth_pred 54 | else: 55 | disp = self.depth_pred[0] 56 | 57 | if raw_disp: 58 | return disp_map_visualize(disp, self.hparams.max_disparity) 59 | disp_scaled = disp[::corner_scale, ::corner_scale] 60 | left_img_np[ 61 | : disp_scaled.shape[0], -disp_scaled.shape[1] : 62 | ] = disp_map_visualize(disp_scaled, self.hparams.max_disparity) 63 | return left_img_np 64 | 65 | def compute_loss(self, depth_targets, log, name): 66 | if self.is_numpy: 67 | raise ValueError("Output is not in torch mode") 68 | depth_target_stacked = [] 69 | for depth_target in depth_targets: 70 | depth_target_stacked.append(depth_target.depth_pred) 71 | depth_target_batch = torch.stack(depth_target_stacked) 72 | depth_target_batch = depth_target_batch.to(torch.device("cuda:0")) 73 | depth_loss = self.disp_loss(self.depth_pred, depth_target_batch) 74 | log[name] = depth_loss.item() 75 | return self.hparams.loss_depth_mult * depth_loss 76 | 77 | 78 | class DisparityLoss(nn.Module): 79 | """Smooth L1-loss for disparity with check for valid ground truth""" 80 | 81 | def __init__(self, max_disparity, stdmean_scaled): 82 | super().__init__() 83 | 84 | self.max_disparity = max_disparity 85 | self.stdmean_scaled = stdmean_scaled 86 | self.loss = nn.SmoothL1Loss(reduction="none") 87 | 88 | def forward(self, disparity, disparity_gt, right=False, low_range_div=None): 89 | # Scale ground truth disparity based on output scale. 
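        # Illustrative example (sizes assumed): with disparity_gt of shape (B, 512, 1024) and a
        # prediction of shape (B, 128, 256), scale_factor = 1024 // 256 = 4; the ground truth is
        # pooled down to (B, 128, 256) with its values divided by 4, and (when stdmean_scaled is
        # False) each per-sample loss below is multiplied by 4 to compensate.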
90 | scale_factor = disparity_gt.shape[2] // disparity.shape[2] 91 | disparity_gt = downsample_disparity(disparity_gt, scale_factor) 92 | max_disparity = self.max_disparity / scale_factor 93 | if low_range_div is not None: 94 | max_disparity /= low_range_div 95 | 96 | # with torch.no_grad(): 97 | # valid_mask = get_disparity_valid_mask(disparity_gt, max_disparity, right) 98 | 99 | batch_size, _, _ = disparity.shape 100 | loss = torch.zeros(1, dtype=disparity.dtype, device=disparity.device) 101 | 102 | # Not all batch elements may have ground truth for disparity, so we compute the loss for each batch element 103 | # individually. 104 | valid_count = 0 105 | for batch_idx in range(batch_size): 106 | if torch.sum(disparity_gt[batch_idx, :, :]) < 1e-3: 107 | continue 108 | 109 | single_loss = self.loss( 110 | disparity[batch_idx, :, :], disparity_gt[batch_idx, :, :] 111 | ) 112 | valid_count += 1 113 | 114 | if self.stdmean_scaled: 115 | # Scale loss by standard deviation and mean of ground truth to reduce influence of very high 116 | # disparities. 117 | gt_std, gt_mean = torch.std_mean(disparity_gt[batch_idx, :, :]) 118 | loss += torch.mean(single_loss) / (gt_mean + 2.0 * gt_std) 119 | else: 120 | # Scale loss by scale factor due to difference of expected magnitude of disparity at different scales. 121 | loss += torch.mean(single_loss) * scale_factor 122 | # Avoid potential divide by 0. 123 | if valid_count > 0: 124 | return loss / batch_size 125 | else: 126 | return loss 127 | 128 | 129 | def downsample_disparity(disparity, factor): 130 | """Downsample disparity using a min-pool operation 131 | 132 | Input can be either a Numpy array or Torch tensor. 133 | """ 134 | with torch.no_grad(): 135 | # Convert input to tensor at the appropriate number of dimensions if needed. 136 | is_numpy = type(disparity) == np.ndarray 137 | if is_numpy: 138 | disparity = torch.from_numpy(disparity) 139 | new_dims = 4 - len(disparity.shape) 140 | for i in range(new_dims): 141 | disparity = disparity.unsqueeze(0) 142 | 143 | disparity = F.max_pool2d(disparity, kernel_size=factor, stride=factor) / factor 144 | 145 | # Convert output disparity back into same format and number of dimensions as input. 
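        # Shape walk-through (assumed input): a NumPy map of shape (480, 640) with constant value
        # 32.0 and factor=4 is lifted to (1, 1, 480, 640), pooled to (1, 1, 120, 160) with values
        # rescaled to 8.0, then squeezed back to (120, 160) and converted to NumPy again below.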
146 | for i in range(new_dims): 147 | disparity = disparity.squeeze(0) 148 | if is_numpy: 149 | disparity = disparity.numpy() 150 | return disparity 151 | 152 | 153 | def get_disparity_valid_mask(disparity, max_disparity, right=False): 154 | """Generate mask where disparity is valid based on the given max_disparity""" 155 | IGNORE_EDGE = False 156 | result = torch.logical_and(disparity > 1e-3, disparity < (max_disparity - 1 - 1e-3)) 157 | if IGNORE_EDGE: 158 | width = disparity.shape[-1] 159 | edge_mask = ( 160 | torch.arange(width, dtype=disparity.dtype, device=disparity.device) - 1 161 | ) 162 | if right: 163 | edge_mask = torch.flip(edge_mask, (0,)) 164 | edge_mask = edge_mask.expand_as(disparity) 165 | valid_edge = disparity < edge_mask 166 | result = torch.logical_and(result, valid_edge) 167 | return result 168 | 169 | 170 | def turbo_vis(heatmap, normalize=False, uint8_output=False): 171 | assert len(heatmap.shape) == 2 172 | if normalize: 173 | heatmap = heatmap.astype(np.float32) 174 | heatmap -= np.min(heatmap) 175 | heatmap /= np.max(heatmap) 176 | assert heatmap.dtype != np.uint8 177 | 178 | x = heatmap 179 | x = x.clip(0, 1) 180 | a = (x * 255).astype(int) 181 | b = (a + 1).clip(max=255) 182 | f = x * 255.0 - a 183 | turbo_map = datapoint.TURBO_COLORMAP_DATA_NP[::-1] 184 | pseudo_color = turbo_map[a] + (turbo_map[b] - turbo_map[a]) * f[..., np.newaxis] 185 | pseudo_color[heatmap < 0.0] = 0.0 186 | pseudo_color[heatmap > 1.0] = 1.0 187 | if uint8_output: 188 | pseudo_color = (pseudo_color * 255).astype(np.uint8) 189 | return pseudo_color 190 | 191 | 192 | def disp_map_visualize(x, max_disp): 193 | assert len(x.shape) == 2 194 | x = x.astype(np.float64) 195 | valid = (x < max_disp) & np.isfinite(x) 196 | if valid.sum() == 0: 197 | return np.zeros_like(x).astype(np.uint8) 198 | x -= np.min(x[valid]) 199 | x /= np.max(x[valid]) 200 | x = 1.0 - x 201 | x[~valid] = 0.0 202 | x = turbo_vis(x) 203 | x = (x * 255).astype(np.uint8) 204 | return x[:, :, ::-1] 205 | -------------------------------------------------------------------------------- /CARTO/Decoder/data/verify_partnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Otherwise we can't use pyrender 4 | # os.environ["PYOPENGL_PLATFORM"] = "" 5 | # os.environ['DISPLAY'] = ':1' 6 | 7 | import multiprocessing 8 | from concurrent import futures 9 | import pathlib 10 | 11 | import tqdm 12 | import functools 13 | import gc 14 | import yaml 15 | 16 | import numpy as np 17 | 18 | from typing import Dict, Any, Callable, List 19 | import itertools 20 | import open3d as o3d 21 | 22 | # import pyrender 23 | import trimesh 24 | import urdfpy 25 | 26 | from CARTO.simnet.lib import partnet_mobility 27 | from CARTO.simnet.lib.datasets import PartNetMobilityV0, PartNetMobilityV0DB 28 | from CARTO.simnet.lib.datapoint import compress_datapoint, decompress_datapoint 29 | from CARTO.Decoder.visualizing import offscreen 30 | 31 | import uuid 32 | from CARTO.Decoder import utils, config 33 | from CARTO.Decoder.data import dataset 34 | import pyrender 35 | 36 | 37 | def process_object_id( 38 | object_id: str, 39 | joint_filter: Callable[[Dict[str, Any]], bool] = lambda _: True, 40 | joint_offset: float = 0.0, 41 | ): 42 | object_path = PartNetMobilityV0DB.get_object(object_id) 43 | object_meta = PartNetMobilityV0DB.get_object_meta(object_id) 44 | 45 | joints_of_interest: List[str] = [] 46 | # Artifact from preprocessing 47 | for joint_id, joint in object_meta["joints"].items(): 48 | if not 
joint_filter( 49 | joint, partnet_mobility.get_joint_name_exclusion_list(object_meta) 50 | ): 51 | continue 52 | joints_of_interest.append(joint_id) 53 | 54 | joint_config = {} 55 | for joint_id, joint in object_meta["joints"].items(): 56 | if joint_id in joints_of_interest: 57 | limits = partnet_mobility.get_canonical_joint_limits(object_meta, joint_id) 58 | # limits = np.array(object_meta["joints"][joint_id]["limit"]) 59 | # joint_config[joint_id] = limits[0] 60 | # joint_config[joint_id] = limits[1] 61 | joint_config[joint_id] = limits[0] + joint_offset 62 | else: 63 | joint_config[joint_id] = 0.0 64 | 65 | canonical_transform = np.array( 66 | PartNetMobilityV0DB.get_object_meta(object_id)["canonical_transformation"] 67 | ) 68 | 69 | # if PartNetMobilityV0DB.get_object_meta(object_id)["model_cat"] == "Scissors": 70 | # canonical_transform = trimesh.transformations.rotation_matrix( 71 | # np.pi / 2, np.array([0., 0.0, -1.]) 72 | # ) 73 | # print(canonical_transform) 74 | # canonical_transform = trimesh.transformations.random_rotation_matrix() 75 | 76 | urdf_object = urdfpy.URDF.load(str(object_path / "mobility.urdf")) 77 | 78 | # return utils.object_to_trimesh(urdf_object, joint_config, base_transform=canonical_transform) 79 | if len(joints_of_interest) == 1: 80 | # print(object_meta["joints"][joints_of_interest[0]]) 81 | obj_trimesh, _, _ = utils.object_to_trimesh( 82 | urdf_object, 83 | joint_config=joint_config, 84 | base_transform=canonical_transform, 85 | origin_frame=config.ObjectOrigin.CLOSED_STATE 86 | # origin_frame=config.ObjectOrigin.PARTNETMOBILITY 87 | ) 88 | return obj_trimesh 89 | else: 90 | return None 91 | 92 | 93 | def main(): 94 | object_filter, joint_filter = partnet_mobility.get_filter_function( 95 | # category_list=["Box"], 96 | # category_list=["Scissors"], 97 | # category_list=["Pliers"], 98 | # category_list=["Stapler"], 99 | # category_list=["Knife"], 100 | # category_list=["Dishwasher"], 101 | # category_list=["Microwave"], 102 | # category_list=["Oven"], 103 | # category_list=["Table"], 104 | # category_list=["WashingMachine"], 105 | # category_list=["Refrigerator"], 106 | category_list=["StorageFurniture"], 107 | # category_list=["Laptop"], 108 | # category_list=["Toilet"], 109 | # category_list=["Microwave", "Scissors"], 110 | # category_list=["Pliers", "Scissors", "Stapler"], 111 | # category_list=["Pliers", "Scissors"], 112 | # category_list=["Microwave", "Fridge", "Toilet", "WashingMachine", "Dishwasher", "Oven"], 113 | # category_list=[ 114 | # "Box", "Dishwasher", "Door", "Laptop", "Microwave", "Oven", "Refrigerator", "Safe", 115 | # "StorageFurniture", "Table", "Toilet", "TrashCan", "WashingMachine", "Window", "Stapler" 116 | # ], 117 | # category_list=[ 118 | # "Dishwasher", "Laptop", "Microwave", "Oven", "Refrigerator", "StorageFurniture", "Table", 119 | # "WashingMachine", "Stapler" 120 | # ], 121 | max_unique_parents=2, 122 | max_joints=1, 123 | no_limit_ok=False, 124 | min_prismatic=0.1, 125 | min_revolute=0.1, 126 | allowed_joints=["revolute"], 127 | # allowed_joints=["prismatic"] 128 | ) 129 | partnet_mobility_db = PartNetMobilityV0() 130 | partnet_mobility_db.set_filter(object_filter) 131 | print(f"Length of filtered dataset: {len(partnet_mobility_db)}") 132 | # exit(0) 133 | joint_offset = 0.5 134 | # joint_offset = 3.14159 135 | 136 | scene = pyrender.Scene() 137 | added_to_scene = 0 138 | object_ids = partnet_mobility_db.index_list 139 | for id_ in object_ids: 140 | print(id_) 141 | 142 | pcds = [] 143 | 144 | # Hardcode some 145 | 
object_ids = ["187d79cd04b2bdfddf3a1b0d597ce76e"]
146 |
147 | for object_id in tqdm.tqdm(object_ids):
148 | # for object_id in tqdm.tqdm(object_ids[:1]):
149 | trimesh_scene = process_object_id(
150 | object_id, joint_filter=joint_filter, joint_offset=joint_offset
151 | )
152 | if trimesh_scene is None:
153 | continue
154 | trimesh_single: trimesh.Trimesh = trimesh_scene.dump(concatenate=True)
155 |
156 | o3d_mesh: o3d.geometry.TriangleMesh = trimesh_single.as_open3d
157 | o3d_mesh.paint_uniform_color([1, 0.706, 0])
158 | o3d_mesh.compute_vertex_normals()
159 | pcds.append(o3d_mesh)
160 | # scene.add(pyrender.Mesh.from_trimesh(trimesh_single))
161 | added_to_scene += 1
162 | # print(object_id)
163 | # Single Scene
164 | # scene_local = offscreen.get_default_scene()
165 | # scene_local.add(pyrender.Mesh.from_trimesh(trimesh_single))
166 | # pyrender.Viewer(scene_local, use_raymond_lighting=True, show_world_axis=True)
167 | # pyrender.Viewer(scene, use_raymond_lighting=True, show_world_axis=True)
168 | print(f"{object_id}")
169 | o3d.visualization.draw_geometries([o3d_mesh])
170 | print(f"Objects in scene {added_to_scene}")
171 | # pyrender.Viewer(scene, use_raymond_lighting=True, show_world_axis=True)
172 |
173 | if True:
174 | points = np.array(
175 | [
176 | [-1.0, -1.0, -1.0],
177 | [1.0, -1.0, -1.0],
178 | [-1.0, 1.0, -1.0],
179 | [1.0, 1.0, -1.0],
180 | [-1.0, -1.0, 1.0],
181 | [1.0, -1.0, 1.0],
182 | [-1.0, 1.0, 1.0],
183 | [1.0, 1.0, 1.0],
184 | ],
185 | dtype=float,  # np.float alias is removed in recent NumPy; builtin float is equivalent
186 | )
187 | # points /= 2.
188 | lines = np.array(
189 | [
190 | [0, 1],
191 | [0, 2],
192 | [1, 3],
193 | [2, 3],
194 | [4, 5],
195 | [4, 6],
196 | [5, 7],
197 | [6, 7],
198 | [0, 4],
199 | [1, 5],
200 | [2, 6],
201 | [3, 7],
202 | ]
203 | )
204 | colors = [[1, 0, 0] for i in range(len(lines))]
205 | line_set = o3d.geometry.LineSet()
206 | line_set.points = o3d.utility.Vector3dVector(points)
207 | line_set.lines = o3d.utility.Vector2iVector(lines)
208 | line_set.colors = o3d.utility.Vector3dVector(colors)
209 | pcds.append(line_set)
210 | pcds.append(o3d.geometry.TriangleMesh.create_coordinate_frame())
211 | o3d.visualization.draw_geometries(pcds)
212 | # o3d.visualization.enable_indirect_light()
213 |
214 |
215 | if __name__ == "__main__":
216 | # TODO Use new tyro feature to parse function header?
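    # A possible shape for the TODO above (untested sketch, would need an `import tyro`):
    # tyro can also build the CLI directly from a function signature, e.g.
    #
    #   def main(category: str = "StorageFurniture", joint_offset: float = 0.5) -> None: ...
    #   tyro.cli(main)
    #
    # the argument names shown are illustrative, not the current signature of main().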
217 | main() 218 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/keypoint_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | from skimage.feature import peak_local_max 7 | 8 | from CARTO.simnet.lib.net import losses 9 | from CARTO.simnet.lib.label import get_unique_colors, Keypoint 10 | 11 | KEYPOINT_COLORS = get_unique_colors(10) 12 | LOSS = nn.BCELoss() 13 | # LOSS = nn.MSELoss() 14 | 15 | 16 | class KeypointOutput: 17 | def __init__(self, heatmap, hparams, ignore_mask=None): 18 | self.heatmap = heatmap 19 | self.first_heatmap = None 20 | self.ignore_mask = ignore_mask 21 | self.is_numpy = False 22 | self.hparams = hparams 23 | self.loss = LOSS 24 | self.num_keypoints = hparams.num_keypoints 25 | self.all_keypoints = None 26 | 27 | # Converters for torch to numpy 28 | def convert_to_numpy_from_torch(self): 29 | self.heatmap = np.ascontiguousarray(self.heatmap.float().cpu().numpy()) 30 | self.is_numpy = True 31 | 32 | def convert_to_torch_from_numpy(self): 33 | self.heatmap = torch.from_numpy(np.ascontiguousarray(self.heatmap)).float() 34 | self.is_numpy = False 35 | 36 | def get_keypoints(self, min_distance=40, min_confidence=0.3): 37 | if not self.is_numpy: 38 | self.convert_to_numpy_from_torch() 39 | # If this keypoint network only predicts a single type of keypoint, we must add a 40 | # dimension that is missing in the output head for the visualization code. 41 | if self.num_keypoints == 1: 42 | self.first_heatmap = self.heatmap[:, None, :, :][0] 43 | else: 44 | if len(self.heatmap.shape) == 4: # network outputs 45 | self.first_heatmap = self.heatmap[0] 46 | else: 47 | self.first_heatmap = self.heatmap 48 | return extract_keypoints_from_heatmap( 49 | self.first_heatmap, min_distance, min_confidence 50 | ) 51 | 52 | def get_detections(self, left_img): 53 | if not self.is_numpy: 54 | self.convert_to_numpy_from_torch() 55 | # If this keypoint network only predicts a single type of keypoint, we must add a 56 | # dimension that is missing in the output head for the visualization code. 
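# Shape note: the raw heatmap tensor is (batch, num_keypoints, H, W). When the head
# predicts a single keypoint class the class dimension is squeezed away, so
# heatmap[:, None, :, :][0] re-inserts it and drops the batch index, giving (1, H, W);
# otherwise, if the tensor is still batched, heatmap[0] selects the first element,
# giving (num_keypoints, H, W). Downstream code always expects (num_keypoints, H, W).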
57 | if self.num_keypoints == 1: 58 | self.first_heatmap = self.heatmap[:, None, :, :][0] 59 | else: 60 | if len(self.heatmap.shape) == 4: # network outputs 61 | self.first_heatmap = self.heatmap[0] 62 | else: 63 | self.first_heatmap = self.heatmap 64 | img, all_keypoints = vis_keypoints_from_heatmap( 65 | self.first_heatmap, 66 | left_img.shape[0], 67 | left_img.shape[1], 68 | left_img, 69 | True, 70 | True, 71 | ) 72 | return img 73 | 74 | def evaluation_metrics(self, targ_kp_output): 75 | targ_keypoints = targ_kp_output.get_keypoints() 76 | results = {} 77 | for i in range(len(targ_keypoints)): 78 | results[i] = {} 79 | for confidence in np.linspace(0, 1.01, 10): 80 | all_keypoints = self.get_keypoints(min_confidence=confidence) 81 | for i, (pred_class, targ_class) in enumerate( 82 | zip(all_keypoints, targ_keypoints) 83 | ): 84 | tp, fp, fn = evaluate_keypoints(pred_class, targ_class) 85 | precision = tp / (tp + fp) if tp + fp > 0 else 1 86 | recall = tp / (tp + fn) if tp + fn > 0 else 1 87 | results[i][confidence] = (precision, recall) 88 | return results 89 | 90 | def get_visualization_img(self, left_img): 91 | if not self.is_numpy: 92 | self.convert_to_numpy_from_torch() 93 | if self.num_keypoints == 1: 94 | self.first_heatmap = self.heatmap[:, None, :, :][0] 95 | else: 96 | self.first_heatmap = self.heatmap[0] 97 | return vis_network_outputs(self.first_heatmap, left_img) 98 | 99 | def compute_loss(self, keypoint_targets, log, name): 100 | if self.is_numpy: 101 | raise ValueError("Output is not in torch mode") 102 | heatmap_target = torch.stack( 103 | [ 104 | torch.squeeze(keypoint_target.heatmap) 105 | for keypoint_target in keypoint_targets 106 | ] 107 | ) 108 | 109 | # Move to GPU 110 | heatmap_target = heatmap_target.to(torch.device("cuda:0")) 111 | 112 | heatmap_loss = self.loss(self.heatmap, heatmap_target) 113 | log[name] = heatmap_loss.item() 114 | return self.hparams.loss_keypoint_mult * heatmap_loss 115 | 116 | 117 | def vis_network_outputs(heatmaps, left_img, idx=0): 118 | heatmap_vis = [] 119 | gray_img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 120 | for heatmap in heatmaps: 121 | img = np.copy(gray_img) 122 | heatmap /= np.max(heatmap) 123 | img = cv2.addWeighted(heatmap.astype(float), 0.999, img.astype(float), 0.001, 0) 124 | img /= img.max() / 255 125 | heatmap_vis.append(img[:, :, np.newaxis].astype(np.float32)) 126 | return heatmap_vis 127 | 128 | 129 | def draw_keypoints_from_predictions(keypoints, left_img): 130 | for i, keypoint_group in enumerate(keypoints): 131 | color = KEYPOINT_COLORS[i] 132 | color = (int(color[0]), int(color[1]), int(color[2])) 133 | for keypoint in keypoint_group: 134 | for px in keypoint.pixels: 135 | left_img = cv2.circle( 136 | left_img, tuple(px.ravel().astype(int)), 5, color, 2 137 | ) 138 | return left_img 139 | 140 | 141 | def extract_peaks_from_heatmap(heatmap, min_distance=40, min_confidence=0.3): 142 | peaks = peak_local_max( 143 | heatmap, 144 | min_distance=min_distance, 145 | threshold_abs=min_confidence, 146 | exclude_border=False, 147 | num_peaks=2, 148 | ) 149 | 150 | return peaks 151 | 152 | 153 | def evaluate_keypoints(pred_kp, targ_kp, distance_threshold=20): 154 | true_positives = 0 155 | false_negatives = 0 156 | counted = [] 157 | all_distances = [] 158 | for px in pred_kp.pixels: 159 | distances = np.linalg.norm(targ_kp.pixels - px, axis=1) 160 | all_distances.append(distances) 161 | all_distances = np.array(all_distances) 162 | for i in range(len(targ_kp.pixels)): 163 | if len(all_distances) == 0: 164 | break 165 
| closest = all_distances[:, i].argmin() # prediction that is closest 166 | if all_distances[closest, i] < distance_threshold: 167 | true_positives += 1 # correct prediction 168 | all_distances[ 169 | closest 170 | ] = 1e10 # don't let this prediction be a positive for anything else 171 | else: 172 | false_negatives += 1 # no prediction was sufficiently close 173 | false_positives = len(pred_kp.pixels) - true_positives 174 | false_negatives = len(targ_kp.pixels) - true_positives 175 | return true_positives, false_positives, false_negatives 176 | 177 | 178 | def extract_keypoints_from_heatmap(heatmap, min_distance=40, min_confidence=0.5): 179 | all_keypoints = [] 180 | for idx in range(heatmap.shape[0]): 181 | keypoints = extract_peaks_from_heatmap( 182 | heatmap[idx], min_distance, min_confidence 183 | ) 184 | all_keypoints.append(Keypoint(pixels=[px for px in keypoints])) 185 | return all_keypoints 186 | 187 | 188 | def vis_keypoints_from_heatmap( 189 | heatmap, height, width, left_img, raw_keypoints=False, gray=False 190 | ): 191 | if gray: 192 | img = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY) 193 | img = np.stack((img,) * 3, axis=-1) 194 | else: 195 | img = np.copy(left_img) 196 | img = img / img.max() * 255 197 | all_keypoints = [] 198 | for idx in range(heatmap.shape[0]): 199 | keypoints = extract_peaks_from_heatmap(heatmap[idx]) 200 | all_keypoints.append(Keypoint(pixels=[px for px in keypoints])) 201 | color = KEYPOINT_COLORS[idx] 202 | color = (int(color[0]), int(color[1]), int(color[2])) 203 | for px in keypoints: 204 | img = cv2.circle(img, tuple(px.ravel()[::-1].astype(int)), 5, color, 2) 205 | if raw_keypoints: 206 | return img, all_keypoints 207 | return img 208 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Toyota Research Institute. All rights reserved. 
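# Module note: create_anaglyph() below packs a rectified stereo pair into a single
# 6-channel float tensor in CHW order (channels 0-2: normalized left image,
# channels 3-5: normalized right image); extract_left_numpy_img() and
# extract_right_numpy_img() invert that packing for visualization.
# Rough shape walk-through (illustrative values only):
#
#     left_color, right_color: (H, W, 3) uint8
#     -> stacked               (H, W, 6) uint8, each half min-max normalized to [0, 255]
#     -> scaled to [0, 1] and transposed to (6, H, W)
#     -> float32 torch tensor consumed as the network's stereo input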
2 | 3 | import dataclasses 4 | import os 5 | import random 6 | import pathlib 7 | from typing import List, Tuple, Any 8 | 9 | import cv2 10 | import numpy as np 11 | import torch 12 | import IPython 13 | import torch.nn.functional as F 14 | from torch.utils.data import Dataset 15 | 16 | from CARTO.simnet.lib import datapoint 17 | from CARTO.simnet.lib.net.post_processing.segmentation_outputs import SegmentationOutput 18 | from CARTO.simnet.lib.net.post_processing.depth_outputs import DepthOutput 19 | from CARTO.simnet.lib.net.post_processing.pose_outputs import PoseOutput 20 | from CARTO.simnet.lib.net.post_processing.obb_outputs import OBBOutput 21 | 22 | 23 | def extract_left_numpy_img(anaglyph): 24 | anaglyph_np = np.ascontiguousarray(anaglyph.cpu().numpy()) 25 | anaglyph_np = anaglyph_np.transpose((1, 2, 0)) 26 | left_img = anaglyph_np[..., 0:3] * 255.0 27 | return left_img 28 | 29 | 30 | def extract_right_numpy_img(anaglyph): 31 | anaglyph_np = np.ascontiguousarray(anaglyph.cpu().numpy()) 32 | anaglyph_np = anaglyph_np.transpose((1, 2, 0)) 33 | left_img = anaglyph_np[..., 3:6] * 255.0 34 | return left_img 35 | 36 | 37 | def create_anaglyph(stereo_dp): 38 | height, width, _ = stereo_dp.left_color.shape 39 | image = np.zeros([height, width, 6], dtype=np.uint8) 40 | cv2.normalize(stereo_dp.left_color, stereo_dp.left_color, 0, 255, cv2.NORM_MINMAX) 41 | cv2.normalize(stereo_dp.right_color, stereo_dp.right_color, 0, 255, cv2.NORM_MINMAX) 42 | image[..., 0:3] = stereo_dp.left_color 43 | image[..., 3:6] = stereo_dp.right_color 44 | image = image * 1.0 / 255.0 45 | image = image.transpose((2, 0, 1)) 46 | return torch.from_numpy(np.ascontiguousarray(image)).float() 47 | 48 | 49 | # Struct for Panoptic Outputs 50 | @dataclasses.dataclass 51 | class PanopticOutputs: 52 | depth: list = dataclasses.field(default_factory=list) 53 | small_depth: list = dataclasses.field(default_factory=list) 54 | room_segmentation: List[SegmentationOutput] = dataclasses.field( 55 | default_factory=list 56 | ) 57 | cabinet_door_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 58 | handhold_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 59 | graspable_objects_obbs: List[OBBOutput] = dataclasses.field(default_factory=list) 60 | grasp_quality_scores: list = dataclasses.field(default_factory=list) 61 | val_data: List[datapoint.ValData] = dataclasses.field(default_factory=list) 62 | stereo_imgs: list = dataclasses.field(default_factory=list) 63 | 64 | 65 | def to_list(target): 66 | if target is None: 67 | return [] 68 | target.convert_to_torch_from_numpy() 69 | return [target] 70 | 71 | 72 | class Dataset(Dataset): 73 | def __init__( 74 | self, dataset_uri, hparams, preprocess_image_func=None, datapoint_dataset=None 75 | ): 76 | super().__init__() 77 | 78 | if datapoint_dataset is None: 79 | datapoint_dataset = datapoint.make_dataset(dataset_uri) 80 | 81 | self.datapoint_handles = datapoint_dataset.list() 82 | # No need to shuffle, already shufled based on random uids 83 | self.hparams = hparams 84 | 85 | if preprocess_image_func is None: 86 | self.preprocces_image_func = create_anaglyph 87 | else: 88 | self.preprocces_image_func = preprocess_image_func 89 | 90 | def __len__(self): 91 | return len(self.datapoint_handles) 92 | 93 | def __getitem__(self, idx): 94 | dp: datapoint.Panoptic = self.datapoint_handles[idx].read() 95 | 96 | # Process image 97 | anaglyph = self.preprocces_image_func(dp.stereo) 98 | if dp.val_data.scene_name == "unlabeled_data": 99 | return PanopticOutputs( 100 | 
depth=[]
101 | if dp.depth is None
102 | else [DepthOutput(torch.Tensor(dp.depth), self.hparams)],
103 | room_segmentation=[],
104 | cabinet_door_obbs=[],
105 | handhold_obbs=[],
106 | graspable_objects_obbs=[],
107 | grasp_quality_scores=[],
108 | small_depth=[],
109 | val_data=[dp.val_data],
110 | stereo_imgs=[anaglyph],
111 | )
112 | 
113 | 
114 | # Segmentation targets
115 | segmentation_target = to_list(SegmentationOutput(dp.segmentation, self.hparams))
116 | 
117 | # Ground truth disparity
118 | depth_target = to_list(DepthOutput(dp.depth, self.hparams))
119 | 
120 | # OBB output heads
121 | if dp.cabinet_door_obb:
122 | cabinet_door_obb_target = OBBOutput(
123 | dp.cabinet_door_obb.heat_map,
124 | dp.cabinet_door_obb.vertex_target,
125 | dp.cabinet_door_obb.z_centroid,
126 | dp.cabinet_door_obb.cov_matrices,
127 | self.hparams,
128 | class_field=dp.cabinet_door_obb.classes,
129 | )
130 | else:
131 | cabinet_door_obb_target = None
132 | cabinet_door_obb_target = to_list(cabinet_door_obb_target)
133 | 
134 | if dp.handhold_obb:
135 | handhold_obb_target = OBBOutput(
136 | dp.handhold_obb.heat_map,
137 | dp.handhold_obb.vertex_target,
138 | dp.handhold_obb.z_centroid,
139 | dp.handhold_obb.cov_matrices,
140 | self.hparams,
141 | )
142 | else:
143 | handhold_obb_target = None
144 | handhold_obb_target = to_list(handhold_obb_target)
145 | 
146 | if dp.graspable_objects_obb:
147 | graspable_objects_obb_target = OBBOutput(
148 | dp.graspable_objects_obb.heat_map,
149 | dp.graspable_objects_obb.vertex_target,
150 | dp.graspable_objects_obb.z_centroid,
151 | dp.graspable_objects_obb.cov_matrices,
152 | self.hparams,
153 | class_field=dp.graspable_objects_obb.classes,
154 | shape_emb=dp.graspable_objects_obb.shape_emb,
155 | arti_emb=dp.graspable_objects_obb.arti_emb,
156 | abs_pose_field=dp.graspable_objects_obb.abs_pose,
157 | )
158 | else:
159 | graspable_objects_obb_target = None
160 | graspable_objects_obb_target = to_list(graspable_objects_obb_target)
161 | 
162 | # Grasp quality
163 | # grasp_quality_scores_target = GraspOutput(
164 | # dp.grasps.heat_map, dp.grasps.grasp_success_target, self.hparams
165 | # )
166 | # Convert targets to pytorch
167 | # grasp_quality_scores_target.convert_to_torch_from_numpy()
168 | 
169 | # Collect all targets into the panoptic outputs struct
170 | return PanopticOutputs(
171 | depth=depth_target,
172 | room_segmentation=segmentation_target,
173 | cabinet_door_obbs=cabinet_door_obb_target,
174 | handhold_obbs=handhold_obb_target,
175 | graspable_objects_obbs=graspable_objects_obb_target,
176 | grasp_quality_scores=[],
177 | small_depth=[],
178 | val_data=[dp.val_data],
179 | stereo_imgs=[anaglyph],
180 | )
181 | 
182 | 
183 | def panoptic_collate(batch, rgbd=False) -> Tuple[torch.Tensor, PanopticOutputs]:
184 | # One entry per batch element; each PanopticOutputs carries the stereo images
185 | # and the per-head targets for that sample.
186 | panoptic_targets = PanopticOutputs()
187 | stereo_images_list = []
188 | 
189 | for ii in range(len(batch)):
190 | panoptic_targets.depth.extend(batch[ii].depth)
191 | panoptic_targets.room_segmentation.extend(batch[ii].room_segmentation)
192 | panoptic_targets.cabinet_door_obbs.extend(batch[ii].cabinet_door_obbs)
193 | panoptic_targets.handhold_obbs.extend(batch[ii].handhold_obbs)
194 | panoptic_targets.graspable_objects_obbs.extend(batch[ii].graspable_objects_obbs)
195 | panoptic_targets.grasp_quality_scores.extend(batch[ii].grasp_quality_scores)
196 | panoptic_targets.val_data.extend(batch[ii].val_data)
197 | stereo_images_list.extend(batch[ii].stereo_imgs)
198 | 
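# Batch assembly: torch.stack below produces a (B, 6, H, W) stereo batch. In RGB-D mode
# only the first three channels (left RGB) are kept and each sample's depth target
# (DepthOutput.depth_pred, typically (H, W)) is unsqueezed and concatenated as a fourth
# channel, yielding a (B, 4, H, W) network input.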
199 | stereo_images_torch = torch.stack(stereo_images_list) 200 | if rgbd: 201 | stereo_images_torch = torch.cat( 202 | ( 203 | stereo_images_torch[:, :3, ...], 204 | torch.stack( 205 | [po_target.depth_pred for po_target in panoptic_targets.depth] 206 | ).unsqueeze(1), 207 | ), 208 | dim=1, 209 | ) 210 | 211 | return stereo_images_torch, panoptic_targets 212 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/onnx_plugins.py: -------------------------------------------------------------------------------- 1 | """Plugins that can be used in an ONNX model.""" 2 | import struct 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.nn.modules.utils as utils 9 | 10 | from CARTO.simnet.lib.net.models.layers.cost_volume import ( 11 | dot_product_cost_volume, 12 | DotProductCostVolume, 13 | ) 14 | from CARTO.simnet.lib.net.models.layers.soft_argmin import SoftArgmin, soft_argmin 15 | 16 | # TODO(krishnashankar): Arguments of functions in modules below 17 | # differ from those of the base class(es) they inherit from, and 18 | # pylint complains. For now, disable here and consider disabling 19 | # globally. 20 | 21 | # pylint: disable=arguments-differ 22 | # pylint: disable=protected-access 23 | 24 | 25 | @torch.autograd.function.traceable 26 | class ExportableUpsampleFunction(torch.autograd.Function): 27 | """Upsample function that can be traced for ONNX export.""" 28 | 29 | @staticmethod 30 | def symbolic(g, inputs, scale_factor): 31 | assert scale_factor == 2, "Only 2x upsample implemented" 32 | return g.op( 33 | "TRT_PluginV2", 34 | inputs, 35 | version_s="0.0.1", 36 | namespace_s="", 37 | data_s="", 38 | name_s="UpsampleBilinearEvenSquare", 39 | ) 40 | 41 | @staticmethod 42 | def forward(ctx, inputs, scale_factor): 43 | return F.interpolate( 44 | inputs, scale_factor=scale_factor, mode="bilinear", align_corners=False 45 | ) 46 | 47 | @staticmethod 48 | def backward(_): 49 | raise RuntimeError("Backward not implemented") 50 | 51 | 52 | class ExportableUpsample(nn.Module): 53 | """Upsample module that can be used in an ONNX model.""" 54 | 55 | def __init__(self, scale_factor): 56 | super().__init__() 57 | self.scale_factor = scale_factor 58 | 59 | def forward(self, inputs): 60 | return ExportableUpsampleFunction.apply(inputs, self.scale_factor) 61 | 62 | 63 | class UpsampleWithConvTranspose(nn.Module): 64 | """Upsample model implemented with transposed convolution.""" 65 | 66 | def __init__(self, scale_factor): 67 | super(UpsampleWithConvTranspose, self).__init__() 68 | self.weights = None 69 | self.scale_factor = utils._pair(scale_factor) 70 | 71 | def check_scale_factor(scale_factor): 72 | assert scale_factor == 1 or scale_factor % 2 == 0 73 | 74 | check_scale_factor(self.scale_factor[0]) 75 | check_scale_factor(self.scale_factor[1]) 76 | 77 | def get_kernel_size(self, factor): 78 | return 2 * factor - factor % 2 79 | 80 | def bilinear_upsample_kernel(self, size): 81 | """Get a transpoed convolution kernel that implemented upsampling for the 82 | given size.""" 83 | 84 | def get_factor_and_center(size): 85 | factor = (size + 1) // 2 86 | if size % 2 == 1: 87 | center = factor - 1 88 | else: 89 | center = factor - 0.5 90 | return factor, center 91 | 92 | factor_h, center_h = get_factor_and_center(size[0]) 93 | factor_w, center_w = get_factor_and_center(size[1]) 94 | og = np.ogrid[: size[0], : size[1]] 95 | return (1 - abs((og[0] - center_h) / factor_h)) * ( 96 | 1 - abs((og[1] - 
center_w) / factor_w) 97 | ) 98 | 99 | def bilinear_upsample_weights(self, factor, nchannels): 100 | """Get transposed convolution weights for upsampling.""" 101 | filter_size_h = self.get_kernel_size(factor[0]) 102 | filter_size_w = self.get_kernel_size(factor[1]) 103 | 104 | weights = np.zeros( 105 | (filter_size_h, filter_size_w, nchannels, nchannels), dtype=np.float32 106 | ) 107 | 108 | kernel = self.bilinear_upsample_kernel((filter_size_h, filter_size_w)) 109 | 110 | for c in range(nchannels): 111 | weights[:, :, c, c] = kernel 112 | 113 | return weights 114 | 115 | def forward(self, inputs): 116 | in_channels = inputs.shape[1] 117 | if self.weights is None: 118 | weights = self.bilinear_upsample_weights(self.scale_factor, in_channels) 119 | # Order weights to be compatible with pytorch (in_channels, out_channels, height, width). 120 | self.weights = ( 121 | torch.from_numpy(weights.transpose(2, 3, 0, 1)) 122 | .to(inputs.device) 123 | .type(inputs.dtype) 124 | ) 125 | output = torch.nn.functional.conv_transpose2d( 126 | inputs, 127 | self.weights, 128 | stride=self.scale_factor, 129 | padding=(self.scale_factor[0] // 2, self.scale_factor[1] // 2), 130 | ) 131 | return output 132 | 133 | 134 | @torch.autograd.function.traceable 135 | class ExportableDotProductCostVolumeFunction(torch.autograd.Function): 136 | @staticmethod 137 | def symbolic(g, left, right, num_disparities, is_right): 138 | assert not is_right 139 | serialized_data = struct.pack(" 0: 207 | fix_module_train(child_module) 208 | 209 | 210 | def fix_module_onnx(module): 211 | """Replace all modules in the given module with ONNX-compatible modules.""" 212 | for child_module_name, child_module in module.named_children(): 213 | if isinstance(child_module, nn.Upsample): 214 | scale_factor = int(child_module.scale_factor) 215 | # TensorRT plugin can only load 2x upsample from ONNX currently, so 216 | # otherwise use transposed convolution. 
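# Note: with the `if False and ...` guard below, the TensorRT plugin path
# (ExportableUpsample) is effectively disabled, so every nn.Upsample is swapped for
# UpsampleWithConvTranspose, which reproduces bilinear upsampling with a fixed bilinear
# kernel in conv_transpose2d and therefore exports through standard ONNX ops.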
217 | if False and scale_factor == 2: 218 | module._modules[child_module_name] = ExportableUpsample(scale_factor) 219 | else: 220 | module._modules[child_module_name] = UpsampleWithConvTranspose( 221 | scale_factor 222 | ) 223 | elif isinstance(child_module, DotProductCostVolume): 224 | num_disparities = child_module.num_disparities 225 | is_right = child_module.is_right 226 | module._modules[child_module_name] = ExportableDotProductCostVolume( 227 | num_disparities, is_right 228 | ) 229 | elif isinstance(child_module, SoftArgmin): 230 | module._modules[child_module_name] = ExportableSoftArgmin() 231 | elif len(list(child_module.children())) > 0: 232 | fix_module_onnx(child_module) 233 | -------------------------------------------------------------------------------- /CARTO/simnet/lib/net/post_processing/box_outputs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import IPython 4 | import torch 5 | import torch.nn as nn 6 | 7 | from CARTO.simnet.lib.net.post_processing import pose_outputs 8 | from CARTO.simnet.lib.net.post_processing import nms 9 | from CARTO.simnet.lib.net.post_processing.eval2d import Detection 10 | from CARTO.simnet.lib.net import losses 11 | 12 | _mask_l1_loss = losses.MaskedL1Loss(downscale_factor=1) 13 | _mse_loss = losses.MaskedMSELoss() 14 | 15 | 16 | class BoxOutput: 17 | def __init__(self, heatmap, vertex_field, hparams, ignore_mask=None): 18 | self.heatmap = heatmap 19 | self.vertex_field = vertex_field 20 | self.ignore_mask = ignore_mask 21 | self.is_numpy = False 22 | self.hparams = hparams 23 | 24 | # Converters for torch to numpy 25 | def convert_to_numpy_from_torch(self): 26 | self.heatmap = np.ascontiguousarray(self.heatmap.float().cpu().numpy()) 27 | self.vertex_field = np.ascontiguousarray( 28 | self.vertex_field.float().cpu().numpy() 29 | ) 30 | self.vertex_field = self.vertex_field.transpose((0, 2, 3, 1)) 31 | self.vertex_field = self.vertex_field / 100.0 32 | self.is_numpy = True 33 | 34 | def convert_to_torch_from_numpy(self): 35 | self.vertex_field = self.vertex_field.transpose((2, 0, 1)) 36 | self.vertex_field = 100.0 * self.vertex_field 37 | self.vertex_field = torch.from_numpy( 38 | np.ascontiguousarray(self.vertex_field) 39 | ).float() 40 | self.ignore_mask = torch.from_numpy( 41 | np.ascontiguousarray(self.ignore_mask) 42 | ).bool() 43 | self.heatmap = torch.from_numpy(np.ascontiguousarray(self.heatmap)).float() 44 | self.is_numpy = False 45 | 46 | def get_detections(self, min_confidence=0.02, overlap_thresh=0.75): 47 | if not self.is_numpy: 48 | self.convert_to_numpy_from_torch() 49 | detections = create_detections_from_outputs( 50 | np.copy(self.heatmap[0]), 51 | np.copy(self.vertex_field[0]), 52 | min_confidence=min_confidence, 53 | ) 54 | detections = nms.run(detections, overlap_thresh=overlap_thresh) 55 | return detections 56 | 57 | def get_visualization_img(self, left_img, is_pretty=False): 58 | if not self.is_numpy: 59 | self.convert_to_numpy_from_torch() 60 | if is_pretty: 61 | return draw_pretty_detection_from_outputs( 62 | self.heatmap[0], self.vertex_field[0], left_img 63 | ) 64 | return draw_detection_from_outputs( 65 | self.heatmap[0], self.vertex_field[0], left_img 66 | ) 67 | 68 | def compute_loss(self, pose_targets, log, prefix): 69 | if self.is_numpy: 70 | raise ValueError("Output is not in torch mode") 71 | vertex_target = torch.stack( 72 | [pose_target.vertex_field for pose_target in pose_targets] 73 | ) 74 | heatmap_target = torch.stack( 75 | 
[pose_target.heatmap for pose_target in pose_targets] 76 | ) 77 | ignore_target = torch.stack( 78 | [pose_target.ignore_mask for pose_target in pose_targets] 79 | ) 80 | 81 | # Move to GPU 82 | heatmap_target = heatmap_target.to(torch.device("cuda:0")) 83 | vertex_target = vertex_target.to(torch.device("cuda:0")) 84 | ignore_target = ignore_target.to(torch.device("cuda:0")) 85 | 86 | vertex_loss = _mask_l1_loss(vertex_target, self.vertex_field, heatmap_target) 87 | log[f"{prefix}/vertex_loss"] = vertex_loss.item() 88 | heatmap_loss = _mse_loss(self.heatmap, heatmap_target, ignore_target) 89 | log[f"{prefix}/heatmap"] = heatmap_loss.item() 90 | return ( 91 | self.hparams.loss_vertex_mult * vertex_loss 92 | + self.hparams.loss_heatmap_mult * heatmap_loss 93 | ) 94 | 95 | 96 | def draw_detection_from_outputs( 97 | heatmap_output, vertex_output, c_img, min_confidence=0.4 98 | ): 99 | c_img_gray = np.zeros(c_img.shape) 100 | for i in range(3): 101 | c_img_gray[:, :, i] = cv2.cvtColor(c_img, cv2.COLOR_BGR2GRAY) 102 | 103 | peaks = pose_outputs.extract_peaks_from_centroid( 104 | heatmap_output, min_confidence=min_confidence 105 | ) 106 | peak_img = pose_outputs.draw_peaks(heatmap_output, peaks) 107 | bboxes_ext = extract_vertices_from_peaks(np.copy(peaks), vertex_output, c_img_gray) 108 | img = draw_2d_boxes(c_img_gray, bboxes_ext) 109 | img = cv2.addWeighted(img.astype(np.uint8), 0.9, peak_img.astype(np.uint8), 0.4, 0) 110 | return img 111 | 112 | 113 | def draw_pretty_detection_from_outputs( 114 | heatmap_output, vertex_output, c_img, min_confidence=0.4 115 | ): 116 | # c_img_gray = np.zeros(c_img.shape) 117 | # for i in range(3): 118 | # c_img_gray[:, :, i] = cv2.cvtColor(c_img, cv2.COLOR_BGR2GRAY) 119 | 120 | c_img = cv2.cvtColor(c_img, cv2.COLOR_BGR2RGB) 121 | 122 | peaks = pose_outputs.extract_peaks_from_centroid( 123 | heatmap_output, min_confidence=min_confidence 124 | ) 125 | bboxes_ext = extract_vertices_from_peaks(np.copy(peaks), vertex_output, c_img) 126 | img = draw_2d_boxes(c_img, bboxes_ext) 127 | return img 128 | 129 | 130 | def create_detections_from_outputs(heatmap_output, vertex_output, min_confidence=0.1): 131 | peaks = pose_outputs.extract_peaks_from_centroid( 132 | heatmap_output, min_confidence=min_confidence 133 | ) 134 | bboxes_ext = extract_vertices_from_peaks( 135 | np.copy(peaks), vertex_output, heatmap_output 136 | ) 137 | detections = [] 138 | for peak, bbox_ext in zip(peaks, bboxes_ext): 139 | score = heatmap_output[peak[0], peak[1]] 140 | bbox = [ 141 | np.array([bbox_ext[0][0], bbox_ext[0][1]]), 142 | np.array([bbox_ext[1][0], bbox_ext[1][1]]), 143 | ] 144 | detection = Detection(class_label="Car", bbox=bbox, score=score) 145 | detections.append(detection) 146 | return detections 147 | 148 | 149 | def extract_vertices_from_peaks(peaks, vertex_fields, c_img, scale_factor=1): 150 | assert peaks.shape[1] == 2 151 | assert vertex_fields.shape[2] == 4 152 | height = vertex_fields.shape[0] * scale_factor 153 | width = vertex_fields.shape[1] * scale_factor 154 | vertex_fields[:, :, ::2] = (1.0 - vertex_fields[:, :, ::2]) * (2 * height) - height 155 | vertex_fields[:, :, 1::2] = (1.0 - vertex_fields[:, :, 1::2]) * (2 * width) - width 156 | bboxes = [] 157 | for ii in range(peaks.shape[0]): 158 | bbox = get_bbox_from_vertex( 159 | vertex_fields, peaks[ii, :], scale_factor=scale_factor 160 | ) 161 | bboxes.append(bbox) 162 | return bboxes 163 | 164 | 165 | def get_bbox_from_vertex(vertex_fields, index, scale_factor=64): 166 | assert index.shape[0] == 2 167 | index[0] = 
int(index[0] / scale_factor) 168 | index[1] = int(index[1] / scale_factor) 169 | bbox = vertex_fields[index[0], index[1], :] 170 | bbox = [[bbox[0], bbox[1]], [bbox[2], bbox[3]]] 171 | bbox = scale_factor * (index) - bbox 172 | return bbox 173 | 174 | 175 | def draw_2d_boxes_with_colors(img, bboxes, colors): 176 | for bounding_box, color in zip(bboxes, colors): 177 | bbox = bounding_box.bounding_box 178 | pt1 = (int(bbox[0][1]), int(bbox[0][0])) 179 | pt2 = (int(bbox[1][1]), int(bbox[1][0])) 180 | img = cv2.rectangle(img, pt1, pt2, color, 2) 181 | return img 182 | 183 | 184 | def draw_2d_boxes(c_img, bboxes): 185 | c_img = cv2.cvtColor(np.array(c_img), cv2.COLOR_BGR2RGB) 186 | for bounding_box in bboxes: 187 | bbox = bounding_box.bbox 188 | pt1 = (int(bbox[0][0]), int(bbox[0][1])) 189 | pt2 = (int(bbox[1][0]), int(bbox[1][1])) 190 | c_img = cv2.rectangle(c_img, pt1, pt2, (255, 0, 0), 2) 191 | return c_img 192 | 193 | 194 | def draw_2d_boxes_with_labels(c_img, bboxes): 195 | c_img = cv2.cvtColor(np.array(c_img), cv2.COLOR_BGR2RGB) 196 | for bounding_box in bboxes: 197 | bbox = bounding_box.bbox 198 | pt1 = (int(bbox[0][0]), int(bbox[0][1])) 199 | pt2 = (int(bbox[1][0]), int(bbox[1][1])) 200 | c_img = cv2.rectangle(c_img, pt1, pt2, (255, 0, 0), 2) 201 | c_img = draw_class_label( 202 | c_img, pt1 + ((bbox[1] - bbox[0]) / 2.0), bounding_box.class_label 203 | ) 204 | 205 | return c_img 206 | 207 | 208 | def draw_class_label(c_img, pixel_center, class_label): 209 | color = (0, 255, 0) # green 210 | if class_label == "null": 211 | return c_img 212 | class_label = " ".join(class_label.split("_")) 213 | # TODO: add more metadata to class labels so we can have human friendly names and styles 214 | pixel_x = int(pixel_center[0]) 215 | pixel_y = int(pixel_center[1]) 216 | size = 0.75 217 | thickness = 2 218 | color = (0, 255, 0) 219 | c_img = cv2.putText( 220 | c_img.copy(), 221 | class_label, 222 | (pixel_x, pixel_y), 223 | cv2.FONT_HERSHEY_SIMPLEX, 224 | size, 225 | color, 226 | thickness, 227 | cv2.LINE_AA, 228 | ) 229 | return c_img 230 | --------------------------------------------------------------------------------
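A rough, self-contained sketch of the corner-offset arithmetic that extract_vertices_from_peaks() and get_bbox_from_vertex() in box_outputs.py implement (synthetic values chosen for illustration; this snippet is independent of the repository and only mirrors the un-normalization and peak-relative decoding shown above):

import numpy as np

height, width = 64, 96                      # heatmap resolution (illustrative)
peak = np.array([30, 40])                   # detected centroid peak (row, col)
corners = np.array([[20, 25], [42, 60]])    # true box corners (row, col)

# Encode: store peak-relative offsets, normalized the same way the vertex field is
# un-normalized in extract_vertices_from_peaks().
offsets = (peak - corners).reshape(-1).astype(np.float64)      # [dy0, dx0, dy1, dx1]
encoded = np.empty(4)
encoded[::2] = 1.0 - (offsets[::2] + height) / (2 * height)    # row components
encoded[1::2] = 1.0 - (offsets[1::2] + width) / (2 * width)    # col components

# Decode with the same arithmetic as the post-processing code.
decoded = np.empty(4)
decoded[::2] = (1.0 - encoded[::2]) * (2 * height) - height
decoded[1::2] = (1.0 - encoded[1::2]) * (2 * width) - width
recovered_corners = peak - decoded.reshape(2, 2)               # recovers the original corners
print(recovered_corners)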