├── ACID ├── src │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── libmcubes │ │ │ ├── .gitignore │ │ │ ├── pyarray_symbol.h │ │ │ ├── __init__.py │ │ │ ├── pywrapper.h │ │ │ ├── LICENSE │ │ │ ├── mcubes.pyx │ │ │ ├── exporter.py │ │ │ ├── README.rst │ │ │ ├── pyarraymodule.h │ │ │ └── pywrapper.cpp │ │ ├── libmise │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── test.py │ │ │ └── mise.pyx │ │ ├── libsimplify │ │ │ ├── test.py │ │ │ ├── __init__.py │ │ │ └── simplify_mesh.pyx │ │ ├── visualize.py │ │ ├── io.py │ │ └── common_util.py │ ├── encoder │ │ ├── __init__.py │ │ ├── pointnet.py │ │ └── unet.py │ ├── conv_onet │ │ ├── __init__.py │ │ ├── config.py │ │ ├── models │ │ │ └── decoder.py │ │ └── generation.py │ ├── data │ │ ├── __init__.py │ │ └── transforms.py │ ├── training.py │ ├── layers.py │ ├── config.py │ ├── checkpoints.py │ └── common.py ├── result │ └── geodesics │ │ └── .placehold ├── environment.yaml ├── metadata │ └── camera.json ├── configs │ ├── default.yaml │ └── plush_dyn_geodesics.yaml ├── setup.py ├── preprocess │ ├── gen_data_flow_splits.py │ ├── gen_data_contrastive_pairs_flow.py │ └── gen_data_flow_plush.py ├── README.md └── plush_train.py ├── _media ├── plushsim.png └── model_figure.png ├── .gitignore ├── licenses └── oss │ └── convonet-LICENSE.txt ├── README.md ├── PlushSim ├── all_animals.txt ├── README.md └── scripts │ ├── data_gen_attic.py │ ├── writer.py │ └── syntheticdata.py └── LICENSE /ACID/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ACID/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ACID/result/geodesics/.placehold: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/.gitignore: -------------------------------------------------------------------------------- 1 | PyMCubes.egg-info 2 | build 3 | -------------------------------------------------------------------------------- /ACID/src/utils/libmise/.gitignore: -------------------------------------------------------------------------------- 1 | mise.c 2 | mise.cpp 3 | mise.html 4 | -------------------------------------------------------------------------------- /_media/plushsim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ACID/HEAD/_media/plushsim.png -------------------------------------------------------------------------------- /_media/model_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ACID/HEAD/_media/model_figure.png -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/pyarray_symbol.h: -------------------------------------------------------------------------------- 1 | 2 | #define PY_ARRAY_UNIQUE_SYMBOL mcubes_PyArray_API 3 | -------------------------------------------------------------------------------- /ACID/src/utils/libmise/__init__.py: -------------------------------------------------------------------------------- 1 | from .mise import MISE 2 | 3 | __all__ = [ 4 | MISE 5 | ] 6 | 
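Note (editor's sketch, not part of the repository): the `MISE` class exported above implements Multiresolution IsoSurface Extraction, which refines the evaluation grid only around cells that straddle the isosurface, so an implicit decoder is evaluated far fewer times than on a dense grid. The snippet below mirrors the query/update loop in `src/utils/libmise/test.py`; it assumes the Cython extensions have been built (`python setup.py build_ext --inplace`) and that it is run from the `ACID/` source root. The spherical `occupancy` function is a hypothetical stand-in for a trained implicit decoder.

```python
import numpy as np
from src.utils.libmise import MISE

resolution_0, upsampling_steps, threshold = 32, 2, 0.5
extractor = MISE(resolution_0, upsampling_steps, threshold)

def occupancy(pts):
    # `pts` are integer grid coordinates in [0, resolution_0 * 2**upsampling_steps];
    # a real pipeline would evaluate the learned decoder here instead.
    res = resolution_0 * 2 ** upsampling_steps
    dist = np.linalg.norm(pts - res / 2.0, axis=-1)
    return (dist < res / 4.0).astype(np.float64)  # 1.0 inside a sphere, 0.0 outside

points = extractor.query()
while points.shape[0] != 0:
    extractor.update(points, occupancy(points))  # feed values back, refine active cells
    points = extractor.query()

value_grid = extractor.to_dense()  # dense value grid, ready for marching cubes
```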
-------------------------------------------------------------------------------- /ACID/src/encoder/__init__.py: -------------------------------------------------------------------------------- 1 | from src.encoder import ( 2 | pointnet 3 | ) 4 | 5 | 6 | encoder_dict = { 7 | 'geom_encoder': pointnet.GeomEncoder, 8 | } 9 | -------------------------------------------------------------------------------- /ACID/src/conv_onet/__init__.py: -------------------------------------------------------------------------------- 1 | from src.conv_onet import ( 2 | config, generation, training, models 3 | ) 4 | 5 | __all__ = [ 6 | config, generation, training, models 7 | ] 8 | -------------------------------------------------------------------------------- /ACID/src/utils/libsimplify/test.py: -------------------------------------------------------------------------------- 1 | from simplify_mesh import mesh_simplify 2 | import numpy as np 3 | 4 | v = np.random.rand(100, 3) 5 | f = np.random.choice(range(100), (50, 3)) 6 | 7 | mesh_simplify(v, f, 50) -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/__init__.py: -------------------------------------------------------------------------------- 1 | from src.utils.libmcubes.mcubes import ( 2 | marching_cubes, marching_cubes_func 3 | ) 4 | from src.utils.libmcubes.exporter import ( 5 | export_mesh, export_obj, export_off 6 | ) 7 | 8 | 9 | __all__ = [ 10 | marching_cubes, marching_cubes_func, 11 | export_mesh, export_obj, export_off 12 | ] 13 | -------------------------------------------------------------------------------- /ACID/src/utils/libsimplify/__init__.py: -------------------------------------------------------------------------------- 1 | from .simplify_mesh import ( 2 | mesh_simplify 3 | ) 4 | import trimesh 5 | 6 | 7 | def simplify_mesh(mesh, f_target=10000, agressiveness=7.): 8 | vertices = mesh.vertices 9 | faces = mesh.faces 10 | 11 | vertices, faces = mesh_simplify(vertices, faces, f_target, agressiveness) 12 | 13 | mesh_simplified = trimesh.Trimesh(vertices, faces, process=False) 14 | 15 | return mesh_simplified 16 | -------------------------------------------------------------------------------- /ACID/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from src.data.core import ( 3 | PlushEnvGeom, collate_remove_none, worker_init_fn, get_plush_loader 4 | ) 5 | from src.data.transforms import ( 6 | PointcloudNoise, SubsamplePointcloud, 7 | SubsamplePoints, 8 | ) 9 | __all__ = [ 10 | # Core 11 | PlushEnvGeom, 12 | get_plush_loader, 13 | collate_remove_none, 14 | worker_init_fn, 15 | PointcloudNoise, 16 | SubsamplePointcloud, 17 | SubsamplePoints, 18 | ] 19 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/pywrapper.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PYWRAPPER_H 3 | #define _PYWRAPPER_H 4 | 5 | #include 6 | #include "pyarraymodule.h" 7 | 8 | #include 9 | 10 | PyObject* marching_cubes(PyArrayObject* arr, double isovalue); 11 | PyObject* marching_cubes2(PyArrayObject* arr, double isovalue); 12 | PyObject* marching_cubes3(PyArrayObject* arr, double isovalue); 13 | PyObject* marching_cubes_func(PyObject* lower, PyObject* upper, 14 | int numx, int numy, int numz, PyObject* f, double isovalue); 15 | 16 | #endif // _PYWRAPPER_H 17 | -------------------------------------------------------------------------------- 
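Note (editor's sketch, not part of the repository): a minimal usage example of the `simplify_mesh` wrapper defined in `src/utils/libsimplify/__init__.py` above. It assumes the Cython extensions have been built (`python setup.py build_ext --inplace`) and that the snippet is run from the `ACID/` source root; the icosphere is a hypothetical stand-in for a mesh produced by the generator.

```python
import trimesh
from src.utils.libsimplify import simplify_mesh

# Stand-in input: a dense sphere mesh (20 * 4**4 = 5120 faces).
mesh = trimesh.creation.icosphere(subdivisions=4)

# Collapse edges until roughly 2000 faces remain
# (`agressiveness` is the parameter name as spelled in the source).
simplified = simplify_mesh(mesh, f_target=2000, agressiveness=7.0)
print(len(mesh.faces), "->", len(simplified.faces))
```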
/.gitignore: -------------------------------------------------------------------------------- 1 | /out 2 | /data 3 | _delete/ 4 | build 5 | temp 6 | .vscode 7 | .pytest_cache 8 | .cache 9 | *.pyc 10 | *.pyd 11 | *.pt 12 | *.so 13 | *.o 14 | *.prof 15 | *.swp 16 | *.lib 17 | *.obj 18 | *.exp 19 | .nfs* 20 | *.jpg 21 | *.png 22 | *.ply 23 | *.off 24 | *.npz 25 | *.txt 26 | *.blend 27 | *.blend1 28 | *.ini 29 | *.pdf 30 | *.npy 31 | *.pkl 32 | *.zip 33 | rgb 34 | pngs 35 | bash_scripts 36 | /src/utils/libmcubes/mcubes.cpp 37 | /src/utils/libsimplify/simplify_mesh.cpp 38 | /src/utils/libsimplify/build 39 | /PlushSim/assets/ 40 | /PlushSim/interaction_sequence/ 41 | .DS_Store 42 | -------------------------------------------------------------------------------- /ACID/src/utils/libmise/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mise import MISE 3 | import time 4 | 5 | t0 = time.time() 6 | extractor = MISE(1, 2, 0.) 7 | 8 | p = extractor.query() 9 | i = 0 10 | 11 | while p.shape[0] != 0: 12 | print(i) 13 | print(p) 14 | v = 2 * (p.sum(axis=-1) > 2).astype(np.float64) - 1 15 | extractor.update(p, v) 16 | p = extractor.query() 17 | i += 1 18 | if (i >= 8): 19 | break 20 | 21 | print(extractor.to_dense()) 22 | # p, v = extractor.get_points() 23 | # print(p) 24 | # print(v) 25 | print('Total time: %f' % (time.time() - t0)) 26 | -------------------------------------------------------------------------------- /ACID/environment.yaml: -------------------------------------------------------------------------------- 1 | name: acid_train 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | - defaults 6 | dependencies: 7 | - cython=0.29.2 8 | - imageio=2.4.1 9 | - numpy=1.15.4 10 | - numpy-base=1.15.4 11 | - matplotlib=3.0.3 12 | - matplotlib-base=3.0.3 13 | - pandas=0.23.4 14 | - pillow=5.3.0 15 | - pyembree=0.1.4 16 | - pytest=4.0.2 17 | - python=3.7.10 18 | - pytorch=1.4.0 19 | - pyyaml=3.13 20 | - scikit-image=0.14.1 21 | - scipy=1.5.2 22 | - tensorboardx=1.4 23 | - torchvision=0.2.1 24 | - tqdm=4.28.1 25 | - trimesh=2.37.7 26 | - pip 27 | - pip: 28 | - scikit-learn==0.24.2 29 | - h5py==2.9.0 30 | - plyfile==0.7 31 | - polyscope==1.2.0 32 | 33 | -------------------------------------------------------------------------------- /ACID/src/training.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from tqdm import tqdm 4 | 5 | 6 | class BaseTrainer(object): 7 | ''' Base trainer class. 8 | ''' 9 | 10 | def evaluate(self, val_loader): 11 | ''' Performs an evaluation. 12 | Args: 13 | val_loader (dataloader): pytorch dataloader 14 | ''' 15 | eval_list = defaultdict(list) 16 | 17 | for data in tqdm(val_loader): 18 | eval_step_dict = self.eval_step(data) 19 | 20 | for k, v in eval_step_dict.items(): 21 | eval_list[k].append(v) 22 | 23 | eval_dict = {k: np.mean(v) for k, v in eval_list.items()} 24 | return eval_dict 25 | 26 | def train_step(self, *args, **kwargs): 27 | ''' Performs a training step. 28 | ''' 29 | raise NotImplementedError 30 | 31 | def eval_step(self, *args, **kwargs): 32 | ''' Performs an evaluation step. 33 | ''' 34 | raise NotImplementedError 35 | 36 | def visualize(self, *args, **kwargs): 37 | ''' Performs visualization. 
38 | ''' 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /licenses/oss/convonet-LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Songyou Peng, Michael Niemeyer, Lars Mescheder, Marc Pollefeys, Andreas Geiger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ACID/metadata/camera.json: -------------------------------------------------------------------------------- 1 | {"cam0": [[[1.0, 0.0, 0.0, 0.0], [0.0, 0.30901699437494745, 0.9510565162951536, 9.500000000000028], [0.0, -0.9510565162951536, 0.30901699437494745, 3.000000000000007], [0.0, 0.0, 0.0, 1.0]], [[305.4163695204686, 0.0, 180.0], [0.0, 305.4163695204686, 320.0], [0.0, 0.0, 1.0]]], "cam1": [[[6.861555643110583e-17, -0.9510565162951536, 0.30901699437494745, 2.9999999999999964], [2.11176968422134e-16, 0.30901699437494745, 0.9510565162951536, 9.500000000000028], [-1.0, 0.0, 2.220446049250313e-16, -6.861555643110581e-15], [0.0, 0.0, 0.0, 1.0]], [[305.4163695204686, 0.0, 180.0], [0.0, 305.4163695204686, 320.0], [0.0, 0.0, 1.0]]], "cam2": [[[6.861555643110583e-17, 0.9510565162951536, -0.30901699437494745, -3.0], [-2.11176968422134e-16, 0.30901699437494745, 0.9510565162951536, 9.500000000000028], [1.0, 0.0, 2.220446049250313e-16, 6.861555643110581e-15], [0.0, 0.0, 0.0, 1.0]], [[305.4163695204686, 0.0, 180.0], [0.0, 305.4163695204686, 320.0], [0.0, 0.0, 1.0]]], "cam3": [[[-1.0, -9.662289450897499e-17, 1.601616662667754e-16, 1.892183365217075e-15], [1.2246467991473532e-16, 0.30901699437494745, 0.9510565162951536, 9.500000000000028], [-1.413865101699022e-16, 0.9510565162951536, -0.30901699437494745, -3.000000000000007], [0.0, 0.0, 0.0, 1.0]], [[305.4163695204686, 0.0, 180.0], [0.0, 305.4163695204686, 320.0], [0.0, 0.0, 1.0]]]} -------------------------------------------------------------------------------- /ACID/configs/default.yaml: -------------------------------------------------------------------------------- 1 | method: conv_onet 2 | data: 3 | train_split: train 4 | val_split: val 5 | test_split: test 6 | dim: 3 7 | act_dim: 6 8 | padding: 0.1 9 | type: geom 10 | model: 11 | decoder: simple 12 | encoder: resnet18 13 | decoder_kwargs: {} 14 | encoder_kwargs: {} 15 | multi_gpu: false 16 | c_dim: 512 17 | training: 18 | out_dir: out/default 19 | batch_size: 64 20 | 
pos_weight: 5 21 | print_every: 200 22 | visualize_every: 1000 23 | visualize_total: 15 24 | checkpoint_every: 1000 25 | validate_every: 2000 26 | backup_every: 100000 27 | eval_sample: false 28 | model_selection_metric: loss 29 | model_selection_mode: minimize 30 | n_workers: 4 31 | n_workers_val: 4 32 | test: 33 | threshold: 0.5 34 | eval_mesh: true 35 | eval_pointcloud: true 36 | remove_wall: false 37 | model_file: model_best.pt 38 | generation: 39 | batch_size: 100000 40 | refinement_step: 0 41 | vis_n_outputs: 30 42 | generate_mesh: true 43 | generate_pointcloud: true 44 | generation_dir: generation 45 | use_sampling: false 46 | resolution_0: 32 47 | upsampling_steps: 3 48 | simplify_nfaces: null 49 | copy_groundtruth: false 50 | copy_input: true 51 | latent_number: 4 52 | latent_H: 8 53 | latent_W: 8 54 | latent_ny: 2 55 | latent_nx: 2 56 | latent_repeat: true 57 | sliding_window: False # added for crop generation -------------------------------------------------------------------------------- /ACID/src/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Resnet Blocks 6 | class ResnetBlockFC(nn.Module): 7 | ''' Fully connected ResNet Block class. 8 | 9 | Args: 10 | size_in (int): input dimension 11 | size_out (int): output dimension 12 | size_h (int): hidden dimension 13 | ''' 14 | 15 | def __init__(self, size_in, size_out=None, size_h=None): 16 | super().__init__() 17 | # Attributes 18 | if size_out is None: 19 | size_out = size_in 20 | 21 | if size_h is None: 22 | size_h = min(size_in, size_out) 23 | 24 | self.size_in = size_in 25 | self.size_h = size_h 26 | self.size_out = size_out 27 | # Submodules 28 | self.fc_0 = nn.Linear(size_in, size_h) 29 | self.fc_1 = nn.Linear(size_h, size_out) 30 | self.actvn = nn.ReLU() 31 | 32 | if size_in == size_out: 33 | self.shortcut = None 34 | else: 35 | self.shortcut = nn.Linear(size_in, size_out, bias=False) 36 | # Initialization 37 | nn.init.zeros_(self.fc_1.weight) 38 | 39 | def forward(self, x): 40 | net = self.fc_0(self.actvn(x)) 41 | dx = self.fc_1(self.actvn(net)) 42 | 43 | if self.shortcut is not None: 44 | x_s = self.shortcut(x) 45 | else: 46 | x_s = x 47 | 48 | return x_s + dx -------------------------------------------------------------------------------- /ACID/configs/plush_dyn_geodesics.yaml: -------------------------------------------------------------------------------- 1 | method: conv_onet 2 | data: 3 | flow_path: train_data/flow 4 | pair_path: train_data/pair 5 | pointcloud_n_obj: 5000 6 | pointcloud_n_env: 1000 7 | pointcloud_noise: 0.005 8 | points_subsample: 3000 9 | model: 10 | type: combined 11 | obj_encoder_kwargs: 12 | f_dim: 3 13 | hidden_dim: 64 14 | plane_resolution: 128 15 | unet_kwargs: 16 | depth: 4 17 | merge_mode: concat 18 | start_filts: 64 19 | env_encoder_kwargs: 20 | f_dim: 3 21 | hidden_dim: 16 22 | plane_resolution: 64 23 | unet_kwargs: 24 | depth: 2 25 | merge_mode: concat 26 | start_filts: 16 27 | decoder_kwargs: 28 | corr_dim: 32 29 | sample_mode: bilinear # bilinear / nearest 30 | hidden_size: 32 31 | obj_c_dim: 64 32 | env_c_dim: 16 33 | loss: 34 | type: contrastive 35 | contrastive_threshold: 1 36 | use_geodesics: true 37 | scale_with_geodesics: False 38 | training: 39 | out_dir: result/dyn/geodesics 40 | batch_size: 4 41 | model_selection_metric: flow 42 | model_selection_mode: minimize 43 | print_every: 1 44 | visualize_every: 4000 45 | validate_every: 4000 46 | checkpoint_every: 4000 47 | backup_every: 
4000 48 | n_workers: 16 49 | n_workers_val: 4 50 | test: 51 | threshold: 0.95 52 | eval_mesh: true 53 | eval_pointcloud: false 54 | model_file: model_best.pt 55 | generation: 56 | refine: false 57 | n_x: 128 58 | n_z: 1 59 | -------------------------------------------------------------------------------- /ACID/setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | from distutils.extension import Extension 6 | from Cython.Build import cythonize 7 | from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension 8 | import numpy 9 | 10 | 11 | # Get the numpy include directory. 12 | numpy_include_dir = numpy.get_include() 13 | 14 | # Extensions 15 | # mcubes (marching cubes algorithm) 16 | mcubes_module = Extension( 17 | 'src.utils.libmcubes.mcubes', 18 | sources=[ 19 | 'src/utils/libmcubes/mcubes.pyx', 20 | 'src/utils/libmcubes/pywrapper.cpp', 21 | 'src/utils/libmcubes/marchingcubes.cpp' 22 | ], 23 | language='c++', 24 | extra_compile_args=['-std=c++11'], 25 | include_dirs=[numpy_include_dir] 26 | ) 27 | 28 | # mise (efficient mesh extraction) 29 | mise_module = Extension( 30 | 'src.utils.libmise.mise', 31 | sources=[ 32 | 'src/utils/libmise/mise.pyx' 33 | ], 34 | ) 35 | 36 | # simplify (efficient mesh simplification) 37 | simplify_mesh_module = Extension( 38 | 'src.utils.libsimplify.simplify_mesh', 39 | sources=[ 40 | 'src/utils/libsimplify/simplify_mesh.pyx' 41 | ], 42 | include_dirs=[numpy_include_dir] 43 | ) 44 | 45 | 46 | # Gather all extension modules 47 | ext_modules = [ 48 | mcubes_module, 49 | mise_module, 50 | simplify_mesh_module, 51 | ] 52 | 53 | setup( 54 | ext_modules=cythonize(ext_modules), 55 | cmdclass={ 56 | 'build_ext': BuildExtension 57 | } 58 | ) 59 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2015, P. M. Neila 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/mcubes.pyx: -------------------------------------------------------------------------------- 1 | 2 | # distutils: language = c++ 3 | # cython: embedsignature = True 4 | 5 | # from libcpp.vector cimport vector 6 | import numpy as np 7 | 8 | # Define PY_ARRAY_UNIQUE_SYMBOL 9 | cdef extern from "pyarray_symbol.h": 10 | pass 11 | 12 | cimport numpy as np 13 | 14 | np.import_array() 15 | 16 | cdef extern from "pywrapper.h": 17 | cdef object c_marching_cubes "marching_cubes"(np.ndarray, double) except + 18 | cdef object c_marching_cubes2 "marching_cubes2"(np.ndarray, double) except + 19 | cdef object c_marching_cubes3 "marching_cubes3"(np.ndarray, double) except + 20 | cdef object c_marching_cubes_func "marching_cubes_func"(tuple, tuple, int, int, int, object, double) except + 21 | 22 | def marching_cubes(np.ndarray volume, float isovalue): 23 | 24 | verts, faces = c_marching_cubes(volume, isovalue) 25 | verts.shape = (-1, 3) 26 | faces.shape = (-1, 3) 27 | return verts, faces 28 | 29 | def marching_cubes2(np.ndarray volume, float isovalue): 30 | 31 | verts, faces = c_marching_cubes2(volume, isovalue) 32 | verts.shape = (-1, 3) 33 | faces.shape = (-1, 3) 34 | return verts, faces 35 | 36 | def marching_cubes3(np.ndarray volume, float isovalue): 37 | 38 | verts, faces = c_marching_cubes3(volume, isovalue) 39 | verts.shape = (-1, 3) 40 | faces.shape = (-1, 3) 41 | return verts, faces 42 | 43 | def marching_cubes_func(tuple lower, tuple upper, int numx, int numy, int numz, object f, double isovalue): 44 | 45 | verts, faces = c_marching_cubes_func(lower, upper, numx, numy, numz, f, isovalue) 46 | verts.shape = (-1, 3) 47 | faces.shape = (-1, 3) 48 | return verts, faces 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![NVIDIA Source Code License](https://img.shields.io/badge/license-NSCL-blue.svg)](https://github.com/NVlabs/ACID/blob/master/LICENSE) 2 | ![Python 3.7](https://img.shields.io/badge/python-3.7-green.svg) 3 | 4 | # ACID: Action-Conditional Implicit Visual Dynamics for Deformable Object Manipulation 5 | 6 | ### [Project Page](https://b0ku1.github.io/acid/) | [Paper](https://arxiv.org/abs/2203.06856) 7 | 8 |
9 | 10 |
11 | 
12 | This repository contains the codebase used in [**ACID: Action-Conditional Implicit Visual Dynamics for Deformable Object Manipulation**](https://b0ku1.github.io/acid/), which will appear in [RSS 2022](https://roboticsconference.org/program/papers/) and is nominated for the Best Student Paper Award. Specifically, the repo contains code for:
13 | * [**PlushSim**](./PlushSim/), the simulation environment used to generate all manipulation data.
14 | * [**ACID model**](./ACID/), the model and training code for the implicit visual dynamics model.
15 | 
16 | If you find our code or paper useful, please consider citing
17 | ```bibtex
18 | @article{shen2022acid,
19 | title={ACID: Action-Conditional Implicit Visual Dynamics for Deformable Object Manipulation},
20 | author={Shen, Bokui and Jiang, Zhenyu and Choy, Christopher and J. Guibas, Leonidas and Savarese, Silvio and Anandkumar, Anima and Zhu, Yuke},
21 | journal={Robotics: Science and Systems (RSS)},
22 | year={2022}
23 | }
24 | ```
25 | 
26 | # ACID model
27 | Please see the [README](./ACID/README.md) for more detailed information.
28 | 
29 | 
30 | # PlushSim
31 | Please see the [README](./PlushSim/README.md) for more detailed information.
32 | 
33 | 
34 | # License
35 | Please check the [LICENSE](./LICENSE) file. ACID may be used non-commercially, meaning for research or evaluation purposes only. For business inquiries, please contact researchinquiries@nvidia.com.
36 | 
--------------------------------------------------------------------------------
/ACID/src/utils/libmcubes/exporter.py:
--------------------------------------------------------------------------------
1 | 
2 | import numpy as np
3 | 
4 | 
5 | def export_obj(vertices, triangles, filename):
6 |     """
7 |     Exports a mesh in the (.obj) format.
8 |     """
9 | 
10 |     with open(filename, 'w') as fh:
11 | 
12 |         for v in vertices:
13 |             fh.write("v {} {} {}\n".format(*v))
14 | 
15 |         for f in triangles:
16 |             fh.write("f {} {} {}\n".format(*(f + 1)))
17 | 
18 | 
19 | def export_off(vertices, triangles, filename):
20 |     """
21 |     Exports a mesh in the (.off) format.
22 |     """
23 | 
24 |     with open(filename, 'w') as fh:
25 |         fh.write('OFF\n')
26 |         fh.write('{} {} 0\n'.format(len(vertices), len(triangles)))
27 | 
28 |         for v in vertices:
29 |             fh.write("{} {} {}\n".format(*v))
30 | 
31 |         for f in triangles:
32 |             fh.write("3 {} {} {}\n".format(*f))
33 | 
34 | 
35 | def export_mesh(vertices, triangles, filename, mesh_name="mcubes_mesh"):
36 |     """
37 |     Exports a mesh in the COLLADA (.dae) format.
38 | 
39 |     Needs PyCollada (https://github.com/pycollada/pycollada).
40 | """ 41 | 42 | import collada 43 | 44 | mesh = collada.Collada() 45 | 46 | vert_src = collada.source.FloatSource("verts-array", vertices, ('X','Y','Z')) 47 | geom = collada.geometry.Geometry(mesh, "geometry0", mesh_name, [vert_src]) 48 | 49 | input_list = collada.source.InputList() 50 | input_list.addInput(0, 'VERTEX', "#verts-array") 51 | 52 | triset = geom.createTriangleSet(np.copy(triangles), input_list, "") 53 | geom.primitives.append(triset) 54 | mesh.geometries.append(geom) 55 | 56 | geomnode = collada.scene.GeometryNode(geom, []) 57 | node = collada.scene.Node(mesh_name, children=[geomnode]) 58 | 59 | myscene = collada.scene.Scene("mcubes_scene", [node]) 60 | mesh.scenes.append(myscene) 61 | mesh.scene = myscene 62 | 63 | mesh.write(filename) 64 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | PyMCubes 3 | ======== 4 | 5 | PyMCubes is an implementation of the marching cubes algorithm to extract 6 | isosurfaces from volumetric data. The volumetric data can be given as a 7 | three-dimensional NumPy array or as a Python function ``f(x, y, z)``. The first 8 | option is much faster, but it requires more memory and becomes unfeasible for 9 | very large volumes. 10 | 11 | PyMCubes also provides a function to export the results of the marching cubes as 12 | COLLADA ``(.dae)`` files. This requires the 13 | `PyCollada `_ library. 14 | 15 | Installation 16 | ============ 17 | 18 | Just as any standard Python package, clone or download the project 19 | and run:: 20 | 21 | $ cd path/to/PyMCubes 22 | $ python setup.py build 23 | $ python setup.py install 24 | 25 | If you do not have write permission on the directory of Python packages, 26 | install with the ``--user`` option:: 27 | 28 | $ python setup.py install --user 29 | 30 | Example 31 | ======= 32 | 33 | The following example creates a data volume with spherical isosurfaces and 34 | extracts one of them (i.e., a sphere) with PyMCubes. The result is exported as 35 | ``sphere.dae``:: 36 | 37 | >>> import numpy as np 38 | >>> import mcubes 39 | 40 | # Create a data volume (30 x 30 x 30) 41 | >>> X, Y, Z = np.mgrid[:30, :30, :30] 42 | >>> u = (X-15)**2 + (Y-15)**2 + (Z-15)**2 - 8**2 43 | 44 | # Extract the 0-isosurface 45 | >>> vertices, triangles = mcubes.marching_cubes(u, 0) 46 | 47 | # Export the result to sphere.dae 48 | >>> mcubes.export_mesh(vertices, triangles, "sphere.dae", "MySphere") 49 | 50 | The second example is very similar to the first one, but it uses a function 51 | to represent the volume instead of a NumPy array:: 52 | 53 | >>> import numpy as np 54 | >>> import mcubes 55 | 56 | # Create the volume 57 | >>> f = lambda x, y, z: x**2 + y**2 + z**2 58 | 59 | # Extract the 16-isosurface 60 | >>> vertices, triangles = mcubes.marching_cubes_func((-10,-10,-10), (10,10,10), 61 | ... 
100, 100, 100, f, 16) 62 | 63 | # Export the result to sphere2.dae 64 | >>> mcubes.export_mesh(vertices, triangles, "sphere2.dae", "MySphere") 65 | -------------------------------------------------------------------------------- /ACID/preprocess/gen_data_flow_splits.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | ACID_dir = osp.dirname(osp.dirname(osp.realpath(__file__))) 5 | sys.path.insert(0,ACID_dir) 6 | 7 | import glob 8 | 9 | import argparse 10 | flow_default = osp.join(ACID_dir, "train_data", "flow") 11 | parser = argparse.ArgumentParser("Making training / testing splits...") 12 | parser.add_argument("--flow_root", type=str, default=flow_default) 13 | parser.add_argument("--no_split", action="store_true", default=False) 14 | args = parser.parse_args() 15 | 16 | flow_root = args.flow_root 17 | 18 | all_npz = glob.glob(f"{flow_root}/*/*/*.npz") 19 | 20 | print(f"In total {len(all_npz)} data points...") 21 | 22 | def filename_to_id(fname): 23 | split_id, model_name, f = fname.split("/")[-3:] 24 | reset_id, frame_id = (int(x) for x in os.path.splitext(f)[0].split('_')) 25 | return split_id, model_name, reset_id, frame_id 26 | 27 | from collections import defaultdict 28 | 29 | total_files = defaultdict(lambda : defaultdict(lambda : [])) 30 | for fname in all_npz: 31 | split_id, model_name, reset_id, frame_id = filename_to_id(fname) 32 | total_files[(split_id, model_name)][reset_id].append(frame_id) 33 | 34 | total_files = dict(total_files) 35 | for k,v in total_files.items(): 36 | total_files[k] = dict(v) 37 | import pickle 38 | if args.no_split: 39 | train = total_files 40 | test = total_files 41 | else: 42 | train = {} 43 | test = {} 44 | for k,v in total_files.items(): 45 | split_id, model_name = k 46 | if "teddy" in model_name: 47 | test[k] = v 48 | else: 49 | train[k] = v 50 | 51 | train_total = [] 52 | for k,v in train.items(): 53 | for x, u in v.items(): 54 | for y in u: 55 | train_total.append((*k, x, y)) 56 | print(f"training data points: {len(train_total)}") 57 | test_total = [] 58 | for k,v in test.items(): 59 | for x, u in v.items(): 60 | for y in u: 61 | test_total.append((*k, x, y)) 62 | print(f"testing data points: {len(test_total)}") 63 | 64 | with open(f"{flow_root}/train.pkl", "wb") as fp: 65 | pickle.dump(train_total, fp) 66 | with open(f"{flow_root}/test.pkl", "wb") as fp: 67 | pickle.dump(test_total, fp) -------------------------------------------------------------------------------- /ACID/src/utils/visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | from mpl_toolkits.mplot3d import Axes3D 4 | import src.common as common 5 | 6 | 7 | def visualize_data(data, data_type, out_file): 8 | r''' Visualizes the data with regard to its type. 9 | 10 | Args: 11 | data (tensor): batch of data 12 | data_type (string): data type (img, voxels or pointcloud) 13 | out_file (string): output file 14 | ''' 15 | if data_type == 'voxels': 16 | visualize_voxels(data, out_file=out_file) 17 | elif data_type == 'pointcloud': 18 | visualize_pointcloud(data, out_file=out_file) 19 | elif data_type is None or data_type == 'idx': 20 | pass 21 | else: 22 | raise ValueError('Invalid data_type "%s"' % data_type) 23 | 24 | 25 | def visualize_voxels(voxels, out_file=None, show=False): 26 | r''' Visualizes voxel data. 
27 | 28 | Args: 29 | voxels (tensor): voxel data 30 | out_file (string): output file 31 | show (bool): whether the plot should be shown 32 | ''' 33 | # Use numpy 34 | voxels = np.asarray(voxels) 35 | # Create plot 36 | fig = plt.figure() 37 | ax = fig.gca(projection=Axes3D.name) 38 | voxels = voxels.transpose(2, 0, 1) 39 | ax.voxels(voxels, edgecolor='k') 40 | ax.set_xlabel('Z') 41 | ax.set_ylabel('X') 42 | ax.set_zlabel('Y') 43 | ax.view_init(elev=30, azim=45) 44 | if out_file is not None: 45 | plt.savefig(out_file) 46 | if show: 47 | plt.show() 48 | plt.close(fig) 49 | 50 | 51 | def visualize_pointcloud(points, normals=None, 52 | out_file=None, show=False): 53 | r''' Visualizes point cloud data. 54 | 55 | Args: 56 | points (tensor): point data 57 | normals (tensor): normal data (if existing) 58 | out_file (string): output file 59 | show (bool): whether the plot should be shown 60 | ''' 61 | # Use numpy 62 | points = np.asarray(points) 63 | # Create plot 64 | fig = plt.figure() 65 | ax = fig.gca(projection=Axes3D.name) 66 | ax.scatter(points[:, 2], points[:, 0], points[:, 1]) 67 | if normals is not None: 68 | ax.quiver( 69 | points[:, 2], points[:, 0], points[:, 1], 70 | normals[:, 2], normals[:, 0], normals[:, 1], 71 | length=0.1, color='k' 72 | ) 73 | ax.set_xlabel('Z') 74 | ax.set_ylabel('X') 75 | ax.set_zlabel('Y') 76 | ax.set_xlim(-0.5, 0.5) 77 | ax.set_ylim(-0.5, 0.5) 78 | ax.set_zlim(-0.5, 0.5) 79 | ax.view_init(elev=30, azim=45) 80 | if out_file is not None: 81 | plt.savefig(out_file) 82 | if show: 83 | plt.show() 84 | plt.close(fig) 85 | 86 | -------------------------------------------------------------------------------- /ACID/src/utils/libsimplify/simplify_mesh.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | from libcpp.vector cimport vector 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | 7 | cdef extern from "Simplify.h": 8 | cdef struct vec3f: 9 | double x, y, z 10 | 11 | cdef cppclass SymetricMatrix: 12 | SymetricMatrix() except + 13 | 14 | 15 | cdef extern from "Simplify.h" namespace "Simplify": 16 | cdef struct Triangle: 17 | int v[3] 18 | double err[4] 19 | int deleted, dirty, attr 20 | vec3f uvs[3] 21 | int material 22 | 23 | cdef struct Vertex: 24 | vec3f p 25 | int tstart, tcount 26 | SymetricMatrix q 27 | int border 28 | 29 | cdef vector[Triangle] triangles 30 | cdef vector[Vertex] vertices 31 | cdef void simplify_mesh(int, double) 32 | 33 | 34 | cpdef mesh_simplify(double[:, ::1] vertices_in, long[:, ::1] triangles_in, 35 | int f_target, double agressiveness=7.) 
except +: 36 | vertices.clear() 37 | triangles.clear() 38 | 39 | # Read in vertices and triangles 40 | cdef Vertex v 41 | for iv in range(vertices_in.shape[0]): 42 | v = Vertex() 43 | v.p.x = vertices_in[iv, 0] 44 | v.p.y = vertices_in[iv, 1] 45 | v.p.z = vertices_in[iv, 2] 46 | vertices.push_back(v) 47 | 48 | cdef Triangle t 49 | for it in range(triangles_in.shape[0]): 50 | t = Triangle() 51 | t.v[0] = triangles_in[it, 0] 52 | t.v[1] = triangles_in[it, 1] 53 | t.v[2] = triangles_in[it, 2] 54 | triangles.push_back(t) 55 | 56 | # Simplify 57 | # print('Simplify...') 58 | simplify_mesh(f_target, agressiveness) 59 | 60 | # Only use triangles that are not deleted 61 | cdef vector[Triangle] triangles_notdel 62 | triangles_notdel.reserve(triangles.size()) 63 | 64 | for t in triangles: 65 | if not t.deleted: 66 | triangles_notdel.push_back(t) 67 | 68 | # Read out triangles 69 | vertices_out = np.empty((vertices.size(), 3), dtype=np.float64) 70 | triangles_out = np.empty((triangles_notdel.size(), 3), dtype=np.int64) 71 | 72 | cdef double[:, :] vertices_out_view = vertices_out 73 | cdef long[:, :] triangles_out_view = triangles_out 74 | 75 | for iv in range(vertices.size()): 76 | vertices_out_view[iv, 0] = vertices[iv].p.x 77 | vertices_out_view[iv, 1] = vertices[iv].p.y 78 | vertices_out_view[iv, 2] = vertices[iv].p.z 79 | 80 | for it in range(triangles_notdel.size()): 81 | triangles_out_view[it, 0] = triangles_notdel[it].v[0] 82 | triangles_out_view[it, 1] = triangles_notdel[it].v[1] 83 | triangles_out_view[it, 2] = triangles_notdel[it].v[2] 84 | 85 | # Clear vertices and triangles 86 | vertices.clear() 87 | triangles.clear() 88 | 89 | return vertices_out, triangles_out -------------------------------------------------------------------------------- /ACID/src/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from torchvision import transforms 3 | from src import data 4 | from src import conv_onet 5 | 6 | 7 | method_dict = { 8 | 'conv_onet': conv_onet 9 | } 10 | 11 | 12 | # General config 13 | def load_config(path, default_path=None): 14 | ''' Loads config file. 15 | 16 | Args: 17 | path (str): path to config file 18 | default_path (bool): whether to use default path 19 | ''' 20 | # Load configuration from file itself 21 | with open(path, 'r') as f: 22 | cfg_special = yaml.load(f) 23 | 24 | # Check if we should inherit from a config 25 | inherit_from = cfg_special.get('inherit_from') 26 | 27 | # If yes, load this config first as default 28 | # If no, use the default_path 29 | if inherit_from is not None: 30 | cfg = load_config(inherit_from, default_path) 31 | elif default_path is not None: 32 | with open(default_path, 'r') as f: 33 | cfg = yaml.load(f) 34 | else: 35 | cfg = dict() 36 | 37 | # Include main configuration 38 | update_recursive(cfg, cfg_special) 39 | 40 | return cfg 41 | 42 | 43 | def update_recursive(dict1, dict2): 44 | ''' Update two config dictionaries recursively. 45 | 46 | Args: 47 | dict1 (dict): first dictionary to be updated 48 | dict2 (dict): second dictionary which entries should be used 49 | 50 | ''' 51 | for k, v in dict2.items(): 52 | if k not in dict1: 53 | dict1[k] = dict() 54 | if isinstance(v, dict): 55 | update_recursive(dict1[k], v) 56 | else: 57 | dict1[k] = v 58 | 59 | 60 | # Models 61 | def get_model(cfg, device=None, dataset=None): 62 | ''' Returns the model instance. 
63 | 64 | Args: 65 | cfg (dict): config dictionary 66 | device (device): pytorch device 67 | dataset (dataset): dataset 68 | ''' 69 | method = cfg['method'] 70 | model = method_dict[method].config.get_model( 71 | cfg, device=device, dataset=dataset) 72 | return model 73 | 74 | 75 | # Trainer 76 | def get_trainer(model, optimizer, cfg, device): 77 | ''' Returns a trainer instance. 78 | 79 | Args: 80 | model (nn.Module): the model which is used 81 | optimizer (optimizer): pytorch optimizer 82 | cfg (dict): config dictionary 83 | device (device): pytorch device 84 | ''' 85 | method = cfg['method'] 86 | trainer = method_dict[method].config.get_trainer( 87 | model, optimizer, cfg, device) 88 | return trainer 89 | 90 | 91 | # Generator for final mesh extraction 92 | def get_generator(model, cfg, device): 93 | ''' Returns a generator instance. 94 | 95 | Args: 96 | model (nn.Module): the model which is used 97 | cfg (dict): config dictionary 98 | device (device): pytorch device 99 | ''' 100 | method = cfg['method'] 101 | generator = method_dict[method].config.get_generator(model, cfg, device) 102 | return generator 103 | -------------------------------------------------------------------------------- /PlushSim/all_animals.txt: -------------------------------------------------------------------------------- 1 | teddy/teddy_aug_6/teddy_aug_6.usda 2 | teddy/teddy_aug_11/teddy_aug_11.usda 3 | teddy/teddy_aug_4/teddy_aug_4.usda 4 | teddy/teddy_aug_5/teddy_aug_5.usda 5 | teddy/teddy_aug_1/teddy_aug_1.usda 6 | teddy/teddy_aug_10/teddy_aug_10.usda 7 | teddy/teddy_aug_2/teddy_aug_2.usda 8 | teddy/teddy_aug_3/teddy_aug_3.usda 9 | teddy/teddy_scaled/teddy_scaled.usda 10 | teddy/teddy_aug_9/teddy_aug_9.usda 11 | teddy/teddy_aug_0/teddy_aug_0.usda 12 | teddy/teddy_aug_8/teddy_aug_8.usda 13 | teddy/teddy_aug_7/teddy_aug_7.usda 14 | dog/dog_aug_7/dog_aug_7.usda 15 | dog/dog_aug_0/dog_aug_0.usda 16 | dog/dog_aug_8/dog_aug_8.usda 17 | dog/dog_aug_4/dog_aug_4.usda 18 | dog/dog_scaled/dog_scaled.usda 19 | dog/dog_aug_11/dog_aug_11.usda 20 | dog/dog_aug_1/dog_aug_1.usda 21 | dog/dog_aug_6/dog_aug_6.usda 22 | dog/dog_aug_9/dog_aug_9.usda 23 | dog/dog_aug_3/dog_aug_3.usda 24 | dog/dog_aug_2/dog_aug_2.usda 25 | dog/dog_aug_5/dog_aug_5.usda 26 | dog/dog_aug_10/dog_aug_10.usda 27 | snake/snake_aug_1/snake_aug_1.usda 28 | snake/snake_scaled/snake_scaled.usda 29 | snake/snake_aug_5/snake_aug_5.usda 30 | snake/snake_aug_4/snake_aug_4.usda 31 | snake/snake_aug_0/snake_aug_0.usda 32 | snake/snake_aug_9/snake_aug_9.usda 33 | snake/snake_aug_10/snake_aug_10.usda 34 | snake/snake_aug_7/snake_aug_7.usda 35 | snake/snake_aug_6/snake_aug_6.usda 36 | snake/snake_aug_11/snake_aug_11.usda 37 | snake/snake_aug_2/snake_aug_2.usda 38 | snake/snake_aug_8/snake_aug_8.usda 39 | snake/snake_aug_3/snake_aug_3.usda 40 | octopus/octopus_scaled/octopus_scaled.usda 41 | octopus/octopus_aug_0/octopus_aug_0.usda 42 | octopus/octopus_aug_11/octopus_aug_11.usda 43 | octopus/octopus_aug_3/octopus_aug_3.usda 44 | octopus/octopus_aug_1/octopus_aug_1.usda 45 | octopus/octopus_aug_6/octopus_aug_6.usda 46 | octopus/octopus_aug_7/octopus_aug_7.usda 47 | octopus/octopus_aug_2/octopus_aug_2.usda 48 | octopus/octopus_aug_10/octopus_aug_10.usda 49 | octopus/octopus_aug_9/octopus_aug_9.usda 50 | octopus/octopus_aug_5/octopus_aug_5.usda 51 | octopus/octopus_aug_8/octopus_aug_8.usda 52 | octopus/octopus_aug_4/octopus_aug_4.usda 53 | rabbit/rabbit_aug_6/rabbit_aug_6.usda 54 | rabbit/rabbit_aug_8/rabbit_aug_8.usda 55 | rabbit/rabbit_aug_7/rabbit_aug_7.usda 56 | 
rabbit/rabbit_aug_1/rabbit_aug_1.usda 57 | rabbit/rabbit_aug_4/rabbit_aug_4.usda 58 | rabbit/rabbit_aug_10/rabbit_aug_10.usda 59 | rabbit/rabbit_aug_2/rabbit_aug_2.usda 60 | rabbit/rabbit_aug_11/rabbit_aug_11.usda 61 | rabbit/rabbit_aug_9/rabbit_aug_9.usda 62 | rabbit/rabbit_aug_3/rabbit_aug_3.usda 63 | rabbit/rabbit_aug_0/rabbit_aug_0.usda 64 | rabbit/rabbit_aug_5/rabbit_aug_5.usda 65 | rabbit/rabbit_scaled/rabbit_scaled.usda 66 | elephant/elephant_aug_3/elephant_aug_3.usda 67 | elephant/elephant_aug_11/elephant_aug_11.usda 68 | elephant/elephant_scaled/elephant_scaled.usda 69 | elephant/elephant_aug_1/elephant_aug_1.usda 70 | elephant/elephant_aug_10/elephant_aug_10.usda 71 | elephant/elephant_aug_5/elephant_aug_5.usda 72 | elephant/elephant_aug_4/elephant_aug_4.usda 73 | elephant/elephant_aug_7/elephant_aug_7.usda 74 | elephant/elephant_aug_9/elephant_aug_9.usda 75 | elephant/elephant_aug_8/elephant_aug_8.usda 76 | elephant/elephant_aug_0/elephant_aug_0.usda 77 | elephant/elephant_aug_6/elephant_aug_6.usda 78 | elephant/elephant_aug_2/elephant_aug_2.usda 79 | -------------------------------------------------------------------------------- /ACID/src/checkpoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import torch 4 | from torch.utils import model_zoo 5 | 6 | 7 | class CheckpointIO(object): 8 | ''' CheckpointIO class. 9 | 10 | It handles saving and loading checkpoints. 11 | 12 | Args: 13 | checkpoint_dir (str): path where checkpoints are saved 14 | ''' 15 | def __init__(self, checkpoint_dir='./chkpts', **kwargs): 16 | self.module_dict = kwargs 17 | self.checkpoint_dir = checkpoint_dir 18 | if not os.path.exists(checkpoint_dir): 19 | os.makedirs(checkpoint_dir) 20 | 21 | def register_modules(self, **kwargs): 22 | ''' Registers modules in current module dictionary. 23 | ''' 24 | self.module_dict.update(kwargs) 25 | 26 | def save(self, filename, **kwargs): 27 | ''' Saves the current module dictionary. 28 | 29 | Args: 30 | filename (str): name of output file 31 | ''' 32 | if not os.path.isabs(filename): 33 | filename = os.path.join(self.checkpoint_dir, filename) 34 | 35 | outdict = kwargs 36 | for k, v in self.module_dict.items(): 37 | outdict[k] = v.state_dict() 38 | torch.save(outdict, filename) 39 | 40 | def load(self, filename): 41 | '''Loads a module dictionary from local file or url. 42 | 43 | Args: 44 | filename (str): name of saved module dictionary 45 | ''' 46 | if is_url(filename): 47 | return self.load_url(filename) 48 | else: 49 | return self.load_file(filename) 50 | 51 | def load_file(self, filename): 52 | '''Loads a module dictionary from file. 53 | 54 | Args: 55 | filename (str): name of saved module dictionary 56 | ''' 57 | 58 | if not os.path.isabs(filename): 59 | filename = os.path.join(self.checkpoint_dir, filename) 60 | 61 | if os.path.exists(filename): 62 | print(filename) 63 | print('=> Loading checkpoint from local file...') 64 | state_dict = torch.load(filename) 65 | scalars = self.parse_state_dict(state_dict) 66 | return scalars 67 | else: 68 | raise FileExistsError 69 | 70 | def load_url(self, url): 71 | '''Load a module dictionary from url. 
72 | 73 | Args: 74 | url (str): url to saved model 75 | ''' 76 | print(url) 77 | print('=> Loading checkpoint from url...') 78 | state_dict = model_zoo.load_url(url, progress=True) 79 | scalars = self.parse_state_dict(state_dict) 80 | return scalars 81 | 82 | def parse_state_dict(self, state_dict): 83 | '''Parse state_dict of model and return scalars. 84 | 85 | Args: 86 | state_dict (dict): State dict of model 87 | ''' 88 | 89 | for k, v in self.module_dict.items(): 90 | if k in state_dict: 91 | v.load_state_dict(state_dict[k]) 92 | else: 93 | print('Warning: Could not find %s in checkpoint!' % k) 94 | scalars = {k: v for k, v in state_dict.items() 95 | if k not in self.module_dict} 96 | return scalars 97 | 98 | def is_url(url): 99 | scheme = urllib.parse.urlparse(url).scheme 100 | return scheme in ('http', 'https') -------------------------------------------------------------------------------- /PlushSim/README.md: -------------------------------------------------------------------------------- 1 | [![NVIDIA Source Code License](https://img.shields.io/badge/license-NSCL-blue.svg)](https://github.com/NVlabs/ACID/blob/master/LICENSE) 2 | ![Python 3.7](https://img.shields.io/badge/python-3.7-green.svg) 3 | 4 | # PlushSim 5 | 6 |
7 | 8 |
9 | 
10 | Our PlushSim simulation environment is based on [Omniverse Kit](https://docs.omniverse.nvidia.com/prod_kit/prod_kit.html). This codebase contains the Docker image and the code to simulate and manipulate deformable objects.
11 | 
12 | ## Prerequisites
13 | Omniverse Kit has a set of hardware requirements. Specifically, it requires an RTX GPU (e.g. RTX 2080, RTX 30x0, Titan RTX). Also, 16GB+ of memory is recommended.
14 | 
15 | The codebase is tested on Linux Ubuntu 20.04.
16 | 
17 | ## Getting the Docker Image
18 | First, you need to install [Docker](https://docs.docker.com/engine/install/ubuntu/) and the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker) before proceeding.
19 | 
20 | After you have installed Docker and the NVIDIA Container Toolkit, you can obtain the PlushSim Docker image from DockerHub with the command:
21 | ```
22 | docker pull b0ku1/acid-docker:cleaned
23 | ```
24 | 
25 | ## Preparing Simulation Assets
26 | You can download the simulation assets `raw_assets.zip` at: [Google Drive](https://drive.google.com/file/d/1OO8Wi0PHF3ROmW8088JNOMJn4EcDLDPB/view?usp=sharing).
27 | 
28 | After you download it, unzip the assets within this directory. You should have a folder structure like:
29 | ```
30 | PlushSim/
31 |     assets/
32 |         animals/
33 |             ...
34 |         attic_clean/
35 |             ...
36 | ```
37 | 
38 | ## Generating Manipulation Trajectories
39 | Generating manipulation data consists of two steps:
40 | 1. Start the Docker image, and mount the correct directory.
41 | 2. Run the data generation script.
42 | 
43 | To start the Docker image with an interactive session, run the following command inside `PlushSim/`:
44 | ```
45 | export PLUSHSIM_ROOT=$(pwd)
46 | docker run -it -v $PLUSHSIM_ROOT:/result --gpus all b0ku1/acid-docker:cleaned bash
47 | ```
48 | 
49 | After entering the interactive session, you can run the following command to start generating manipulation trajectories:
50 | ```
51 | ./python.sh /result/scripts/data_gen_attic.py
52 | ```
53 | 
54 | The above script will generate sample interaction sequences in `PlushSim/interaction_sequence`. There are various command-line arguments that you can give to `data_gen_attic.py`; please see the documentation in the Python script.
55 | 
56 | ## Visualizing the Assets in the GUI
57 | To visualize the assets in the Omniverse GUI, you need to download and install [Omniverse](https://docs.omniverse.nvidia.com/prod_install-guide/prod_install-guide.html). The link contains NVIDIA's official installation instructions.
58 | 
59 | After you install Omniverse, you can open the `.usda` files in the assets folder. To run PlushSim's scripts outside of Docker (e.g. with your native Omniverse installation), you can find more information at [Omniverse Kit's Python Manual](https://docs.omniverse.nvidia.com/py/kit/index.html). For questions regarding Omniverse usage, please visit the [NVIDIA developer forum](https://forums.developer.nvidia.com/c/omniverse/300).
60 | 
61 | 
62 | ## License
63 | Please check the [LICENSE](../LICENSE) file. ACID may be used non-commercially, meaning for research or evaluation purposes only. For business inquiries, please contact researchinquiries@nvidia.com.
64 | 
65 | If you find our code or paper useful, please consider citing
66 | ```bibtex
67 | @article{shen2022acid,
68 | title={ACID: Action-Conditional Implicit Visual Dynamics for Deformable Object Manipulation},
69 | author={Shen, Bokui and Jiang, Zhenyu and Choy, Christopher and J.
Guibas, Leonidas and Savarese, Silvio and Anandkumar, Anima and Zhu, Yuke}, 70 | journal={Robotics: Science and Systems (RSS)}, 71 | year={2022} 72 | } 73 | ``` -------------------------------------------------------------------------------- /ACID/README.md: -------------------------------------------------------------------------------- 1 | [![NVIDIA Source Code License](https://img.shields.io/badge/license-NSCL-blue.svg)](https://github.com/NVlabs/ACID/blob/master/LICENSE) 2 | ![Python 3.7](https://img.shields.io/badge/python-3.7-green.svg) 3 | 4 | # ACID model 5 | 6 |
7 | 8 |
9 | 
10 | ## Prerequisites
11 | We use anaconda to manage the necessary packages. You can create an anaconda environment called `acid_train` using
12 | ```bash
13 | conda env create -f environment.yaml
14 | conda activate acid_train
15 | pip install torch-scatter==2.0.4 -f https://pytorch-geometric.com/whl/torch-1.4.0+cu101.html
16 | ```
17 | 
18 | Next, we need to compile the extension modules used for mesh utilities, which come from [Convolutional Occupancy Network](https://github.com/autonomousvision/convolutional_occupancy_networks).
19 | You can do this via
20 | ```
21 | python setup.py build_ext --inplace
22 | ```
23 | 
24 | ## Get Raw Manipulation Data
25 | You can obtain our pre-generated manipulation trajectories from [PlushSim](../PlushSim/) at this [Google Drive](https://drive.google.com/drive/folders/1wOIk58e3wCfgOeYFBC1caYP2KAoFijbW?usp=sharing) directory. The manipulation trajectories are broken into 10GB chunks. We recommend using [`gdown`](https://github.com/wkentaro/gdown) for downloading.
26 | 
27 | After downloading, please run the following commands to decompress the data:
28 | ```
29 | cat data_plush.zip.part-* > data_plush.zip
30 | unzip data_plush.zip
31 | ```
32 | 
33 | You should have the following folder structure:
34 | ```
35 | ACID/
36 |     data_plush/
37 |         metadata/
38 |             split1/
39 |                 ...
40 |             split2/
41 |                 ...
42 |             split3/
43 |                 ...
44 |         split1/
45 |             ...
46 |         split2/
47 |             ...
48 |         split3/
49 |             ...
50 | ```
51 | 
52 | ### Generating Training Data
53 | To generate input-output pairs for ACID training, run the following scripts:
54 | ```
55 | cd preprocess
56 | python gen_data_flow_plush.py
57 | python gen_data_flow_splits.py
58 | python gen_data_contrastive_pairs_flow.py
59 | ```
60 | 
61 | This should create a `train_data` directory inside this folder, with the following structure:
62 | ```
63 | ACID/
64 |     train_data/
65 |         flow/
66 |             split1/
67 |             split2/
68 |             split3/
69 |             train.pkl
70 |             test.pkl
71 |         pair/
72 |             split1/
73 |             split2/
74 |             split3/
75 | ```
76 | 
77 | If you wish to generate the data at another location, you can pass in different flags. Check out each preprocessing script for details.
78 | 
79 | ## Training
80 | Finally, to train the ACID model from scratch, run:
81 | ```
82 | python plush_train.py configs/plush_dyn_geodesics.yaml
83 | ```
84 | For available training options, please take a look at `configs/default.yaml` and `configs/plush_dyn_geodesics.yaml`.
85 | 
86 | ### Pretrained Weights
87 | You can download the pretrained weights from [Google Drive](https://drive.google.com/file/d/15ClJpMx8LlgPHXp1EeCP3Z4kD5h5bDKl/view?usp=sharing); please save `model_best.pt` to `result/geodesics/`.
88 | 
89 | ## License
90 | Please check the [LICENSE](../LICENSE) file. ACID may be used non-commercially, meaning for research or evaluation purposes only. For business inquiries, please contact researchinquiries@nvidia.com.
91 | 
92 | If you find our code or paper useful, please consider citing
93 | ```bibtex
94 | @article{shen2022acid,
95 | title={ACID: Action-Conditional Implicit Visual Dynamics for Deformable Object Manipulation},
96 | author={Shen, Bokui and Jiang, Zhenyu and Choy, Christopher and J.
Guibas, Leonidas and Savarese, Silvio and Anandkumar, Anima and Zhu, Yuke}, 97 | journal={Robotics: Science and Systems (RSS)}, 98 | year={2022} 99 | } 100 | ``` -------------------------------------------------------------------------------- /ACID/src/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from plyfile import PlyElement, PlyData 3 | import numpy as np 4 | 5 | 6 | def export_pointcloud(vertices, out_file, as_text=True): 7 | assert(vertices.shape[1] == 3) 8 | vertices = vertices.astype(np.float32) 9 | vertices = np.ascontiguousarray(vertices) 10 | vector_dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4')] 11 | vertices = vertices.view(dtype=vector_dtype).flatten() 12 | plyel = PlyElement.describe(vertices, 'vertex') 13 | plydata = PlyData([plyel], text=as_text) 14 | plydata.write(out_file) 15 | 16 | 17 | def load_pointcloud(in_file): 18 | plydata = PlyData.read(in_file) 19 | vertices = np.stack([ 20 | plydata['vertex']['x'], 21 | plydata['vertex']['y'], 22 | plydata['vertex']['z'] 23 | ], axis=1) 24 | return vertices 25 | 26 | 27 | def read_off(file): 28 | """ 29 | Reads vertices and faces from an off file. 30 | 31 | :param file: path to file to read 32 | :type file: str 33 | :return: vertices and faces as lists of tuples 34 | :rtype: [(float)], [(int)] 35 | """ 36 | 37 | assert os.path.exists(file), 'file %s not found' % file 38 | 39 | with open(file, 'r') as fp: 40 | lines = fp.readlines() 41 | lines = [line.strip() for line in lines] 42 | 43 | # Fix for ModelNet bug were 'OFF' and the number of vertices and faces 44 | # are all in the first line. 45 | if len(lines[0]) > 3: 46 | assert lines[0][:3] == 'OFF' or lines[0][:3] == 'off', \ 47 | 'invalid OFF file %s' % file 48 | 49 | parts = lines[0][3:].split(' ') 50 | assert len(parts) == 3 51 | 52 | num_vertices = int(parts[0]) 53 | assert num_vertices > 0 54 | 55 | num_faces = int(parts[1]) 56 | assert num_faces > 0 57 | 58 | start_index = 1 59 | # This is the regular case! 
60 | else: 61 | assert lines[0] == 'OFF' or lines[0] == 'off', \ 62 | 'invalid OFF file %s' % file 63 | 64 | parts = lines[1].split(' ') 65 | assert len(parts) == 3 66 | 67 | num_vertices = int(parts[0]) 68 | assert num_vertices > 0 69 | 70 | num_faces = int(parts[1]) 71 | assert num_faces > 0 72 | 73 | start_index = 2 74 | 75 | vertices = [] 76 | for i in range(num_vertices): 77 | vertex = lines[start_index + i].split(' ') 78 | vertex = [float(point.strip()) for point in vertex if point != ''] 79 | assert len(vertex) == 3 80 | 81 | vertices.append(vertex) 82 | 83 | faces = [] 84 | for i in range(num_faces): 85 | face = lines[start_index + num_vertices + i].split(' ') 86 | face = [index.strip() for index in face if index != ''] 87 | 88 | # check to be sure 89 | for index in face: 90 | assert index != '', \ 91 | 'found empty vertex index: %s (%s)' \ 92 | % (lines[start_index + num_vertices + i], file) 93 | 94 | face = [int(index) for index in face] 95 | 96 | assert face[0] == len(face) - 1, \ 97 | 'face should have %d vertices but as %d (%s)' \ 98 | % (face[0], len(face) - 1, file) 99 | assert face[0] == 3, \ 100 | 'only triangular meshes supported (%s)' % file 101 | for index in face: 102 | assert index >= 0 and index < num_vertices, \ 103 | 'vertex %d (of %d vertices) does not exist (%s)' \ 104 | % (index, num_vertices, file) 105 | 106 | assert len(face) > 1 107 | 108 | faces.append(face) 109 | 110 | return vertices, faces 111 | 112 | assert False, 'could not open %s' % file 113 | -------------------------------------------------------------------------------- /ACID/preprocess/gen_data_contrastive_pairs_flow.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import glob 4 | import tqdm 5 | import random 6 | import argparse 7 | import numpy as np 8 | import os.path as osp 9 | import time 10 | from multiprocessing import Pool 11 | ACID_dir = osp.dirname(osp.dirname(osp.realpath(__file__))) 12 | sys.path.insert(0,ACID_dir) 13 | 14 | parser = argparse.ArgumentParser("Training Contrastive Pair Data Generation") 15 | 16 | data_plush_default = osp.join(ACID_dir, "data_plush") 17 | meta_default = osp.join(ACID_dir, "data_plush", "metadata") 18 | flow_default = osp.join(ACID_dir, "train_data", "flow") 19 | pair_default = osp.join(ACID_dir, "train_data", "pair") 20 | parser.add_argument("--data_root", type=str, default=data_plush_default) 21 | parser.add_argument("--meta_root", type=str, default=meta_default) 22 | parser.add_argument("--flow_root", type=str, default=flow_default) 23 | parser.add_argument("--save_root", type=str, default=pair_default) 24 | args = parser.parse_args() 25 | 26 | data_root = args.data_root 27 | flow_root = args.flow_root 28 | save_root = args.save_root 29 | meta_root = args.meta_root 30 | os.makedirs(save_root, exist_ok=True) 31 | 32 | 33 | def using_complex(a): 34 | weight = 1j*np.linspace(0, a.shape[1], a.shape[0], endpoint=False) 35 | b = a + weight[:, np.newaxis] 36 | u, ind = np.unique(b, return_index=True) 37 | b = np.zeros_like(a) + 256 38 | np.put(b, ind, a.flat[ind]) 39 | return b 40 | 41 | def process(pair, num_samples=320, keep=80): 42 | split_id, model_name, f,p = pair 43 | src_file = np.load(f"{flow_root}/{split_id}/{model_name}/{f}") 44 | tgt_file = np.load(f"{flow_root}/{split_id}/{model_name}/{p}") 45 | 46 | src_inds = src_file['ind'] 47 | tgt_inds = tgt_file['ind'] 48 | src_inds = np.tile(src_inds, (num_samples,1)).T 49 | tgt_samples = np.random.randint(0, high=len(tgt_inds) - 1, 
size=(len(src_inds), num_samples)) 50 | tgt_samples_inds = tgt_inds[tgt_samples] 51 | 52 | dists = dist_matrix[src_inds.reshape(-1), tgt_samples_inds.reshape(-1)].reshape(*src_inds.shape) 53 | dists_unique = using_complex(dists) 54 | idx = np.argsort(dists_unique, axis=-1) 55 | dists_sorted = np.take_along_axis(dists, idx, axis=-1).astype(np.uint8)[:,:keep] 56 | 57 | tgt_samples_sorted = np.take_along_axis(tgt_samples, idx, axis=-1)[:,:keep] 58 | 59 | if tgt_samples_sorted.max() <= np.iinfo(np.uint16).max: 60 | tgt_samples_sorted = tgt_samples_sorted.astype(np.uint16) 61 | else: 62 | tgt_samples_sorted = tgt_samples_sorted.astype(np.uint32) 63 | 64 | results = {"target_file":p, "dists":dists_sorted, "inds":tgt_samples_sorted} 65 | np.savez_compressed(os.path.join(save_dir, f"pair_{f}"), **results) 66 | 67 | def export_pair_data(data_id): 68 | split_id, model_name = data_id 69 | all_files = all_geoms[data_id] 70 | print(split_id, model_name) 71 | global dist_matrix 72 | dist_matrix = np.load(f'{meta_root}/{split_id}/{model_name}_dist.npz')['arr_0'] 73 | global save_dir 74 | save_dir = os.path.join(save_root, split_id, model_name) 75 | os.makedirs(save_dir, exist_ok=True) 76 | pairs = [ (split_id, model_name, f,random.choice(all_files)) for f in all_files ] 77 | 78 | start_time = time.time() 79 | with Pool(10) as p: 80 | for _ in tqdm.tqdm(p.imap_unordered(process, pairs), total=len(all_files)): 81 | pass 82 | 83 | end_time = time.time() 84 | from datetime import timedelta 85 | time_str = str(timedelta(seconds=end_time - start_time)) 86 | print(f'Total processing takes: {time_str}') 87 | 88 | if __name__ == '__main__': 89 | from collections import defaultdict 90 | global all_geoms 91 | all_geoms = defaultdict(lambda: []) 92 | 93 | for g in glob.glob(f"{flow_root}/*/*/*"): 94 | split_id, model_name, file_name = g.split('/')[-3:] 95 | all_geoms[(split_id, model_name)].append(file_name) 96 | 97 | for k in all_geoms.keys(): 98 | export_pair_data(k) 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | NVIDIA Source Code License for ACID 2 | 3 | 1. Definitions 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | 7 | “Software” means the original work of authorship made available under this License. 8 | 9 | “Work” means the Software and any additions to or derivative works of the Software that are made available under 10 | this License. 11 | 12 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under 13 | U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include 14 | works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 15 | 16 | Works, including the Software, are “made available” under this License by including in or with the Work either 17 | (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 18 | 19 | 2. License Grant 20 | 21 | 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, 22 | worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly 23 | display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 24 | 25 | 3. Limitations 26 | 27 | 3.1 Redistribution. 
You may reproduce or distribute the Work only if (a) you do so under this License, (b) you 28 | include a complete copy of this License with your distribution, and (c) you retain without modification any 29 | copyright, patent, trademark, or attribution notices that are present in the Work. 30 | 31 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and 32 | distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use 33 | limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works 34 | that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution 35 | requirements in Section 3.1) will continue to apply to the Work itself. 36 | 37 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use 38 | non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative 39 | works commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 40 | 41 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, 42 | cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then 43 | your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 44 | 45 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, 46 | or trademarks, except as necessary to reproduce the notices described in this License. 47 | 48 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the 49 | grant in Section 2.1) will terminate immediately. 50 | 51 | 4. Disclaimer of Warranty. 52 | 53 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING 54 | WARRANTIES OR CONDITIONS OF M ERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU 55 | BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 56 | 57 | 5. Limitation of Liability. 58 | 59 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING 60 | NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, 61 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR 62 | INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR 63 | DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMM ERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN 64 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. -------------------------------------------------------------------------------- /ACID/src/data/transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # Transforms 5 | class PointcloudNoise(object): 6 | ''' Point cloud noise transformation class. 7 | 8 | It adds noise to point cloud data. 9 | 10 | Args: 11 | stddev (int): standard deviation 12 | ''' 13 | 14 | def __init__(self, stddev): 15 | self.stddev = stddev 16 | 17 | def __call__(self, data): 18 | ''' Calls the transformation. 
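        Example (illustrative only; assumes the raw points are stored under the
        None key as an (N, 3) float32 array, matching how this transform
        indexes them):

            >>> import numpy as np
            >>> noise = PointcloudNoise(stddev=0.005)
            >>> data = {None: np.zeros((128, 3), dtype=np.float32)}
            >>> out = noise(data)
            >>> out[None].shape
            (128, 3)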
19 | 20 | Args: 21 | data (dictionary): data dictionary 22 | ''' 23 | data_out = data.copy() 24 | points = data[None] 25 | noise = self.stddev * np.random.randn(*points.shape) 26 | noise = noise.astype(np.float32) 27 | data_out[None] = points + noise 28 | return data_out 29 | 30 | 31 | class SubsamplePointcloud(object): 32 | ''' Point cloud subsampling transformation class. 33 | 34 | It subsamples the point cloud data. 35 | 36 | Args: 37 | N (int): number of points to be subsampled 38 | ''' 39 | def __init__(self, N): 40 | self.N = N 41 | 42 | def __call__(self, data): 43 | ''' Calls the transformation. 44 | 45 | Args: 46 | data (dict): data dictionary 47 | ''' 48 | indices = np.random.randint(data.shape[0], size=self.N) 49 | 50 | return data[indices] 51 | 52 | 53 | class SubsamplePoints(object): 54 | ''' Points subsampling transformation class. 55 | 56 | It subsamples the points data. 57 | 58 | Args: 59 | N (int): number of points to be subsampled 60 | ''' 61 | def __init__(self, N): 62 | self.N = N 63 | 64 | def __call__(self, data): 65 | ''' Calls the transformation. 66 | 67 | Args: 68 | data (dictionary): data dictionary 69 | ''' 70 | points = data[None] 71 | occ = data['occ'] 72 | ind = data['ind'] 73 | flow1 = data['flow1'] 74 | flow2 = data['flow2'] 75 | 76 | data_out = data.copy() 77 | if isinstance(self.N, int): 78 | idx = np.random.randint(points.shape[0], size=self.N) 79 | data_out.update({ 80 | None: points[idx, :], 81 | 'occ': occ[idx], 82 | 'ind': ind[idx], 83 | 'flow1': flow1[idx], 84 | 'flow2': flow2[idx], 85 | }) 86 | else: 87 | Nt_out, Nt_in = self.N 88 | occ_binary = (occ >= 0.5) 89 | points0 = points[~occ_binary] 90 | points1 = points[occ_binary] 91 | 92 | ind0 = ind[~occ_binary] 93 | ind1 = ind[occ_binary] 94 | 95 | flow10 = flow1[~occ_binary] 96 | flow11 = flow1[occ_binary] 97 | flow20 = flow2[~occ_binary] 98 | flow21 = flow2[occ_binary] 99 | 100 | idx0 = np.random.randint(points0.shape[0], size=Nt_out) 101 | idx1 = np.random.randint(points1.shape[0], size=Nt_in) 102 | 103 | points0 = points0[idx0, :] 104 | points1 = points1[idx1, :] 105 | points = np.concatenate([points0, points1], axis=0) 106 | 107 | ind0 = ind0[idx0] 108 | ind1 = ind1[idx1] 109 | ind = np.concatenate([ind0, ind1], axis=0) 110 | 111 | flow10 = flow10[idx0] 112 | flow11 = flow11[idx1] 113 | flow1 = np.concatenate([flow10, flow11], axis=0) 114 | flow20 = flow20[idx0] 115 | flow21 = flow21[idx1] 116 | flow2 = np.concatenate([flow20, flow21], axis=0) 117 | 118 | occ0 = np.zeros(Nt_out, dtype=np.float32) 119 | occ1 = np.ones(Nt_in, dtype=np.float32) 120 | occ = np.concatenate([occ0, occ1], axis=0) 121 | 122 | volume = occ_binary.sum() / len(occ_binary) 123 | volume = volume.astype(np.float32) 124 | 125 | data_out.update({ 126 | None: points, 127 | 'occ': occ, 128 | 'volume': volume, 129 | 'ind': ind, 130 | 'flow1': flow1, 131 | 'flow2': flow2, 132 | }) 133 | return data_out 134 | -------------------------------------------------------------------------------- /ACID/preprocess/gen_data_flow_plush.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import time, datetime 4 | import sys 5 | import os.path as osp 6 | ACID_dir = osp.dirname(osp.dirname(osp.realpath(__file__))) 7 | sys.path.insert(0,ACID_dir) 8 | 9 | import json 10 | 11 | from src.utils import plushsim_util 12 | from src.utils import common_util 13 | import glob 14 | import tqdm 15 | from multiprocessing import Pool 16 | import argparse 17 | 18 | parser = 
argparse.ArgumentParser("Training Flow Data Generation") 19 | data_plush_default = osp.join(ACID_dir, "data_plush") 20 | flow_default = osp.join(ACID_dir, "train_data", "flow") 21 | parser.add_argument("--data_root", type=str, default=data_plush_default) 22 | parser.add_argument("--save_root", type=str, default=flow_default) 23 | args = parser.parse_args() 24 | 25 | data_root = args.data_root 26 | save_root = args.save_root 27 | 28 | scene_range = plushsim_util.SCENE_RANGE.copy() 29 | to_range = np.array([[-1.1,-1.1,-1.1],[1.1,1.1,1.1]]) * 0.5 30 | class_to_std = { 31 | 'teddy':0.12, 32 | 'elephant':0.15, 33 | 'octopus':0.12, 34 | 'rabbit':0.08, 35 | 'dog':0.08, 36 | 'snake':0.04, 37 | } 38 | def export_train_data(data_id): 39 | # try: 40 | # load action info 41 | split_id, model_category, model_name, reset_id, interaction_id = data_id 42 | grasp_loc, target_loc, f1, _, f2 = plushsim_util.get_action_info(model_category, model_name, split_id, reset_id, interaction_id, data_root) 43 | # get observations 44 | obj_pts1, env_pts1 = plushsim_util.get_scene_partial_pointcloud( 45 | model_category, model_name, split_id, reset_id, f1, data_root) 46 | obj_pts1=common_util.subsample_points( 47 | common_util.transform_points(obj_pts1, scene_range, to_range), resolution=0.005, return_index=False) 48 | env_pts1=common_util.subsample_points( 49 | common_util.transform_points(env_pts1, scene_range, to_range), resolution=0.020, return_index=False) 50 | # calculate flow 51 | sim_pts1, _, loc,_,_= plushsim_util.get_object_full_points( 52 | model_category, model_name, split_id, reset_id, f1, data_root) 53 | sim_pts2, _,_,_,_= plushsim_util.get_object_full_points( 54 | model_category, model_name, split_id, reset_id, f2, data_root) 55 | sim_pts1=common_util.transform_points(sim_pts1, scene_range, to_range) 56 | sim_pts2=common_util.transform_points(sim_pts2, scene_range, to_range) 57 | sim_pts_flow = sim_pts2 - sim_pts1 58 | 59 | # sample occupancy 60 | center =common_util.transform_points(loc, scene_range, to_range)[0] 61 | pts, occ, pt_class = plushsim_util.sample_occupancies(sim_pts1, center, 62 | std=class_to_std[model_category],sample_scheme='object') 63 | # get implicit flows 64 | flow = sim_pts_flow[pt_class] 65 | # save 66 | kwargs = {'sim_pts':sim_pts1.astype(np.float16), 67 | 'obj_pcloud_obs':obj_pts1.astype(np.float16), 68 | 'env_pcloud':env_pts1.astype(np.float16), 69 | 'pts':pts.astype(np.float16), 70 | 'occ':np.packbits(occ), 71 | 'ind':pt_class.astype(np.uint16), 72 | 'flow':flow.astype(np.float16), 73 | 'start_frame':f1, 74 | 'end_frame':f2, 75 | 'grasp_loc':grasp_loc, 76 | 'target_loc': target_loc} 77 | model_dir = os.path.join(save_root, f"{split_id}", f"{model_name}") 78 | save_path = os.path.join(model_dir, f"{reset_id:03d}_{interaction_id:03d}.npz") 79 | np.savez_compressed(save_path, **kwargs) 80 | 81 | def get_all_data_points_flow(data_root): 82 | good_interactions = glob.glob(f"{data_root}/*/*/*/info/good_interactions.json") 83 | good_ints = [] 84 | for g in tqdm.tqdm(good_interactions): 85 | split_id, model_category, model_name = g.split('/')[-5:-2] 86 | model_dir = os.path.join(save_root, f"{split_id}", f"{model_name}") 87 | os.makedirs(model_dir, exist_ok=True) 88 | model_dir = plushsim_util.get_model_dir(data_root, split_id, model_category, model_name) 89 | with open(g, 'r') as fp: 90 | good_ones = json.load(fp) 91 | for k,v in good_ones.items(): 92 | reset_id = int(k) 93 | for int_id in v: 94 | good_ints.append((split_id, model_category, model_name, reset_id, int_id)) 95 | return 
good_ints 96 | 97 | good_ints = get_all_data_points_flow(data_root)#[:100] 98 | 99 | start_time = time.time() 100 | with Pool(40) as p: 101 | for _ in tqdm.tqdm(p.imap_unordered(export_train_data, good_ints), total=len(good_ints)): 102 | pass 103 | 104 | end_time = time.time() 105 | from datetime import timedelta 106 | time_str = str(timedelta(seconds=end_time - start_time)) 107 | print(f'Total processing takes: {time_str}') -------------------------------------------------------------------------------- /ACID/src/encoder/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from src.layers import ResnetBlockFC 5 | from torch_scatter import scatter_mean, scatter_max 6 | from src.common import coordinate2index, normalize_coordinate 7 | from src.encoder.unet import UNet 8 | 9 | class GeomEncoder(nn.Module): 10 | ''' PointNet-based encoder network with ResNet blocks for each point. 11 | Number of input points are fixed. 12 | 13 | Args: 14 | c_dim (int): dimension of latent code c 15 | dim (int): input points dimension 16 | hidden_dim (int): hidden dimension of the network 17 | scatter_type (str): feature aggregation when doing local pooling 18 | unet (bool): weather to use U-Net 19 | unet_kwargs (str): U-Net parameters 20 | unet3d (bool): weather to use 3D U-Net 21 | unet3d_kwargs (str): 3D U-Net parameters 22 | plane_resolution (int): defined resolution for plane feature 23 | grid_resolution (int): defined resolution for grid feature 24 | plane_type (str): feature type, 'xz' - 1-plane, ['xz', 'xy', 'yz'] - 3-plane, ['grid'] - 3D grid volume 25 | padding (float): conventional padding paramter of ONet for unit cube, so [-0.5, 0.5] -> [-0.55, 0.55] 26 | n_blocks (int): number of blocks ResNetBlockFC layers 27 | ''' 28 | 29 | def __init__(self, c_dim=128, dim=3, f_dim=9, hidden_dim=128, scatter_type='max', 30 | unet_kwargs=None, plane_resolution=None, padding=0.1, n_blocks=5): 31 | super().__init__() 32 | self.c_dim = c_dim 33 | 34 | self.fc_pos = nn.Linear(dim+f_dim, 2*hidden_dim) 35 | self.blocks = nn.ModuleList([ 36 | ResnetBlockFC(2*hidden_dim, hidden_dim) for i in range(n_blocks) 37 | ]) 38 | self.fc_c = nn.Linear(hidden_dim, c_dim) 39 | 40 | self.actvn = nn.ReLU() 41 | self.hidden_dim = hidden_dim 42 | 43 | self.unet = UNet(c_dim, in_channels=c_dim, **unet_kwargs) 44 | 45 | self.reso_plane = plane_resolution 46 | self.padding = padding 47 | 48 | if scatter_type == 'max': 49 | self.scatter = scatter_max 50 | elif scatter_type == 'mean': 51 | self.scatter = scatter_mean 52 | else: 53 | raise ValueError('incorrect scatter type') 54 | 55 | 56 | def generate_plane_features(self, p, c, plane='xz'): 57 | # acquire indices of features in plane 58 | xy = normalize_coordinate(p.clone(), plane=plane, padding=self.padding) # normalize to the range of (0, 1) 59 | index = coordinate2index(xy, self.reso_plane) 60 | 61 | # scatter plane features from points 62 | fea_plane = c.new_zeros(p.size(0), self.c_dim, self.reso_plane**2) 63 | c = c.permute(0, 2, 1) # B x 512 x T 64 | fea_plane = scatter_mean(c, index, out=fea_plane) # B x 512 x reso^2 65 | fea_plane = fea_plane.reshape(p.size(0), self.c_dim, self.reso_plane, self.reso_plane) # sparce matrix (B x 512 x reso x reso) 66 | 67 | # process the plane features with UNet 68 | fea_plane = self.unet(fea_plane) 69 | 70 | return fea_plane 71 | 72 | def pool_local(self, xy, index, c): 73 | bs, fea_dim = c.size(0), c.size(2) 74 | keys = 
xy.keys() 75 | 76 | c_out = 0 77 | for key in keys: 78 | # scatter plane features from points 79 | fea = self.scatter(c.permute(0, 2, 1), index[key], dim_size=self.reso_plane**2) 80 | if self.scatter == scatter_max: 81 | fea = fea[0] 82 | # gather feature back to points 83 | fea = fea.gather(dim=2, index=index[key].expand(-1, fea_dim, -1)) 84 | c_out += fea 85 | return c_out.permute(0, 2, 1) 86 | 87 | 88 | def forward(self, p): 89 | if type(p) is tuple: 90 | p, pf = p 91 | else: 92 | pf = None 93 | # acquire the index for each point 94 | coord = {} 95 | index = {} 96 | coord['xz'] = normalize_coordinate(p.clone(), plane='xz', padding=self.padding) 97 | index['xz'] = coordinate2index(coord['xz'], self.reso_plane) 98 | coord['xy'] = normalize_coordinate(p.clone(), plane='xy', padding=self.padding) 99 | index['xy'] = coordinate2index(coord['xy'], self.reso_plane) 100 | coord['yz'] = normalize_coordinate(p.clone(), plane='yz', padding=self.padding) 101 | index['yz'] = coordinate2index(coord['yz'], self.reso_plane) 102 | 103 | net = self.fc_pos(torch.cat([p, pf],dim=-1)) 104 | 105 | net = self.blocks[0](net) 106 | for block in self.blocks[1:]: 107 | pooled = self.pool_local(coord, index, net) 108 | net = torch.cat([net, pooled], dim=2) 109 | net = block(net) 110 | 111 | c = self.fc_c(net) 112 | 113 | fea = {} 114 | fea['xz'] = self.generate_plane_features(p, c, plane='xz') 115 | fea['xy'] = self.generate_plane_features(p, c, plane='xy') 116 | fea['yz'] = self.generate_plane_features(p, c, plane='yz') 117 | 118 | return fea 119 | 120 | -------------------------------------------------------------------------------- /ACID/src/conv_onet/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.encoder import encoder_dict 3 | from src.conv_onet import models, training 4 | from src.conv_onet import generation 5 | from src import data 6 | 7 | def get_model(cfg,device=None, dataset=None, **kwargs): 8 | if cfg['model']['type'] == 'geom': 9 | return get_geom_model(cfg,device,dataset) 10 | elif cfg['model']['type'] == 'combined': 11 | return get_combined_model(cfg,device,dataset) 12 | 13 | 14 | def get_combined_model(cfg, device=None, dataset=None, **kwargs): 15 | ''' Return the Occupancy Network model. 
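        Example of the config keys read below (values are illustrative
        placeholders; see configs/*.yaml for the settings actually used):

            cfg = {
                'data': {'dim': 3, 'act_dim': 3, 'padding': 0.1},
                'model': {
                    'type': 'combined',
                    'obj_c_dim': 128,
                    'env_c_dim': 128,          # omit this key to disable the env encoder
                    'decoder_kwargs': {},      # per-module hyper-parameters go here
                    'obj_encoder_kwargs': {},
                    'env_encoder_kwargs': {},
                },
            }

        Note that the environment encoder is created whenever 'env_c_dim'
        appears in cfg['model']; leave the key out entirely to disable it.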
16 | 17 | Args: 18 | cfg (dict): imported yaml config 19 | device (device): pytorch device 20 | dataset (dataset): dataset 21 | ''' 22 | dim = cfg['data']['dim'] 23 | act_dim = cfg['data']['act_dim'] 24 | obj_c_dim = cfg['model']['obj_c_dim'] 25 | decoder_kwargs = cfg['model']['decoder_kwargs'] 26 | obj_encoder_kwargs = cfg['model']['obj_encoder_kwargs'] 27 | padding = cfg['data']['padding'] 28 | decoder = 'combined_decoder' 29 | encoder = 'geom_encoder' 30 | 31 | if 'env_c_dim' in cfg['model'] and 'env_c_dim' != 0: 32 | env_c_dim = cfg['model']['env_c_dim'] 33 | env_encoder_kwargs = cfg['model']['env_encoder_kwargs'] 34 | env_encoder = encoder_dict[encoder]( 35 | dim=dim, c_dim=env_c_dim, padding=padding, 36 | **env_encoder_kwargs 37 | ) 38 | else: 39 | env_c_dim = 0 40 | env_encoder=None 41 | 42 | decoder = models.decoder_dict[decoder]( 43 | dim=dim, 44 | c_per_dim=obj_c_dim+env_c_dim, 45 | c_act_dim=obj_c_dim+env_c_dim, 46 | padding=padding, 47 | **decoder_kwargs 48 | ) 49 | 50 | obj_per_encoder = encoder_dict[encoder]( 51 | dim=dim, c_dim=obj_c_dim, padding=padding, 52 | **obj_encoder_kwargs 53 | ) 54 | obj_act_encoder = encoder_dict[encoder]( 55 | dim=act_dim, c_dim=obj_c_dim, padding=padding, 56 | **obj_encoder_kwargs 57 | ) 58 | 59 | model = models.ConvImpDyn( 60 | obj_per_encoder, obj_act_encoder, env_encoder, decoder, device=device 61 | ) 62 | 63 | return model 64 | 65 | def get_geom_model(cfg, device=None, dataset=None, **kwargs): 66 | ''' Return the Occupancy Network model. 67 | 68 | Args: 69 | cfg (dict): imported yaml config 70 | device (device): pytorch device 71 | dataset (dataset): dataset 72 | ''' 73 | dim = cfg['data']['dim'] 74 | obj_c_dim = cfg['model']['obj_c_dim'] 75 | decoder_kwargs = cfg['model']['decoder_kwargs'] 76 | obj_encoder_kwargs = cfg['model']['obj_encoder_kwargs'] 77 | padding = cfg['data']['padding'] 78 | decoder = 'geom_decoder' 79 | encoder = 'geom_encoder' 80 | 81 | if 'env_c_dim' in cfg['model'] and 'env_c_dim' != 0: 82 | env_c_dim = cfg['model']['env_c_dim'] 83 | env_encoder_kwargs = cfg['model']['env_encoder_kwargs'] 84 | env_encoder = encoder_dict[encoder]( 85 | dim=dim, c_dim=env_c_dim, padding=padding, 86 | **env_encoder_kwargs 87 | ) 88 | else: 89 | env_c_dim = 0 90 | env_encoder=None 91 | 92 | decoder = models.decoder_dict[decoder]( 93 | dim=dim, c_dim=obj_c_dim+env_c_dim, padding=padding, 94 | **decoder_kwargs 95 | ) 96 | 97 | obj_encoder = encoder_dict[encoder]( 98 | dim=dim, c_dim=obj_c_dim, padding=padding, 99 | **obj_encoder_kwargs 100 | ) 101 | 102 | model = models.ConvOccGeom( 103 | obj_encoder, env_encoder, decoder, device=device 104 | ) 105 | 106 | return model 107 | 108 | def get_trainer(model, optimizer, cfg, device, **kwargs): 109 | ''' Returns the trainer object. 110 | 111 | Args: 112 | model (nn.Module): the Occupancy Network model 113 | optimizer (optimizer): pytorch optimizer object 114 | cfg (dict): imported yaml config 115 | device (device): pytorch device 116 | ''' 117 | out_dir = cfg['training']['out_dir'] 118 | vis_dir = os.path.join(out_dir, 'vis') 119 | 120 | trainer = training.PlushTrainer( 121 | model, optimizer, cfg, 122 | device=device, 123 | vis_dir=vis_dir ) 124 | 125 | return trainer 126 | 127 | 128 | def get_generator(model, cfg, device, **kwargs): 129 | ''' Returns the generator object. 
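        Example of the config keys read below (illustrative values only):

            cfg['test']       = {'threshold': 0.5}
            cfg['generation'] = {'resolution_0': 32, 'upsampling_steps': 2,
                                 'use_sampling': False, 'refinement_step': 0,
                                 'simplify_nfaces': None}
            cfg['data']       = {'padding': 0.1}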
130 | 131 | Args: 132 | model (nn.Module): Occupancy Network model 133 | cfg (dict): imported yaml config 134 | device (device): pytorch device 135 | ''' 136 | generator = generation.Generator3D( 137 | model, 138 | device=device, 139 | threshold=cfg['test']['threshold'], 140 | resolution0=cfg['generation']['resolution_0'], 141 | upsampling_steps=cfg['generation']['upsampling_steps'], 142 | sample=cfg['generation']['use_sampling'], 143 | refinement_step=cfg['generation']['refinement_step'], 144 | simplify_nfaces=cfg['generation']['simplify_nfaces'], 145 | padding=cfg['data']['padding'], 146 | vol_info = None, 147 | vol_bound = None, 148 | ) 149 | return generator 150 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/pyarraymodule.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _EXTMODULE_H 3 | #define _EXTMODULE_H 4 | 5 | #include 6 | #include 7 | 8 | // #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 9 | #define PY_ARRAY_UNIQUE_SYMBOL mcubes_PyArray_API 10 | #define NO_IMPORT_ARRAY 11 | #include "numpy/arrayobject.h" 12 | 13 | #include 14 | 15 | template 16 | struct numpy_typemap; 17 | 18 | #define define_numpy_type(ctype, dtype) \ 19 | template<> \ 20 | struct numpy_typemap \ 21 | {static const int type = dtype;}; 22 | 23 | define_numpy_type(bool, NPY_BOOL); 24 | define_numpy_type(char, NPY_BYTE); 25 | define_numpy_type(short, NPY_SHORT); 26 | define_numpy_type(int, NPY_INT); 27 | define_numpy_type(long, NPY_LONG); 28 | define_numpy_type(long long, NPY_LONGLONG); 29 | define_numpy_type(unsigned char, NPY_UBYTE); 30 | define_numpy_type(unsigned short, NPY_USHORT); 31 | define_numpy_type(unsigned int, NPY_UINT); 32 | define_numpy_type(unsigned long, NPY_ULONG); 33 | define_numpy_type(unsigned long long, NPY_ULONGLONG); 34 | define_numpy_type(float, NPY_FLOAT); 35 | define_numpy_type(double, NPY_DOUBLE); 36 | define_numpy_type(long double, NPY_LONGDOUBLE); 37 | define_numpy_type(std::complex, NPY_CFLOAT); 38 | define_numpy_type(std::complex, NPY_CDOUBLE); 39 | define_numpy_type(std::complex, NPY_CLONGDOUBLE); 40 | 41 | template 42 | T PyArray_SafeGet(const PyArrayObject* aobj, const npy_intp* indaux) 43 | { 44 | // HORROR. 
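    // PyArray_GetPtr() in the NumPy C API takes non-const pointers, so the
    // const qualifiers are cast away here before looking up the element.
    // The switch below then reads the element according to the array's
    // runtime dtype and converts it to the requested template type T.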
45 | npy_intp* ind = const_cast(indaux); 46 | void* ptr = PyArray_GetPtr(const_cast(aobj), ind); 47 | switch(PyArray_TYPE(aobj)) 48 | { 49 | case NPY_BOOL: 50 | return static_cast(*reinterpret_cast(ptr)); 51 | case NPY_BYTE: 52 | return static_cast(*reinterpret_cast(ptr)); 53 | case NPY_SHORT: 54 | return static_cast(*reinterpret_cast(ptr)); 55 | case NPY_INT: 56 | return static_cast(*reinterpret_cast(ptr)); 57 | case NPY_LONG: 58 | return static_cast(*reinterpret_cast(ptr)); 59 | case NPY_LONGLONG: 60 | return static_cast(*reinterpret_cast(ptr)); 61 | case NPY_UBYTE: 62 | return static_cast(*reinterpret_cast(ptr)); 63 | case NPY_USHORT: 64 | return static_cast(*reinterpret_cast(ptr)); 65 | case NPY_UINT: 66 | return static_cast(*reinterpret_cast(ptr)); 67 | case NPY_ULONG: 68 | return static_cast(*reinterpret_cast(ptr)); 69 | case NPY_ULONGLONG: 70 | return static_cast(*reinterpret_cast(ptr)); 71 | case NPY_FLOAT: 72 | return static_cast(*reinterpret_cast(ptr)); 73 | case NPY_DOUBLE: 74 | return static_cast(*reinterpret_cast(ptr)); 75 | case NPY_LONGDOUBLE: 76 | return static_cast(*reinterpret_cast(ptr)); 77 | default: 78 | throw std::runtime_error("data type not supported"); 79 | } 80 | } 81 | 82 | template 83 | T PyArray_SafeSet(PyArrayObject* aobj, const npy_intp* indaux, const T& value) 84 | { 85 | // HORROR. 86 | npy_intp* ind = const_cast(indaux); 87 | void* ptr = PyArray_GetPtr(aobj, ind); 88 | switch(PyArray_TYPE(aobj)) 89 | { 90 | case NPY_BOOL: 91 | *reinterpret_cast(ptr) = static_cast(value); 92 | break; 93 | case NPY_BYTE: 94 | *reinterpret_cast(ptr) = static_cast(value); 95 | break; 96 | case NPY_SHORT: 97 | *reinterpret_cast(ptr) = static_cast(value); 98 | break; 99 | case NPY_INT: 100 | *reinterpret_cast(ptr) = static_cast(value); 101 | break; 102 | case NPY_LONG: 103 | *reinterpret_cast(ptr) = static_cast(value); 104 | break; 105 | case NPY_LONGLONG: 106 | *reinterpret_cast(ptr) = static_cast(value); 107 | break; 108 | case NPY_UBYTE: 109 | *reinterpret_cast(ptr) = static_cast(value); 110 | break; 111 | case NPY_USHORT: 112 | *reinterpret_cast(ptr) = static_cast(value); 113 | break; 114 | case NPY_UINT: 115 | *reinterpret_cast(ptr) = static_cast(value); 116 | break; 117 | case NPY_ULONG: 118 | *reinterpret_cast(ptr) = static_cast(value); 119 | break; 120 | case NPY_ULONGLONG: 121 | *reinterpret_cast(ptr) = static_cast(value); 122 | break; 123 | case NPY_FLOAT: 124 | *reinterpret_cast(ptr) = static_cast(value); 125 | break; 126 | case NPY_DOUBLE: 127 | *reinterpret_cast(ptr) = static_cast(value); 128 | break; 129 | case NPY_LONGDOUBLE: 130 | *reinterpret_cast(ptr) = static_cast(value); 131 | break; 132 | default: 133 | throw std::runtime_error("data type not supported"); 134 | } 135 | } 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /ACID/src/utils/libmcubes/pywrapper.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "pywrapper.h" 3 | 4 | #include "marchingcubes.h" 5 | 6 | #include 7 | 8 | struct PythonToCFunc 9 | { 10 | PyObject* func; 11 | PythonToCFunc(PyObject* func) {this->func = func;} 12 | double operator()(double x, double y, double z) 13 | { 14 | PyObject* res = PyObject_CallFunction(func, "(d,d,d)", x, y, z); // py::extract(func(x,y,z)); 15 | if(res == NULL) 16 | return 0.0; 17 | 18 | double result = PyFloat_AsDouble(res); 19 | Py_DECREF(res); 20 | return result; 21 | } 22 | }; 23 | 24 | PyObject* marching_cubes_func(PyObject* lower, PyObject* upper, 25 
| int numx, int numy, int numz, PyObject* f, double isovalue) 26 | { 27 | std::vector vertices; 28 | std::vector polygons; 29 | 30 | // Copy the lower and upper coordinates to a C array. 31 | double lower_[3]; 32 | double upper_[3]; 33 | for(int i=0; i<3; ++i) 34 | { 35 | PyObject* l = PySequence_GetItem(lower, i); 36 | if(l == NULL) 37 | throw std::runtime_error("error"); 38 | PyObject* u = PySequence_GetItem(upper, i); 39 | if(u == NULL) 40 | { 41 | Py_DECREF(l); 42 | throw std::runtime_error("error"); 43 | } 44 | 45 | lower_[i] = PyFloat_AsDouble(l); 46 | upper_[i] = PyFloat_AsDouble(u); 47 | 48 | Py_DECREF(l); 49 | Py_DECREF(u); 50 | if(lower_[i]==-1.0 || upper_[i]==-1.0) 51 | { 52 | if(PyErr_Occurred()) 53 | throw std::runtime_error("error"); 54 | } 55 | } 56 | 57 | // Marching cubes. 58 | mc::marching_cubes(lower_, upper_, numx, numy, numz, PythonToCFunc(f), isovalue, vertices, polygons); 59 | 60 | // Copy the result to two Python ndarrays. 61 | npy_intp size_vertices = vertices.size(); 62 | npy_intp size_polygons = polygons.size(); 63 | PyArrayObject* verticesarr = reinterpret_cast(PyArray_SimpleNew(1, &size_vertices, PyArray_DOUBLE)); 64 | PyArrayObject* polygonsarr = reinterpret_cast(PyArray_SimpleNew(1, &size_polygons, PyArray_ULONG)); 65 | 66 | std::vector::const_iterator it = vertices.begin(); 67 | for(int i=0; it!=vertices.end(); ++i, ++it) 68 | *reinterpret_cast(PyArray_GETPTR1(verticesarr, i)) = *it; 69 | std::vector::const_iterator it2 = polygons.begin(); 70 | for(int i=0; it2!=polygons.end(); ++i, ++it2) 71 | *reinterpret_cast(PyArray_GETPTR1(polygonsarr, i)) = *it2; 72 | 73 | PyObject* res = Py_BuildValue("(O,O)", verticesarr, polygonsarr); 74 | Py_XDECREF(verticesarr); 75 | Py_XDECREF(polygonsarr); 76 | return res; 77 | } 78 | 79 | struct PyArrayToCFunc 80 | { 81 | PyArrayObject* arr; 82 | PyArrayToCFunc(PyArrayObject* arr) {this->arr = arr;} 83 | double operator()(int x, int y, int z) 84 | { 85 | npy_intp c[3] = {x,y,z}; 86 | return PyArray_SafeGet(arr, c); 87 | } 88 | }; 89 | 90 | PyObject* marching_cubes(PyArrayObject* arr, double isovalue) 91 | { 92 | if(PyArray_NDIM(arr) != 3) 93 | throw std::runtime_error("Only three-dimensional arrays are supported."); 94 | 95 | // Prepare data. 96 | npy_intp* shape = PyArray_DIMS(arr); 97 | double lower[3] = {0,0,0}; 98 | double upper[3] = {shape[0]-1, shape[1]-1, shape[2]-1}; 99 | long numx = upper[0] - lower[0] + 1; 100 | long numy = upper[1] - lower[1] + 1; 101 | long numz = upper[2] - lower[2] + 1; 102 | std::vector vertices; 103 | std::vector polygons; 104 | 105 | // Marching cubes. 106 | mc::marching_cubes(lower, upper, numx, numy, numz, PyArrayToCFunc(arr), isovalue, 107 | vertices, polygons); 108 | 109 | // Copy the result to two Python ndarrays. 
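    // The outputs are flat 1-D arrays: vertices as 3*N doubles (x, y, z per
    // vertex) and polygons as 3*M triangle vertex indices; reshaping them to
    // (N, 3) / (M, 3) is presumably left to the Cython wrapper in mcubes.pyx.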
110 | npy_intp size_vertices = vertices.size(); 111 | npy_intp size_polygons = polygons.size(); 112 | PyArrayObject* verticesarr = reinterpret_cast(PyArray_SimpleNew(1, &size_vertices, PyArray_DOUBLE)); 113 | PyArrayObject* polygonsarr = reinterpret_cast(PyArray_SimpleNew(1, &size_polygons, PyArray_ULONG)); 114 | 115 | std::vector::const_iterator it = vertices.begin(); 116 | for(int i=0; it!=vertices.end(); ++i, ++it) 117 | *reinterpret_cast(PyArray_GETPTR1(verticesarr, i)) = *it; 118 | std::vector::const_iterator it2 = polygons.begin(); 119 | for(int i=0; it2!=polygons.end(); ++i, ++it2) 120 | *reinterpret_cast(PyArray_GETPTR1(polygonsarr, i)) = *it2; 121 | 122 | PyObject* res = Py_BuildValue("(O,O)", verticesarr, polygonsarr); 123 | Py_XDECREF(verticesarr); 124 | Py_XDECREF(polygonsarr); 125 | 126 | return res; 127 | } 128 | 129 | PyObject* marching_cubes2(PyArrayObject* arr, double isovalue) 130 | { 131 | if(PyArray_NDIM(arr) != 3) 132 | throw std::runtime_error("Only three-dimensional arrays are supported."); 133 | 134 | // Prepare data. 135 | npy_intp* shape = PyArray_DIMS(arr); 136 | double lower[3] = {0,0,0}; 137 | double upper[3] = {shape[0]-1, shape[1]-1, shape[2]-1}; 138 | long numx = upper[0] - lower[0] + 1; 139 | long numy = upper[1] - lower[1] + 1; 140 | long numz = upper[2] - lower[2] + 1; 141 | std::vector vertices; 142 | std::vector polygons; 143 | 144 | // Marching cubes. 145 | mc::marching_cubes2(lower, upper, numx, numy, numz, PyArrayToCFunc(arr), isovalue, 146 | vertices, polygons); 147 | 148 | // Copy the result to two Python ndarrays. 149 | npy_intp size_vertices = vertices.size(); 150 | npy_intp size_polygons = polygons.size(); 151 | PyArrayObject* verticesarr = reinterpret_cast(PyArray_SimpleNew(1, &size_vertices, PyArray_DOUBLE)); 152 | PyArrayObject* polygonsarr = reinterpret_cast(PyArray_SimpleNew(1, &size_polygons, PyArray_ULONG)); 153 | 154 | std::vector::const_iterator it = vertices.begin(); 155 | for(int i=0; it!=vertices.end(); ++i, ++it) 156 | *reinterpret_cast(PyArray_GETPTR1(verticesarr, i)) = *it; 157 | std::vector::const_iterator it2 = polygons.begin(); 158 | for(int i=0; it2!=polygons.end(); ++i, ++it2) 159 | *reinterpret_cast(PyArray_GETPTR1(polygonsarr, i)) = *it2; 160 | 161 | PyObject* res = Py_BuildValue("(O,O)", verticesarr, polygonsarr); 162 | Py_XDECREF(verticesarr); 163 | Py_XDECREF(polygonsarr); 164 | 165 | return res; 166 | } 167 | 168 | PyObject* marching_cubes3(PyArrayObject* arr, double isovalue) 169 | { 170 | if(PyArray_NDIM(arr) != 3) 171 | throw std::runtime_error("Only three-dimensional arrays are supported."); 172 | 173 | // Prepare data. 174 | npy_intp* shape = PyArray_DIMS(arr); 175 | double lower[3] = {0,0,0}; 176 | double upper[3] = {shape[0]-1, shape[1]-1, shape[2]-1}; 177 | long numx = upper[0] - lower[0] + 1; 178 | long numy = upper[1] - lower[1] + 1; 179 | long numz = upper[2] - lower[2] + 1; 180 | std::vector vertices; 181 | std::vector polygons; 182 | 183 | // Marching cubes. 184 | mc::marching_cubes3(lower, upper, numx, numy, numz, PyArrayToCFunc(arr), isovalue, 185 | vertices, polygons); 186 | 187 | // Copy the result to two Python ndarrays. 
188 | npy_intp size_vertices = vertices.size(); 189 | npy_intp size_polygons = polygons.size(); 190 | PyArrayObject* verticesarr = reinterpret_cast(PyArray_SimpleNew(1, &size_vertices, PyArray_DOUBLE)); 191 | PyArrayObject* polygonsarr = reinterpret_cast(PyArray_SimpleNew(1, &size_polygons, PyArray_ULONG)); 192 | 193 | std::vector::const_iterator it = vertices.begin(); 194 | for(int i=0; it!=vertices.end(); ++i, ++it) 195 | *reinterpret_cast(PyArray_GETPTR1(verticesarr, i)) = *it; 196 | std::vector::const_iterator it2 = polygons.begin(); 197 | for(int i=0; it2!=polygons.end(); ++i, ++it2) 198 | *reinterpret_cast(PyArray_GETPTR1(polygonsarr, i)) = *it2; 199 | 200 | PyObject* res = Py_BuildValue("(O,O)", verticesarr, polygonsarr); 201 | Py_XDECREF(verticesarr); 202 | Py_XDECREF(polygonsarr); 203 | 204 | return res; 205 | } -------------------------------------------------------------------------------- /PlushSim/scripts/data_gen_attic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | 10 | import os 11 | import time 12 | import argparse 13 | import json 14 | 15 | from utils import * 16 | parser = argparse.ArgumentParser("Dataset generation") 17 | ################################################################ 18 | # save to args 19 | parser.add_argument("--save_dir", type=str, default="/result/interaction_sequence") 20 | parser.add_argument("--img_subdir", type=str, default='img') 21 | parser.add_argument("--geom_subdir", type=str, default='geom') 22 | parser.add_argument("--info_subdir", type=str, default='info') 23 | parser.add_argument("--save_every", type=int, default=25) 24 | 25 | ################################################################ 26 | # interaction args 27 | parser.add_argument("--num_interaction", type=int, default=18) 28 | parser.add_argument("--reset_every", type=int, default=6) 29 | 30 | ################################################################ 31 | # scene args 32 | parser.add_argument("--asset_root", type=str, default="/result/assets") 33 | parser.add_argument("--scene_path", type=str, default="attic_lean/Attic_clean_v2.usda") 34 | parser.add_argument("--plush_path", type=str, default="animals/teddy/teddy_scaled/teddy_scaled.usda") 35 | parser.add_argument("--skip_layout_randomization", action="store_true", default=False) 36 | parser.add_argument("--skip_lights_randomization", action="store_true", default=False) 37 | 38 | args = parser.parse_args() 39 | 40 | os.makedirs(args.save_dir, exist_ok=True) 41 | os.makedirs(os.path.join(args.save_dir, args.img_subdir), exist_ok=True) 42 | os.makedirs(os.path.join(args.save_dir, args.geom_subdir), exist_ok=True) 43 | os.makedirs(os.path.join(args.save_dir, args.info_subdir), exist_ok=True) 44 | img_dir = os.path.join(args.save_dir, args.img_subdir) 45 | geom_dir = os.path.join(args.save_dir, args.geom_subdir) 46 | info_dir = os.path.join(args.save_dir, args.info_subdir) 47 | 48 | def main(): 49 | from attic_scene import attic_scene 50 | scene_path = os.path.join(args.asset_root, args.scene_path) 51 | plush_path 
= os.path.join(args.asset_root, args.plush_path) 52 | scene = attic_scene( 53 | scene_path, 54 | plush_path, 55 | RESET_STATIC=True, 56 | RAND_LAYOUT=not args.skip_layout_randomization, 57 | RAND_LIGHTS=not args.skip_lights_randomization,) 58 | 59 | start_time = time.time() 60 | # save scene overall info 61 | with open(os.path.join(info_dir, "scene_meta.json"), 'w') as fp: 62 | json.dump(scene.get_scene_metadata(), fp) 63 | 64 | # number of resets 65 | num_resets = (args.num_interaction + args.reset_every - 1) // args.reset_every 66 | for reset in range(num_resets): 67 | # save scene reset collider info 68 | np.savez_compressed(os.path.join(info_dir, f"clutter_info_{reset:04d}.npz"), **scene.get_scene_background_state()) 69 | 70 | num_steps = min(args.num_interaction, (reset + 1) * args.reset_every) - reset * args.reset_every 71 | # sample interactions 72 | actions = { 73 | 'grasp_points':[], 74 | 'target_points':[], 75 | 'grasp_pixels':[], 76 | 'start_frames':[], 77 | 'release_frames':[], 78 | 'static_frames':[], } 79 | 80 | # save start frame 81 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 82 | np.savez_compressed( 83 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 84 | **scene.get_scene_state_plush(convert_to=np.float16)) 85 | 86 | for interaction in range(num_steps): 87 | # stop simulating 88 | scene.kit.pause() 89 | action = scene.sample_action() 90 | if action is None: 91 | scene.kit.play() 92 | continue 93 | grasp_point, target_point, grasp_pixel = action 94 | actions['grasp_points'].append(np.array(grasp_point,np.float16)) 95 | actions['target_points'].append(np.array(target_point,np.float16)) 96 | actions['grasp_pixels'].append(np.array(grasp_pixel,np.uint16)) 97 | actions['start_frames'].append(np.array(scene.frame,np.uint16)) 98 | 99 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 100 | np.savez_compressed( 101 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 102 | **scene.get_scene_state_plush(convert_to=np.float16)) 103 | 104 | 105 | scene.kit.play() 106 | 107 | init_traj = scene.gripper.plan_trajectory(scene.gripper.eef_default_loc, grasp_point) 108 | # move 109 | for pos in init_traj: 110 | scene.step() 111 | scene.gripper.set_translation(tuple(pos)) 112 | if scene.frame % args.save_every == args.save_every - 1: 113 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 114 | np.savez_compressed( 115 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 116 | **scene.get_scene_state_plush(convert_to=np.float16)) 117 | 118 | scene.kit.pause() 119 | #init_move_traj = scene.gripper.set_translation(grasp_point) 120 | scene.gripper.grasp(scene.plush) 121 | 122 | scene.kit.play() 123 | traj = scene.gripper.plan_trajectory(grasp_point, target_point) 124 | 125 | # move 126 | for pos in traj: 127 | scene.step() 128 | scene.gripper.set_translation(tuple(pos)) 129 | if scene.frame % args.save_every == args.save_every - 1: 130 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 131 | np.savez_compressed( 132 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 133 | **scene.get_scene_state_plush(convert_to=np.float16)) 134 | 135 | # wait until stable 136 | for ff in range(scene.FALL_MAX): 137 | scene.step() 138 | if scene.check_scene_static(): 139 | print(f"grasp reaching a resting state after {ff} steps") 140 | break 141 | 142 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 
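            # This pair of saves records the settled post-grasp state: the
            # rendered observation above and the full plush geometry below
            # share the same frame id, which is then logged as the release frame.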
143 | np.savez_compressed( 144 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 145 | **scene.get_scene_state_plush(convert_to=np.float16)) 146 | actions['release_frames'].append(np.array(scene.frame,np.uint16)) 147 | 148 | # release 149 | scene.kit.pause() 150 | scene.gripper.ungrasp() 151 | # TODO: delete gripper collider 152 | scene.kit.play() 153 | 154 | for ff in range(scene.FALL_MAX+scene.DROP_MIN): 155 | scene.step() 156 | if scene.frame % args.save_every == args.save_every - 1: 157 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 158 | np.savez_compressed( 159 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 160 | **scene.get_scene_state_plush(convert_to=np.float16)) 161 | if ff < scene.DROP_MIN: 162 | continue 163 | if scene.check_scene_static(): 164 | print(f"release reaching a resting state after {ff} steps") 165 | break 166 | scene.gripper.reset_translation() 167 | 168 | save_frame(f"{reset:04d}_{scene.frame:06d}", scene.get_observations(), img_dir) 169 | np.savez_compressed( 170 | os.path.join(geom_dir, f"{reset:04d}_{scene.frame:06d}.npz"), 171 | **scene.get_scene_state_plush(convert_to=np.float16)) 172 | actions['static_frames'].append(np.array(scene.frame,np.uint16)) 173 | 174 | np.savez_compressed(os.path.join(info_dir, f"interaction_info_{reset:04d}.npz"), **actions) 175 | end_time = time.time() 176 | from datetime import timedelta 177 | time_str = str(timedelta(seconds=end_time - start_time)) 178 | print(f'Sampling {num_steps} interactions takes: {time_str}') 179 | 180 | scene.reset() 181 | 182 | # cleanup 183 | scene.kit.shutdown() 184 | 185 | 186 | if __name__ == "__main__": 187 | main() 188 | -------------------------------------------------------------------------------- /ACID/src/conv_onet/models/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from src.layers import ResnetBlockFC 5 | from src.common import normalize_coordinate, normalize_3d_coordinate, map2local 6 | 7 | class GeomDecoder(nn.Module): 8 | ''' Decoder. 9 | Instead of conditioning on global features, on plane/volume local features. 
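        Rough usage sketch (shapes are illustrative, not prescriptive):

            out = decoder(p, c_plane)     # p: (B, T, 3) query points
            occ_logits = out['occ']       # (B, T) occupancy logits
            corr = out.get('corr')        # present when corr_dim != 0

        where c_plane is the dict of 'xz', 'xy', 'yz' feature planes produced
        by the encoder, each of shape (B, c_dim, reso, reso).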
10 | 11 | Args: 12 | dim (int): input dimension 13 | c_dim (int): dimension of latent conditioned code c 14 | hidden_size (int): hidden size of Decoder network 15 | n_blocks (int): number of blocks ResNetBlockFC layers 16 | leaky (bool): whether to use leaky ReLUs 17 | sample_mode (str): sampling feature strategy, bilinear|nearest 18 | padding (float): conventional padding paramter of ONet for unit cube, so [-0.5, 0.5] -> [-0.55, 0.55] 19 | ''' 20 | 21 | def __init__(self, dim=3, c_dim=128, 22 | corr_dim=0, corr_head=True, 23 | hidden_size=256, n_blocks=5, leaky=False, 24 | sample_mode='bilinear', padding=0.1): 25 | super().__init__() 26 | self.c_dim = c_dim 27 | self.n_blocks = n_blocks 28 | self.corr_dim = corr_dim 29 | self.corr_head = corr_head 30 | 31 | self.fc_c_occ = nn.ModuleList([ 32 | nn.Linear(c_dim, hidden_size) for i in range(n_blocks) 33 | ]) 34 | 35 | self.fc_p = nn.Linear(dim, hidden_size) 36 | 37 | self.blocks_occ = nn.ModuleList([ 38 | ResnetBlockFC(hidden_size) for i in range(n_blocks) 39 | ]) 40 | self.fc_occ = nn.Linear(hidden_size, 1) 41 | 42 | if self.corr_dim != 0 and corr_head: 43 | self.fc_out_corr = nn.Linear(hidden_size, corr_dim) 44 | 45 | if not leaky: 46 | self.actvn = F.relu 47 | else: 48 | self.actvn = lambda x: F.leaky_relu(x, 0.2) 49 | 50 | self.sample_mode = sample_mode 51 | self.padding = padding 52 | 53 | 54 | def sample_plane_feature(self, p, c, plane='xz'): 55 | xy = normalize_coordinate(p.clone(), plane=plane, padding=self.padding) # normalize to the range of (0, 1) 56 | xy = xy[:, :, None].float() 57 | vgrid = 2.0 * xy - 1.0 # normalize to (-1, 1) 58 | c = F.grid_sample(c, vgrid, padding_mode='border', align_corners=True, mode=self.sample_mode).squeeze(-1) 59 | return c 60 | 61 | def forward(self, p, c_plane, **kwargs): 62 | c = 0 63 | c += self.sample_plane_feature(p, c_plane['xz'], plane='xz') 64 | c += self.sample_plane_feature(p, c_plane['xy'], plane='xy') 65 | c += self.sample_plane_feature(p, c_plane['yz'], plane='yz') 66 | c = c.transpose(1, 2) 67 | 68 | p = p.float() 69 | x = self.fc_p(p) 70 | net = x 71 | 72 | for i in range(self.n_blocks): 73 | net = net + self.fc_c_occ[i](c) 74 | net = self.blocks_occ[i](net) 75 | 76 | results = {} 77 | if self.corr_dim != 0 and not self.corr_head: 78 | results['corr'] = net 79 | 80 | net = self.actvn(net) 81 | 82 | results['occ'] = self.fc_occ(net).squeeze(-1) 83 | if self.corr_dim != 0 and self.corr_head: 84 | results['corr'] = self.fc_out_corr(net) 85 | 86 | return results 87 | 88 | class CombinedDecoder(nn.Module): 89 | ''' Decoder. 90 | Instead of conditioning on global features, on plane/volume local features. 
91 | 92 | Args: 93 | dim (int): input dimension 94 | c_dim (int): dimension of latent conditioned code c 95 | hidden_size (int): hidden size of Decoder network 96 | n_blocks (int): number of blocks ResNetBlockFC layers 97 | leaky (bool): whether to use leaky ReLUs 98 | sample_mode (str): sampling feature strategy, bilinear|nearest 99 | padding (float): conventional padding paramter of ONet for unit cube, so [-0.5, 0.5] -> [-0.55, 0.55] 100 | ''' 101 | 102 | def __init__(self, dim=3, c_per_dim=128, c_act_dim=128, 103 | corr_dim=0, corr_head=True, 104 | hidden_size=256, n_blocks=5, leaky=False, 105 | sample_mode='bilinear', padding=0.1, fuse=True, detach=False, anneal_gradient=True): 106 | super().__init__() 107 | self.c_per_dim = c_per_dim 108 | self.c_act_dim = c_act_dim 109 | self.n_blocks = n_blocks 110 | self.corr_dim = corr_dim 111 | self.corr_head = corr_head 112 | self.fuse = fuse 113 | self.detach = detach 114 | self.anneal_gradient = anneal_gradient 115 | 116 | self.fc_c_per = nn.ModuleList([ 117 | nn.Linear(c_per_dim, hidden_size) for i in range(n_blocks) 118 | ]) 119 | 120 | self.fc_c_act = nn.ModuleList([ 121 | nn.Linear(c_act_dim, hidden_size) for i in range(n_blocks) 122 | ]) 123 | 124 | if self.fuse: 125 | self.fc_c_merge = nn.ModuleList([ 126 | nn.Linear(hidden_size*2, hidden_size) for i in range(n_blocks) 127 | ]) 128 | 129 | self.fc_p_per = nn.Linear(dim, hidden_size) 130 | self.fc_p_act = nn.Linear(dim, hidden_size) 131 | 132 | self.blocks_per = nn.ModuleList([ 133 | ResnetBlockFC(hidden_size) for i in range(n_blocks) 134 | ]) 135 | self.blocks_act = nn.ModuleList([ 136 | ResnetBlockFC(hidden_size) for i in range(n_blocks) 137 | ]) 138 | 139 | self.fc_occ = nn.Linear(hidden_size, 1) 140 | self.fc_flow= nn.Linear(hidden_size, 3) 141 | 142 | if self.corr_dim != 0 and corr_head: 143 | self.fc_out_corr = nn.Linear(hidden_size, corr_dim) 144 | if self.fuse: 145 | self.fc_act_corr_merge = nn.Linear(hidden_size+corr_dim, hidden_size) 146 | 147 | if not leaky: 148 | self.actvn = F.relu 149 | else: 150 | self.actvn = lambda x: F.leaky_relu(x, 0.2) 151 | 152 | self.sample_mode = sample_mode 153 | self.padding = padding 154 | 155 | 156 | def sample_plane_feature(self, p, c, plane='xz'): 157 | xy = normalize_coordinate(p.clone(), plane=plane, padding=self.padding) # normalize to the range of (0, 1) 158 | xy = xy[:, :, None].float() 159 | vgrid = 2.0 * xy - 1.0 # normalize to (-1, 1) 160 | c = F.grid_sample(c, vgrid, padding_mode='border', align_corners=True, mode=self.sample_mode).squeeze(-1) 161 | return c 162 | 163 | def decode_perception(self, p, c_per_plane): 164 | c_per = 0 165 | c_per += self.sample_plane_feature(p, c_per_plane['xz'], plane='xz') 166 | c_per += self.sample_plane_feature(p, c_per_plane['xy'], plane='xy') 167 | c_per += self.sample_plane_feature(p, c_per_plane['yz'], plane='yz') 168 | c_per = c_per.transpose(1, 2) 169 | 170 | p = p.float() 171 | net_per = self.fc_p_per(p) 172 | features = [] 173 | for i in range(self.n_blocks): 174 | net_per = net_per + self.fc_c_per[i](c_per) 175 | net_per = self.blocks_per[i](net_per) 176 | if self.detach: 177 | features.append(net_per.detach()) 178 | else: 179 | features.append(net_per) 180 | net_per = self.actvn(net_per) 181 | 182 | results = {} 183 | results['occ'] = self.fc_occ(net_per).squeeze(-1) 184 | if self.corr_dim != 0 and self.corr_head: 185 | corr = self.fc_out_corr(net_per) 186 | features.append(corr) 187 | results['corr'] = corr 188 | # if self.anneal_gradient: 189 | # for i,p in enumerate(features): 190 | # 
features[i] = p * 0.1 + p.detach() * 0.9 191 | return results, features 192 | 193 | def decode_action(self, p, c_act_plane, per_features): 194 | c_act = 0 195 | c_act += self.sample_plane_feature(p, c_act_plane['xz'], plane='xz') 196 | c_act += self.sample_plane_feature(p, c_act_plane['xy'], plane='xy') 197 | c_act += self.sample_plane_feature(p, c_act_plane['yz'], plane='yz') 198 | c_act = c_act.transpose(1, 2) 199 | 200 | p = p.float() 201 | net_act = self.fc_p_act(p) 202 | 203 | for i in range(self.n_blocks): 204 | net_act = net_act + self.fc_c_act[i](c_act) 205 | if self.fuse: 206 | net_act = self.blocks_act[i]( 207 | self.fc_c_merge[i]( 208 | torch.cat( ( net_act, per_features[i]), dim=-1))) 209 | # (net_per.detach()*0.9+net_per * 0.1)), dim=-1))) 210 | else: 211 | net_act = self.blocks_act[i](net_act) 212 | 213 | 214 | net_act = self.actvn(net_act) 215 | 216 | if self.corr_dim != 0 and self.corr_head: 217 | if self.fuse: 218 | net_act = self.fc_act_corr_merge( 219 | torch.cat((net_act, per_features[-1].detach()), dim=-1)) 220 | return {'flow':self.fc_flow(net_act)} 221 | 222 | def forward(self, p, c_per_plane, c_act_plane): 223 | results, per_features = self.decode_perception(p, c_per_plane) 224 | results['flow'] = self.decode_action(p, c_act_plane, per_features)['flow'] 225 | return results 226 | -------------------------------------------------------------------------------- /ACID/src/encoder/unet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Codes are from: 3 | https://github.com/jaxony/unet-pytorch/blob/master/model.py 4 | ''' 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | from collections import OrderedDict 11 | from torch.nn import init 12 | import numpy as np 13 | 14 | def conv3x3(in_channels, out_channels, stride=1, 15 | padding=1, bias=True, groups=1): 16 | return nn.Conv2d( 17 | in_channels, 18 | out_channels, 19 | kernel_size=3, 20 | stride=stride, 21 | padding=padding, 22 | bias=bias, 23 | groups=groups) 24 | 25 | def upconv2x2(in_channels, out_channels, mode='transpose'): 26 | if mode == 'transpose': 27 | return nn.ConvTranspose2d( 28 | in_channels, 29 | out_channels, 30 | kernel_size=2, 31 | stride=2) 32 | else: 33 | # out_channels is always going to be the same 34 | # as in_channels 35 | return nn.Sequential( 36 | nn.Upsample(mode='bilinear', scale_factor=2), 37 | conv1x1(in_channels, out_channels)) 38 | 39 | def conv1x1(in_channels, out_channels, groups=1): 40 | return nn.Conv2d( 41 | in_channels, 42 | out_channels, 43 | kernel_size=1, 44 | groups=groups, 45 | stride=1) 46 | 47 | 48 | class DownConv(nn.Module): 49 | """ 50 | A helper Module that performs 2 convolutions and 1 MaxPool. 51 | A ReLU activation follows each convolution. 
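    Example (illustrative):

        >>> import torch
        >>> block = DownConv(in_channels=3, out_channels=64, pooling=True)
        >>> x, before_pool = block(torch.randn(1, 3, 64, 64))
        >>> x.shape, before_pool.shape
        (torch.Size([1, 64, 32, 32]), torch.Size([1, 64, 64, 64]))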
52 | """ 53 | def __init__(self, in_channels, out_channels, pooling=True): 54 | super(DownConv, self).__init__() 55 | 56 | self.in_channels = in_channels 57 | self.out_channels = out_channels 58 | self.pooling = pooling 59 | 60 | self.conv1 = conv3x3(self.in_channels, self.out_channels) 61 | self.conv2 = conv3x3(self.out_channels, self.out_channels) 62 | 63 | if self.pooling: 64 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 65 | 66 | def forward(self, x): 67 | x = F.relu(self.conv1(x)) 68 | x = F.relu(self.conv2(x)) 69 | before_pool = x 70 | if self.pooling: 71 | x = self.pool(x) 72 | return x, before_pool 73 | 74 | 75 | class UpConv(nn.Module): 76 | """ 77 | A helper Module that performs 2 convolutions and 1 UpConvolution. 78 | A ReLU activation follows each convolution. 79 | """ 80 | def __init__(self, in_channels, out_channels, 81 | merge_mode='concat', up_mode='transpose'): 82 | super(UpConv, self).__init__() 83 | 84 | self.in_channels = in_channels 85 | self.out_channels = out_channels 86 | self.merge_mode = merge_mode 87 | self.up_mode = up_mode 88 | 89 | self.upconv = upconv2x2(self.in_channels, self.out_channels, 90 | mode=self.up_mode) 91 | 92 | if self.merge_mode == 'concat': 93 | self.conv1 = conv3x3( 94 | 2*self.out_channels, self.out_channels) 95 | else: 96 | # num of input channels to conv2 is same 97 | self.conv1 = conv3x3(self.out_channels, self.out_channels) 98 | self.conv2 = conv3x3(self.out_channels, self.out_channels) 99 | 100 | 101 | def forward(self, from_down, from_up): 102 | """ Forward pass 103 | Arguments: 104 | from_down: tensor from the encoder pathway 105 | from_up: upconv'd tensor from the decoder pathway 106 | """ 107 | from_up = self.upconv(from_up) 108 | if self.merge_mode == 'concat': 109 | x = torch.cat((from_up, from_down), 1) 110 | else: 111 | x = from_up + from_down 112 | x = F.relu(self.conv1(x)) 113 | x = F.relu(self.conv2(x)) 114 | return x 115 | 116 | 117 | class UNet(nn.Module): 118 | """ `UNet` class is based on https://arxiv.org/abs/1505.04597 119 | 120 | The U-Net is a convolutional encoder-decoder neural network. 121 | Contextual spatial information (from the decoding, 122 | expansive pathway) about an input tensor is merged with 123 | information representing the localization of details 124 | (from the encoding, compressive pathway). 125 | 126 | Modifications to the original paper: 127 | (1) padding is used in 3x3 convolutions to prevent loss 128 | of border pixels 129 | (2) merging outputs does not require cropping due to (1) 130 | (3) residual connections can be used by specifying 131 | UNet(merge_mode='add') 132 | (4) if non-parametric upsampling is used in the decoder 133 | pathway (specified by upmode='upsample'), then an 134 | additional 1x1 2d convolution occurs after upsampling 135 | to reduce channel dimensionality by a factor of 2. 136 | This channel halving happens with the convolution in 137 | the tranpose convolution (specified by upmode='transpose') 138 | """ 139 | 140 | def __init__(self, num_classes, in_channels=3, depth=5, 141 | start_filts=64, up_mode='transpose', 142 | merge_mode='concat', **kwargs): 143 | """ 144 | Arguments: 145 | in_channels: int, number of channels in the input tensor. 146 | Default is 3 for RGB images. 147 | depth: int, number of MaxPools in the U-Net. 148 | start_filts: int, number of convolutional filters for the 149 | first conv. 150 | up_mode: string, type of upconvolution. Choices: 'transpose' 151 | for transpose convolution or 'upsample' for nearest neighbour 152 | upsampling. 
153 | """ 154 | super(UNet, self).__init__() 155 | 156 | if up_mode in ('transpose', 'upsample'): 157 | self.up_mode = up_mode 158 | else: 159 | raise ValueError("\"{}\" is not a valid mode for " 160 | "upsampling. Only \"transpose\" and " 161 | "\"upsample\" are allowed.".format(up_mode)) 162 | 163 | if merge_mode in ('concat', 'add'): 164 | self.merge_mode = merge_mode 165 | else: 166 | raise ValueError("\"{}\" is not a valid mode for" 167 | "merging up and down paths. " 168 | "Only \"concat\" and " 169 | "\"add\" are allowed.".format(up_mode)) 170 | 171 | # NOTE: up_mode 'upsample' is incompatible with merge_mode 'add' 172 | if self.up_mode == 'upsample' and self.merge_mode == 'add': 173 | raise ValueError("up_mode \"upsample\" is incompatible " 174 | "with merge_mode \"add\" at the moment " 175 | "because it doesn't make sense to use " 176 | "nearest neighbour to reduce " 177 | "depth channels (by half).") 178 | 179 | self.num_classes = num_classes 180 | self.in_channels = in_channels 181 | self.start_filts = start_filts 182 | self.depth = depth 183 | 184 | self.down_convs = [] 185 | self.up_convs = [] 186 | 187 | # create the encoder pathway and add to a list 188 | for i in range(depth): 189 | ins = self.in_channels if i == 0 else outs 190 | outs = self.start_filts*(2**i) 191 | pooling = True if i < depth-1 else False 192 | 193 | down_conv = DownConv(ins, outs, pooling=pooling) 194 | self.down_convs.append(down_conv) 195 | 196 | # create the decoder pathway and add to a list 197 | # - careful! decoding only requires depth-1 blocks 198 | for i in range(depth-1): 199 | ins = outs 200 | outs = ins // 2 201 | up_conv = UpConv(ins, outs, up_mode=up_mode, 202 | merge_mode=merge_mode) 203 | self.up_convs.append(up_conv) 204 | 205 | # add the list of modules to current module 206 | self.down_convs = nn.ModuleList(self.down_convs) 207 | self.up_convs = nn.ModuleList(self.up_convs) 208 | 209 | self.conv_final = conv1x1(outs, self.num_classes) 210 | 211 | self.reset_params() 212 | 213 | @staticmethod 214 | def weight_init(m): 215 | if isinstance(m, nn.Conv2d): 216 | init.xavier_normal_(m.weight) 217 | init.constant_(m.bias, 0) 218 | 219 | 220 | def reset_params(self): 221 | for i, m in enumerate(self.modules()): 222 | self.weight_init(m) 223 | 224 | 225 | def forward(self, x): 226 | encoder_outs = [] 227 | # encoder pathway, save outputs for merging 228 | for i, module in enumerate(self.down_convs): 229 | x, before_pool = module(x) 230 | encoder_outs.append(before_pool) 231 | for i, module in enumerate(self.up_convs): 232 | before_pool = encoder_outs[-(i+2)] 233 | x = module(before_pool, x) 234 | 235 | # No softmax is used. This means you need to use 236 | # nn.CrossEntropyLoss is your training script, 237 | # as this module includes a softmax already. 
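        # (Clarification: nn.CrossEntropyLoss applies log-softmax internally,
        # so the raw logits produced by the final 1x1 conv below are exactly what it expects.)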
238 | x = self.conv_final(x) 239 | return x 240 | 241 | if __name__ == "__main__": 242 | """ 243 | testing 244 | """ 245 | model = UNet(1, depth=5, merge_mode='concat', in_channels=1, start_filts=32) 246 | print(model) 247 | print(sum(p.numel() for p in model.parameters())) 248 | 249 | reso = 176 250 | x = np.zeros((1, 1, reso, reso)) 251 | x[:,:,int(reso/2-1), int(reso/2-1)] = np.nan 252 | x = torch.FloatTensor(x) 253 | 254 | out = model(x) 255 | print('%f'%(torch.sum(torch.isnan(out)).detach().cpu().numpy()/(reso*reso))) 256 | 257 | # loss = torch.sum(out) 258 | # loss.backward() 259 | -------------------------------------------------------------------------------- /PlushSim/scripts/writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | 10 | 11 | """Helper class for writing groundtruth data offline. 12 | """ 13 | 14 | import atexit 15 | import colorsys 16 | import queue 17 | import omni 18 | import os 19 | import threading 20 | import numpy as np 21 | from PIL import Image, ImageDraw 22 | 23 | 24 | class DataWriter: 25 | def __init__(self, data_dir, num_worker_threads, max_queue_size=500, sensor_settings=None): 26 | from omni.isaac.synthetic_utils import visualization as vis 27 | 28 | self.vis = vis 29 | atexit.register(self.stop_threads) 30 | self.data_dir = data_dir 31 | 32 | # Threading for multiple scenes 33 | self.num_worker_threads = num_worker_threads 34 | # Initialize queue with a specified size 35 | self.q = queue.Queue(max_queue_size) 36 | self.threads = [] 37 | 38 | self._viewport = omni.kit.viewport.get_viewport_interface() 39 | self.create_output_folders(sensor_settings) 40 | 41 | def start_threads(self): 42 | """Start worker threads.""" 43 | for _ in range(self.num_worker_threads): 44 | t = threading.Thread(target=self.worker, daemon=True) 45 | t.start() 46 | self.threads.append(t) 47 | 48 | def stop_threads(self): 49 | """Waits for all tasks to be completed before stopping worker threads.""" 50 | print(f"Finish writing data...") 51 | 52 | # Block until all tasks are done 53 | self.q.join() 54 | 55 | # Stop workers 56 | for _ in range(self.num_worker_threads): 57 | self.q.put(None) 58 | for t in self.threads: 59 | t.join() 60 | 61 | print(f"Done.") 62 | 63 | def worker(self): 64 | """Processes task from queue. 
Each tasks contains groundtruth data and metadata which is used to transform the output and write it to disk.""" 65 | while True: 66 | groundtruth = self.q.get() 67 | if groundtruth is None: 68 | break 69 | filename = groundtruth["METADATA"]["image_id"] 70 | viewport_name = groundtruth["METADATA"]["viewport_name"] 71 | for gt_type, data in groundtruth["DATA"].items(): 72 | if gt_type == "RGB": 73 | self.save_image(viewport_name, gt_type, data, filename) 74 | elif gt_type == "DEPTH": 75 | if groundtruth["METADATA"]["DEPTH"]["NPY"]: 76 | self.depth_folder = self.data_dir + "/" + str(viewport_name) + "/depth/" 77 | np.save(self.depth_folder + filename + ".npy", data) 78 | if groundtruth["METADATA"]["DEPTH"]["COLORIZE"]: 79 | self.save_image(viewport_name, gt_type, data, filename) 80 | elif gt_type == "INSTANCE": 81 | self.save_segmentation( 82 | viewport_name, 83 | gt_type, 84 | data, 85 | filename, 86 | groundtruth["METADATA"]["INSTANCE"]["WIDTH"], 87 | groundtruth["METADATA"]["INSTANCE"]["HEIGHT"], 88 | groundtruth["METADATA"]["INSTANCE"]["COLORIZE"], 89 | groundtruth["METADATA"]["INSTANCE"]["NPY"], 90 | ) 91 | elif gt_type == "SEMANTIC": 92 | self.save_segmentation( 93 | viewport_name, 94 | gt_type, 95 | data, 96 | filename, 97 | groundtruth["METADATA"]["SEMANTIC"]["WIDTH"], 98 | groundtruth["METADATA"]["SEMANTIC"]["HEIGHT"], 99 | groundtruth["METADATA"]["SEMANTIC"]["COLORIZE"], 100 | groundtruth["METADATA"]["SEMANTIC"]["NPY"], 101 | ) 102 | elif gt_type in ["BBOX2DTIGHT", "BBOX2DLOOSE"]: 103 | self.save_bbox( 104 | viewport_name, 105 | gt_type, 106 | data, 107 | filename, 108 | groundtruth["METADATA"][gt_type]["COLORIZE"], 109 | groundtruth["DATA"]["RGB"], 110 | groundtruth["METADATA"][gt_type]["NPY"], 111 | ) 112 | elif gt_type == "CAMERA": 113 | self.camera_folder = self.data_dir + "/" + str(viewport_name) + "/camera/" 114 | np.save(self.camera_folder + filename + ".npy", data) 115 | elif gt_type == "POSES": 116 | self.poses_folder = self.data_dir + "/" + str(viewport_name) + "/poses/" 117 | np.save(self.poses_folder + filename + ".npy", data) 118 | else: 119 | raise NotImplementedError 120 | self.q.task_done() 121 | 122 | def save_segmentation( 123 | self, viewport_name, data_type, data, filename, width=1280, height=720, display_rgb=True, save_npy=True 124 | ): 125 | self.instance_folder = self.data_dir + "/" + str(viewport_name) + "/instance/" 126 | self.semantic_folder = self.data_dir + "/" + str(viewport_name) + "/semantic/" 127 | # Save ground truth data locally as npy 128 | if data_type == "INSTANCE" and save_npy: 129 | np.save(self.instance_folder + filename + ".npy", data) 130 | if data_type == "SEMANTIC" and save_npy: 131 | np.save(self.semantic_folder + filename + ".npy", data) 132 | if display_rgb: 133 | image_data = np.frombuffer(data, dtype=np.uint8).reshape(*data.shape, -1) 134 | num_colors = 50 if data_type == "SEMANTIC" else None 135 | color_image = self.vis.colorize_segmentation(image_data, width, height, 3, num_colors) 136 | # color_image = visualize.colorize_instance(image_data) 137 | color_image_rgb = Image.fromarray(color_image, "RGB") 138 | if data_type == "INSTANCE": 139 | color_image_rgb.save(f"{self.instance_folder}/{filename}.png") 140 | if data_type == "SEMANTIC": 141 | color_image_rgb.save(f"{self.semantic_folder}/{filename}.png") 142 | 143 | def save_image(self, viewport_name, img_type, image_data, filename): 144 | self.rgb_folder = self.data_dir + "/" + str(viewport_name) + "/rgb/" 145 | self.depth_folder = self.data_dir + "/" + str(viewport_name) + 
"/depth/" 146 | if img_type == "RGB": 147 | # Save ground truth data locally as png 148 | rgb_img = Image.fromarray(image_data, "RGBA") 149 | rgb_img.save(f"{self.rgb_folder}/{filename}.png") 150 | elif img_type == "DEPTH": 151 | # Convert linear depth to inverse depth for better visualization 152 | image_data = image_data * 100 153 | image_data = np.reciprocal(image_data) 154 | # Save ground truth data locally as png 155 | image_data[image_data == 0.0] = 1e-5 156 | image_data = np.clip(image_data, 0, 255) 157 | image_data -= np.min(image_data) 158 | if np.max(image_data) > 0: 159 | image_data /= np.max(image_data) 160 | depth_img = Image.fromarray((image_data * 255.0).astype(np.uint8)) 161 | depth_img.save(f"{self.depth_folder}/{filename}.png") 162 | 163 | def save_bbox(self, viewport_name, data_type, data, filename, display_rgb=True, rgb_data=None, save_npy=True): 164 | self.bbox_2d_tight_folder = self.data_dir + "/" + str(viewport_name) + "/bbox_2d_tight/" 165 | self.bbox_2d_loose_folder = self.data_dir + "/" + str(viewport_name) + "/bbox_2d_loose/" 166 | # Save ground truth data locally as npy 167 | if data_type == "BBOX2DTIGHT" and save_npy: 168 | np.save(self.bbox_2d_tight_folder + filename + ".npy", data) 169 | if data_type == "BBOX2DLOOSE" and save_npy: 170 | np.save(self.bbox_2d_loose_folder + filename + ".npy", data) 171 | if display_rgb and rgb_data is not None: 172 | color_image = self.vis.colorize_bboxes(data, rgb_data) 173 | color_image_rgb = Image.fromarray(color_image, "RGBA") 174 | if data_type == "BBOX2DTIGHT": 175 | color_image_rgb.save(f"{self.bbox_2d_tight_folder}/{filename}.png") 176 | if data_type == "BBOX2DLOOSE": 177 | color_image_rgb.save(f"{self.bbox_2d_loose_folder}/{filename}.png") 178 | 179 | def create_output_folders(self, sensor_settings=None): 180 | """Checks if the sensor output folder corresponding to each viewport is created. 
If not, it creates them.""" 181 | if not os.path.exists(self.data_dir): 182 | os.mkdir(self.data_dir) 183 | if sensor_settings is None: 184 | sensor_settings = dict() 185 | viewports = self._viewport.get_instance_list() 186 | viewport_names = [self._viewport.get_viewport_window_name(vp) for vp in viewports] 187 | sensor_settings_viewport = { 188 | "rgb": {"enabled": True}, 189 | "depth": {"enabled": True, "colorize": True, "npy": True}, 190 | "instance": {"enabled": True, "colorize": True, "npy": True}, 191 | "semantic": {"enabled": True, "colorize": True, "npy": True}, 192 | "bbox_2d_tight": {"enabled": True, "colorize": True, "npy": True}, 193 | "bbox_2d_loose": {"enabled": True, "colorize": True, "npy": True}, 194 | "camera": {"enabled": True, "npy": True}, 195 | "poses": {"enabled": True, "npy": True}, 196 | } 197 | for name in viewport_names: 198 | sensor_settings[name] = copy.deepcopy(sensor_settings_viewport) 199 | 200 | for viewport_name in sensor_settings: 201 | viewport_folder = self.data_dir + "/" + str(viewport_name) 202 | if not os.path.exists(viewport_folder): 203 | os.mkdir(viewport_folder) 204 | for sensor_name in sensor_settings[viewport_name]: 205 | if sensor_settings[viewport_name][sensor_name]["enabled"]: 206 | sensor_folder = self.data_dir + "/" + str(viewport_name) + "/" + str(sensor_name) 207 | if not os.path.exists(sensor_folder): 208 | os.mkdir(sensor_folder) 209 | -------------------------------------------------------------------------------- /PlushSim/scripts/syntheticdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | 10 | 11 | """Helper class for obtaining groundtruth data from OmniKit. 12 | 13 | Support provided for RGB, Depth, Bounding Box (2D Tight, 2D Loose, 3D), 14 | segmentation (instance and semantic), and camera parameters. 
15 | 16 | Typical usage example: 17 | 18 | kit = OmniKitHelper() # Start omniverse kit 19 | sd_helper = SyntheticDataHelper() 20 | gt = sd_helper.get_groundtruth(('rgb', 'depth', 'boundingBox2DTight')) 21 | 22 | """ 23 | 24 | import math 25 | import carb 26 | import omni 27 | import time 28 | from pxr import UsdGeom, Semantics, Gf 29 | 30 | import numpy as np 31 | 32 | 33 | class SyntheticDataHelper: 34 | def __init__(self): 35 | self.app = omni.kit.app.get_app_interface() 36 | ext_manager = self.app.get_extension_manager() 37 | ext_manager.set_extension_enabled("omni.syntheticdata", True) 38 | 39 | from omni.syntheticdata import sensors, helpers 40 | import omni.syntheticdata._syntheticdata as sd # Must be imported after getting app interface 41 | 42 | self.sd = sd 43 | 44 | self.sd_interface = self.sd.acquire_syntheticdata_interface() 45 | self.viewport = omni.kit.viewport.get_viewport_interface() 46 | self.carb_settings = carb.settings.acquire_settings_interface() 47 | self.sensor_helper_lib = sensors 48 | self.generic_helper_lib = helpers 49 | 50 | mode = "numpy" 51 | 52 | self.sensor_helpers = { 53 | "rgb": sensors.get_rgb, 54 | "depth": sensors.get_depth_linear, 55 | "depthLinear": self.get_depth_linear, 56 | "instanceSegmentation": sensors.get_instance_segmentation, 57 | "semanticSegmentation": self.get_semantic_segmentation, 58 | "boundingBox2DTight": sensors.get_bounding_box_2d_tight, 59 | "boundingBox2DLoose": sensors.get_bounding_box_2d_loose, 60 | "boundingBox3D": sensors.get_bounding_box_3d, 61 | "camera": self.get_camera_params, 62 | "pose": self.get_pose, 63 | } 64 | 65 | self.sensor_types = { 66 | "rgb": self.sd.SensorType.Rgb, 67 | "depth": self.sd.SensorType.DepthLinear, 68 | "depthLinear": self.sd.SensorType.DepthLinear, 69 | "instanceSegmentation": self.sd.SensorType.InstanceSegmentation, 70 | "semanticSegmentation": self.sd.SensorType.SemanticSegmentation, 71 | "boundingBox2DTight": self.sd.SensorType.BoundingBox2DTight, 72 | "boundingBox2DLoose": self.sd.SensorType.BoundingBox2DLoose, 73 | "boundingBox3D": self.sd.SensorType.BoundingBox3D, 74 | } 75 | 76 | self.sensor_state = {s: False for s in list(self.sensor_helpers.keys())} 77 | 78 | def get_depth_linear(self, viewport): 79 | """ Get Depth Linear sensor output. 80 | 81 | Args: 82 | viewport (omni.kit.viewport._viewport.IViewportWindow): Viewport from which to retrieve/create sensor. 83 | 84 | Return: 85 | (numpy.ndarray): A float32 array of shape (height, width, 1). 86 | """ 87 | sensor = self.sensor_helper_lib.create_or_retrieve_sensor(viewport, self.sd.SensorType.DepthLinear) 88 | data = self.sd_interface.get_sensor_host_float_texture_array(sensor) 89 | h, w = data.shape[:2] 90 | return np.frombuffer(data, np.float32).reshape(h, w, -1) 91 | 92 | def get_semantic_segmentation(self, viewport): 93 | instance_data, instance_mappings = self.sensor_helpers['instanceSegmentation'](viewport, return_mapping=True) 94 | ins_to_sem = np.zeros(np.max(instance_data)+1,dtype=np.uint8) 95 | for im in instance_mappings[::-1]: 96 | for i in im["instanceIds"]: 97 | if i >= len(ins_to_sem): 98 | continue 99 | ins_to_sem[i] = 1 #if im['semanticLabel'] == 'teddy' else 2 100 | return np.take(ins_to_sem, instance_data) 101 | 102 | 103 | def get_camera_params(self, viewport): 104 | """Get active camera intrinsic and extrinsic parameters. 105 | 106 | Returns: 107 | A dict of the active camera's parameters. 
108 | 109 | pose (numpy.ndarray): camera position in world coordinates, 110 | fov (float): horizontal field of view in radians 111 | focal_length (float) 112 | horizontal_aperture (float) 113 | view_projection_matrix (numpy.ndarray(dtype=float64, shape=(4, 4))) 114 | resolution (dict): resolution as a dict with 'width' and 'height'. 115 | clipping_range (tuple(float, float)): Near and Far clipping values. 116 | """ 117 | stage = omni.usd.get_context().get_stage() 118 | prim = stage.GetPrimAtPath(viewport.get_active_camera()) 119 | prim_tf = UsdGeom.Camera(prim).GetLocalTransformation() 120 | focal_length = prim.GetAttribute("focalLength").Get() 121 | horiz_aperture = prim.GetAttribute("horizontalAperture").Get() 122 | fov = 2 * math.atan(horiz_aperture / (2 * focal_length)) 123 | x_min, y_min, x_max, y_max = viewport.get_viewport_rect() 124 | width, height = x_max - x_min, y_max - y_min 125 | aspect_ratio = width / height 126 | near, far = prim.GetAttribute("clippingRange").Get() 127 | view_proj_mat = self.generic_helper_lib.get_view_proj_mat(prim, aspect_ratio, near, far) 128 | 129 | return { 130 | "pose": np.array(prim_tf), 131 | "fov": fov, 132 | "focal_length": focal_length, 133 | "horizontal_aperture": horiz_aperture, 134 | "view_projection_matrix": view_proj_mat, 135 | "resolution": {"width": width, "height": height}, 136 | "clipping_range": (near, far), 137 | } 138 | 139 | def get_pose(self): 140 | """Get pose of all objects with a semantic label. 141 | """ 142 | stage = omni.usd.get_context().get_stage() 143 | mappings = self.generic_helper_lib.get_instance_mappings() 144 | pose = [] 145 | for m in mappings: 146 | prim_path = m[0] 147 | prim = stage.GetPrimAtPath(prim_path) 148 | prim_tf = UsdGeom.Xformable(prim).ComputeLocalToWorldTransform(0.0) 149 | pose.append((str(prim_path), m[1], str(m[2]), np.array(prim_tf))) 150 | return pose 151 | 152 | async def initialize_async(self, viewport, sensor_types, timeout=10): 153 | """ Initialize sensors in the list provided. 154 | 155 | 156 | Args: 157 | viewport (omni.kit.viewport._viewport.IViewportWindow): Viewport from which to retrieve/create sensor. 158 | sensor_types (list of omni.syntheticdata._syntheticdata.SensorType): List of sensor types to initialize. 159 | timeout (int): Maximum time in seconds to attempt to initialize sensors. 160 | """ 161 | start = time.time() 162 | is_initialized = False 163 | while not is_initialized and time.time() < (start + timeout): 164 | sensors = [] 165 | for sensor_type in sensor_types: 166 | sensors.append(self.sensor_helper_lib.create_or_retrieve_sensor(viewport, sensor_type)) 167 | await omni.kit.app.get_app_interface().next_update_async() 168 | is_initialized = not any([not self.sd_interface.is_sensor_initialized(s) for s in sensors]) 169 | if not is_initialized: 170 | unititialized = [s for s in sensors if not self.sd_interface.is_sensor_initialized(s)] 171 | raise TimeoutError(f"Unable to initialized sensors: [{unititialized}] within {timeout} seconds.") 172 | 173 | await omni.kit.app.get_app_interface().next_update_async() # Extra frame required to prevent access violation error 174 | 175 | def get_groundtruth(self, gt_sensors, viewport, verify_sensor_init=True): 176 | """Get groundtruth from specified gt_sensors. 177 | 178 | Args: 179 | gt_sensors (list): List of strings of sensor names. 
Valid sensors names: rgb, depth, 180 | instanceSegmentation, semanticSegmentation, boundingBox2DTight, 181 | boundingBox2DLoose, boundingBox3D, camera 182 | viewport (omni.kit.viewport._viewport.IViewportWindow): Viewport from which to retrieve/create sensor. 183 | verify_sensor_init (bool): Additional check to verify creation and initialization of sensors. 184 | 185 | Returns: 186 | Dict of sensor outputs 187 | """ 188 | if isinstance(gt_sensors, str): 189 | gt_sensors = (gt_sensors,) 190 | 191 | # Create and initialize sensors 192 | while verify_sensor_init: 193 | flag = 0 194 | # Render frame 195 | self.app.update() 196 | for sensor_name in gt_sensors: 197 | if sensor_name != "camera" and sensor_name != "pose": 198 | current_sensor = self.sensor_helper_lib.create_or_retrieve_sensor( 199 | viewport, self.sensor_types[sensor_name] 200 | ) 201 | if not self.sd_interface.is_sensor_initialized(current_sensor): 202 | flag = 1 203 | # Render frame 204 | self.app.update() 205 | self.app.update() 206 | if flag == 0: 207 | break 208 | 209 | gt = {} 210 | sensor_state = {} 211 | # Process non-RT-only sensors 212 | for sensor in gt_sensors: 213 | if sensor not in ["camera", "pose"]: 214 | if sensor == "instanceSegmentation": 215 | gt[sensor] = self.sensor_helpers[sensor](viewport, parsed=True, return_mapping=True) 216 | elif sensor == "boundingBox3D": 217 | gt[sensor] = self.sensor_helpers[sensor](viewport, parsed=True, return_corners=True) 218 | else: 219 | gt[sensor] = self.sensor_helpers[sensor](viewport) 220 | current_sensor = self.sensor_helper_lib.create_or_retrieve_sensor(viewport, self.sensor_types[sensor]) 221 | current_sensor_state = self.sd_interface.is_sensor_initialized(current_sensor) 222 | sensor_state[sensor] = current_sensor_state 223 | else: 224 | gt[sensor] = self.sensor_helpers[sensor](viewport) 225 | gt["state"] = sensor_state 226 | 227 | return gt 228 | 229 | -------------------------------------------------------------------------------- /ACID/plush_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.optim as optim 3 | from tensorboardX import SummaryWriter 4 | import matplotlib; matplotlib.use('Agg') 5 | import numpy as np 6 | import os 7 | import argparse 8 | import time, datetime 9 | from src import config, data 10 | from src.checkpoints import CheckpointIO 11 | from collections import defaultdict 12 | import shutil 13 | from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas 14 | from src.utils import common_util 15 | import matplotlib.pyplot as plt 16 | from PIL import Image 17 | 18 | # Arguments 19 | parser = argparse.ArgumentParser( 20 | description='Train a Plush Env dynamics model.' 
21 | ) 22 | parser.add_argument('config', type=str, help='Path to config file.') 23 | parser.add_argument('--no-cuda', action='store_true', help='Do not use cuda.') 24 | parser.add_argument('--exit-after', type=int, default=-1, 25 | help='Checkpoint and exit after specified number of seconds' 26 | 'with exit code 2.') 27 | parser.add_argument('--debug', action='store_true', help='debugging') 28 | parser.add_argument('--eval_only', action='store_true', help='run eval only') 29 | 30 | args = parser.parse_args() 31 | cfg = config.load_config(args.config, 'configs/default.yaml') 32 | is_cuda = (torch.cuda.is_available() and not args.no_cuda) 33 | device = torch.device("cuda" if is_cuda else "cpu") 34 | # Set t0 35 | t0 = time.time() 36 | 37 | # Shorthands 38 | out_dir = cfg['training']['out_dir'] 39 | if args.debug: 40 | cfg['training']['batch_size'] = 2 41 | cfg['training']['vis_n_outputs'] = 1 42 | cfg['training']['print_every'] = 1 43 | cfg['training']['backup_every'] = 1 44 | cfg['training']['validate_every'] = 1 45 | cfg['training']['visualize_every'] = 1 46 | cfg['training']['checkpoint_every'] = 1 47 | cfg['training']['visualize_total'] = 1 48 | 49 | batch_size = cfg['training']['batch_size'] 50 | backup_every = cfg['training']['backup_every'] 51 | vis_n_outputs = cfg['generation']['vis_n_outputs'] 52 | exit_after = args.exit_after 53 | 54 | model_selection_metric = cfg['training']['model_selection_metric'] 55 | if cfg['training']['model_selection_mode'] == 'maximize': 56 | model_selection_sign = 1 57 | elif cfg['training']['model_selection_mode'] == 'minimize': 58 | model_selection_sign = -1 59 | else: 60 | raise ValueError('model_selection_mode must be ' 61 | 'either maximize or minimize.') 62 | 63 | # Output directory 64 | if not os.path.exists(out_dir): 65 | os.makedirs(out_dir) 66 | 67 | shutil.copyfile(args.config, os.path.join(out_dir, 'config.yaml')) 68 | 69 | # Dataset 70 | train_loader = data.core.get_plush_loader(cfg, cfg['model']['type'], split='train') 71 | val_loader = data.core.get_plush_loader(cfg, cfg['model']['type'], split='test') 72 | 73 | # Model 74 | model = config.get_model(cfg, device=device) 75 | 76 | # Generator 77 | generator = config.get_generator(model, cfg, device=device) 78 | 79 | # Intialize training 80 | optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4) 81 | trainer = config.get_trainer(model, optimizer, cfg, device=device) 82 | 83 | checkpoint_io = CheckpointIO(out_dir, model=model, optimizer=optimizer) 84 | try: 85 | load_dict = checkpoint_io.load('model_best.pt') 86 | except FileExistsError: 87 | load_dict = dict() 88 | epoch_it = load_dict.get('epoch_it', 0) 89 | it = load_dict.get('it', 0) 90 | metric_val_best = load_dict.get( 91 | 'loss_val_best', -model_selection_sign * np.inf) 92 | 93 | if metric_val_best == np.inf or metric_val_best == -np.inf: 94 | metric_val_best = -model_selection_sign * np.inf 95 | print('Current best validation metric (%s): %.8f' 96 | % (model_selection_metric, metric_val_best)) 97 | logger = SummaryWriter(os.path.join(out_dir, 'logs')) 98 | 99 | # Shorthands 100 | print_every = cfg['training']['print_every'] 101 | checkpoint_every = cfg['training']['checkpoint_every'] 102 | validate_every = cfg['training']['validate_every'] 103 | visualize_every = cfg['training']['visualize_every'] 104 | 105 | # Print model 106 | nparameters = sum(p.numel() for p in model.parameters()) 107 | print('Total number of parameters: %d' % nparameters) 108 | 109 | print('output path: ', cfg['training']['out_dir']) 110 | 111 | 
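# Example invocation (config path and flags as defined by this repo / the argparse block above;
# the --exit-after value is only illustrative):
#   python plush_train.py configs/plush_dyn_geodesics.yaml
#   python plush_train.py configs/plush_dyn_geodesics.yaml --debug --exit-after 3600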
# For visualizations 112 | data_vis_list = [] 113 | if cfg['model']['type'] == 'geom': 114 | vis_dataset = data.core.get_geom_dataset(cfg, split='vis') 115 | elif cfg['model']['type'] == 'combined': 116 | vis_dataset = data.core.get_combined_dataset(cfg, split='vis') 117 | # Build a data dictionary for visualization 118 | np.random.seed(0) 119 | data_idxes = np.random.randint(len(vis_dataset), size=cfg['training']['visualize_total']) 120 | for i, id in enumerate(data_idxes): 121 | data_vis = data.core.collate_pair_fn([vis_dataset[id]]) 122 | data_vis_list.append({'it': i, 'data': data_vis}) 123 | 124 | 125 | if args.eval_only: 126 | eval_dict, figs = trainer.evaluate(val_loader) 127 | metric_val = eval_dict[model_selection_metric] 128 | for k, v in eval_dict.items(): 129 | print(f"metric {k}: {v}") 130 | print('Validation metric (%s): %.4f' 131 | % (model_selection_metric, metric_val)) 132 | for k,v in figs.items(): 133 | fig_path = os.path.join(out_dir, 'vis', f"{k}_eval_best.png") 134 | v.savefig(fig_path) 135 | for data_vis in data_vis_list: 136 | out = generator.generate_mesh(data_vis['data']) 137 | # Get statistics 138 | try: 139 | mesh, stats_dict = out 140 | except TypeError: 141 | mesh, stats_dict = out, {} 142 | mesh.export(os.path.join(out_dir, 'vis', f"best_{data_vis['it']}.off")) 143 | out2 = generator.generate_pointcloud(data_vis['data']) 144 | for i,pcloud in enumerate(out2): 145 | ipath = os.path.join(out_dir, 'vis', f"best_{data_vis['it']}_{i}.obj") 146 | common_util.write_pointcoud_as_obj(ipath, pcloud) 147 | pcloud_dict = [{"title":'source'if i == 0 else 'target', 148 | "pts": p[:,:3], 149 | "col": None if p.shape[1] == 3 else p[:,3:] 150 | } for i,p in enumerate(out2)] 151 | fig = common_util.side_by_side_point_clouds(pcloud_dict) 152 | width, height = fig.get_size_inches() * fig.get_dpi() 153 | canvas = FigureCanvas(fig) 154 | canvas.draw() 155 | img_path = os.path.join(out_dir, 'vis', f"best_{data_vis['it']}.png") 156 | Image.fromarray( 157 | np.frombuffer( 158 | canvas.tostring_rgb(), 159 | dtype='uint8').reshape(int(height), int(width), 3)).save( 160 | img_path 161 | ) 162 | plt.close(fig) 163 | quit() 164 | 165 | 166 | while True: 167 | epoch_it += 1 168 | 169 | for batch in train_loader: 170 | it += 1 171 | losses = trainer.train_step(batch, it) 172 | for k,v in losses.items(): 173 | logger.add_scalar(f'train/{k}_loss', v, it) 174 | 175 | # Print output 176 | if (it % print_every) == 0: 177 | t = datetime.datetime.now() 178 | print_str = f"[Epoch {epoch_it:04d}] it={it:04d}, time: {time.time()-t0:.3f}, " 179 | print_str += f"{t.hour:02d}:{t.minute:02d}, " 180 | for k,v in losses.items(): 181 | print_str += f"{k}:{v:.4f}, " 182 | print(print_str) 183 | 184 | # Save checkpoint 185 | if (checkpoint_every > 0 and (it % checkpoint_every) == 0): 186 | print('Saving checkpoint') 187 | checkpoint_io.save('model.pt', epoch_it=epoch_it, it=it, 188 | loss_val_best=metric_val_best) 189 | 190 | # Backup if necessary 191 | if (backup_every > 0 and (it % backup_every) == 0): 192 | print('Backup checkpoint') 193 | checkpoint_io.save('model_%d.pt' % it, epoch_it=epoch_it, it=it, 194 | loss_val_best=metric_val_best) 195 | # Run validation 196 | if validate_every > 0 and (it % validate_every) == 0: 197 | print('Running Validation') 198 | eval_dict, figs = trainer.evaluate(val_loader) 199 | for k,v in figs.items(): 200 | fig_path = os.path.join(out_dir, 'vis', f"{k}_{it}.png") 201 | v.savefig(fig_path) 202 | logger.add_figure(k, v, it) 203 | metric_val = 
eval_dict[model_selection_metric] 204 | print('Validation metric (%s): %.4f' 205 | % (model_selection_metric, metric_val)) 206 | 207 | for k, v in eval_dict.items(): 208 | print(f"metric {k}: {v}") 209 | logger.add_scalar('val/%s' % k, v, it) 210 | 211 | if model_selection_sign * (metric_val - metric_val_best) > 0: 212 | metric_val_best = metric_val 213 | print('New best model (loss %.4f)' % metric_val_best) 214 | checkpoint_io.save('model_best.pt', epoch_it=epoch_it, it=it, 215 | loss_val_best=metric_val_best) 216 | 217 | # Visualize output 218 | if visualize_every > 0 and (it % visualize_every) == 0: 219 | print('Visualizing') 220 | renders = [] 221 | for data_vis in data_vis_list: 222 | out = generator.generate_mesh(data_vis['data']) 223 | # Get statistics 224 | try: 225 | mesh, stats_dict = out 226 | except TypeError: 227 | mesh, stats_dict = out, {} 228 | mesh.export(os.path.join(out_dir, 'vis', '{}_{}.off'.format(it, data_vis['it']))) 229 | out2 = generator.generate_pointcloud(data_vis['data']) 230 | for i,pcloud in enumerate(out2): 231 | ipath = os.path.join(out_dir, 'vis', f"{it}_{data_vis['it']}_{i}.obj") 232 | common_util.write_pointcoud_as_obj(ipath, pcloud) 233 | name_dict = ['source', 'target', 'source_rollout', 'target_rollout'] 234 | pcloud_dict = [{"title":name_dict[i], 235 | "pts": p[:,:3], 236 | "col": None if p.shape[1] == 3 else p[:,3:] 237 | } for i,p in enumerate(out2)] 238 | fig = common_util.side_by_side_point_clouds(pcloud_dict) 239 | width, height = fig.get_size_inches() * fig.get_dpi() 240 | canvas = FigureCanvas(fig) 241 | canvas.draw() 242 | img_path = os.path.join(out_dir, 'vis', f"{it}_{data_vis['it']}.png") 243 | Image.fromarray( 244 | np.frombuffer( 245 | canvas.tostring_rgb(), 246 | dtype='uint8').reshape(int(height), int(width), 3)).save( 247 | img_path 248 | ) 249 | plt.close(fig) 250 | 251 | # Exit if necessary 252 | if exit_after > 0 and (time.time() - t0) >= exit_after: 253 | print('Time limit reached. Exiting.') 254 | checkpoint_io.save('model.pt', epoch_it=epoch_it, it=it, 255 | loss_val_best=metric_val_best) 256 | exit(3) 257 | -------------------------------------------------------------------------------- /ACID/src/common.py: -------------------------------------------------------------------------------- 1 | # import multiprocessing 2 | import torch 3 | import numpy as np 4 | import math 5 | 6 | import numpy as np 7 | def compute_iou(occ1, occ2): 8 | ''' Computes the Intersection over Union (IoU) value for two sets of 9 | occupancy values. 10 | 11 | Args: 12 | occ1 (tensor): first set of occupancy values 13 | occ2 (tensor): second set of occupancy values 14 | ''' 15 | occ1 = np.asarray(occ1) 16 | occ2 = np.asarray(occ2) 17 | 18 | # Put all data in second dimension 19 | # Also works for 1-dimensional data 20 | if occ1.ndim >= 2: 21 | occ1 = occ1.reshape(occ1.shape[0], -1) 22 | if occ2.ndim >= 2: 23 | occ2 = occ2.reshape(occ2.shape[0], -1) 24 | 25 | # Convert to boolean values 26 | occ1 = (occ1 >= 0.5) 27 | occ2 = (occ2 >= 0.5) 28 | 29 | # Compute IOU 30 | area_union = (occ1 | occ2).astype(np.float32).sum(axis=-1) 31 | area_intersect = (occ1 & occ2).astype(np.float32).sum(axis=-1) 32 | 33 | iou = (area_intersect / area_union) 34 | 35 | return iou 36 | 37 | 38 | def chamfer_distance(points1, points2, give_id=False): 39 | ''' Returns the chamfer distance for the sets of points. 
40 | 41 | Args: 42 | points1 (numpy array): first point set 43 | points2 (numpy array): second point set 44 | use_kdtree (bool): whether to use a kdtree 45 | give_id (bool): whether to return the IDs of nearest points 46 | ''' 47 | return chamfer_distance_naive(points1, points2) 48 | 49 | 50 | def chamfer_distance_naive(points1, points2): 51 | ''' Naive implementation of the Chamfer distance. 52 | 53 | Args: 54 | points1 (numpy array): first point set 55 | points2 (numpy array): second point set 56 | ''' 57 | assert(points1.size() == points2.size()) 58 | batch_size, T, _ = points1.size() 59 | 60 | points1 = points1.view(batch_size, T, 1, 3) 61 | points2 = points2.view(batch_size, 1, T, 3) 62 | 63 | distances = (points1 - points2).pow(2).sum(-1) 64 | 65 | chamfer1 = distances.min(dim=1)[0].mean(dim=1) 66 | chamfer2 = distances.min(dim=2)[0].mean(dim=1) 67 | 68 | chamfer = chamfer1 + chamfer2 69 | return chamfer 70 | 71 | def make_3d_grid(bb_min, bb_max, shape): 72 | ''' Makes a 3D grid. 73 | 74 | Args: 75 | bb_min (tuple): bounding box minimum 76 | bb_max (tuple): bounding box maximum 77 | shape (tuple): output shape 78 | ''' 79 | size = shape[0] * shape[1] * shape[2] 80 | 81 | pxs = torch.linspace(bb_min[0], bb_max[0], shape[0]) 82 | pys = torch.linspace(bb_min[1], bb_max[1], shape[1]) 83 | pzs = torch.linspace(bb_min[2], bb_max[2], shape[2]) 84 | 85 | pxs = pxs.view(-1, 1, 1).expand(*shape).contiguous().view(size) 86 | pys = pys.view(1, -1, 1).expand(*shape).contiguous().view(size) 87 | pzs = pzs.view(1, 1, -1).expand(*shape).contiguous().view(size) 88 | p = torch.stack([pxs, pys, pzs], dim=1) 89 | 90 | return p 91 | 92 | 93 | def transform_points(points, transform): 94 | ''' Transforms points with regard to passed camera information. 95 | 96 | Args: 97 | points (tensor): points tensor 98 | transform (tensor): transformation matrices 99 | ''' 100 | assert(points.size(2) == 3) 101 | assert(transform.size(1) == 3) 102 | assert(points.size(0) == transform.size(0)) 103 | 104 | if transform.size(2) == 4: 105 | R = transform[:, :, :3] 106 | t = transform[:, :, 3:] 107 | points_out = points @ R.transpose(1, 2) + t.transpose(1, 2) 108 | elif transform.size(2) == 3: 109 | K = transform 110 | points_out = points @ K.transpose(1, 2) 111 | 112 | return points_out 113 | 114 | 115 | def b_inv(b_mat): 116 | ''' Performs batch matrix inversion. 117 | 118 | Arguments: 119 | b_mat: the batch of matrices that should be inverted 120 | ''' 121 | 122 | eye = b_mat.new_ones(b_mat.size(-1)).diag().expand_as(b_mat) 123 | b_inv, _ = torch.gesv(eye, b_mat) 124 | return b_inv 125 | 126 | def project_to_camera(points, transform): 127 | ''' Projects points to the camera plane. 128 | 129 | Args: 130 | points (tensor): points tensor 131 | transform (tensor): transformation matrices 132 | ''' 133 | p_camera = transform_points(points, transform) 134 | p_camera = p_camera[..., :2] / p_camera[..., 2:] 135 | return p_camera 136 | 137 | 138 | def fix_Rt_camera(Rt, loc, scale): 139 | ''' Fixes Rt camera matrix. 
140 | 141 | Args: 142 | Rt (tensor): Rt camera matrix 143 | loc (tensor): location 144 | scale (float): scale 145 | ''' 146 | # Rt is B x 3 x 4 147 | # loc is B x 3 and scale is B 148 | batch_size = Rt.size(0) 149 | R = Rt[:, :, :3] 150 | t = Rt[:, :, 3:] 151 | 152 | scale = scale.view(batch_size, 1, 1) 153 | R_new = R * scale 154 | t_new = t + R @ loc.unsqueeze(2) 155 | 156 | Rt_new = torch.cat([R_new, t_new], dim=2) 157 | 158 | assert(Rt_new.size() == (batch_size, 3, 4)) 159 | return Rt_new 160 | 161 | def normalize_coordinate(p, padding=0.1, plane='xz'): 162 | ''' Normalize coordinate to [0, 1] for unit cube experiments 163 | 164 | Args: 165 | p (tensor): point 166 | padding (float): conventional padding paramter of ONet for unit cube, so [-0.5, 0.5] -> [-0.55, 0.55] 167 | plane (str): plane feature type, ['xz', 'xy', 'yz'] 168 | ''' 169 | if plane == 'xz': 170 | xy = p[:, :, [0, 2]] 171 | elif plane =='xy': 172 | xy = p[:, :, [0, 1]] 173 | else: 174 | xy = p[:, :, [1, 2]] 175 | 176 | xy_new = xy / (1 + padding + 10e-6) # (-0.5, 0.5) 177 | xy_new = xy_new + 0.5 # range (0, 1) 178 | 179 | # f there are outliers out of the range 180 | if xy_new.max() >= 1: 181 | xy_new[xy_new >= 1] = 1 - 10e-6 182 | if xy_new.min() < 0: 183 | xy_new[xy_new < 0] = 0.0 184 | return xy_new 185 | 186 | def normalize_3d_coordinate(p, padding=0.1): 187 | ''' Normalize coordinate to [0, 1] for unit cube experiments. 188 | Corresponds to our 3D model 189 | 190 | Args: 191 | p (tensor): point 192 | padding (float): conventional padding paramter of ONet for unit cube, so [-0.5, 0.5] -> [-0.55, 0.55] 193 | ''' 194 | 195 | p_nor = p / (1 + padding + 10e-4) # (-0.5, 0.5) 196 | p_nor = p_nor + 0.5 # range (0, 1) 197 | # f there are outliers out of the range 198 | if p_nor.max() >= 1: 199 | p_nor[p_nor >= 1] = 1 - 10e-4 200 | if p_nor.min() < 0: 201 | p_nor[p_nor < 0] = 0.0 202 | return p_nor 203 | 204 | def normalize_coord(p, vol_range, plane='xz'): 205 | ''' Normalize coordinate to [0, 1] for sliding-window experiments 206 | 207 | Args: 208 | p (tensor): point 209 | vol_range (numpy array): volume boundary 210 | plane (str): feature type, ['xz', 'xy', 'yz'] - canonical planes; ['grid'] - grid volume 211 | ''' 212 | p[:, 0] = (p[:, 0] - vol_range[0][0]) / (vol_range[1][0] - vol_range[0][0]) 213 | p[:, 1] = (p[:, 1] - vol_range[0][1]) / (vol_range[1][1] - vol_range[0][1]) 214 | p[:, 2] = (p[:, 2] - vol_range[0][2]) / (vol_range[1][2] - vol_range[0][2]) 215 | 216 | if plane == 'xz': 217 | x = p[:, [0, 2]] 218 | elif plane =='xy': 219 | x = p[:, [0, 1]] 220 | elif plane =='yz': 221 | x = p[:, [1, 2]] 222 | else: 223 | x = p 224 | return x 225 | 226 | def coordinate2index(x, reso, coord_type='2d'): 227 | ''' Normalize coordinate to [0, 1] for unit cube experiments. 228 | Corresponds to our 3D model 229 | 230 | Args: 231 | x (tensor): coordinate 232 | reso (int): defined resolution 233 | coord_type (str): coordinate type 234 | ''' 235 | x = (x * reso).long() 236 | if coord_type == '2d': # plane 237 | index = x[:, :, 0] + reso * x[:, :, 1] 238 | elif coord_type == '3d': # grid 239 | index = x[:, :, 0] + reso * (x[:, :, 1] + reso * x[:, :, 2]) 240 | index = index[:, None, :] 241 | return index 242 | 243 | def coord2index(p, vol_range, reso=None, plane='xz'): 244 | ''' Normalize coordinate to [0, 1] for sliding-window experiments. 
245 | Corresponds to our 3D model 246 | 247 | Args: 248 | p (tensor): points 249 | vol_range (numpy array): volume boundary 250 | reso (int): defined resolution 251 | plane (str): feature type, ['xz', 'xy', 'yz'] - canonical planes; ['grid'] - grid volume 252 | ''' 253 | # normalize to [0, 1] 254 | x = normalize_coord(p, vol_range, plane=plane) 255 | 256 | if isinstance(x, np.ndarray): 257 | x = np.floor(x * reso).astype(int) 258 | else: #* pytorch tensor 259 | x = (x * reso).long() 260 | 261 | if x.shape[1] == 2: 262 | index = x[:, 0] + reso * x[:, 1] 263 | index[index > reso**2] = reso**2 264 | elif x.shape[1] == 3: 265 | index = x[:, 0] + reso * (x[:, 1] + reso * x[:, 2]) 266 | index[index > reso**3] = reso**3 267 | 268 | return index[None] 269 | 270 | def update_reso(reso, depth): 271 | ''' Update the defined resolution so that UNet can process. 272 | 273 | Args: 274 | reso (int): defined resolution 275 | depth (int): U-Net number of layers 276 | ''' 277 | base = 2**(int(depth) - 1) 278 | if ~(reso / base).is_integer(): # when this is not integer, U-Net dimension error 279 | for i in range(base): 280 | if ((reso + i) / base).is_integer(): 281 | reso = reso + i 282 | break 283 | return reso 284 | 285 | def decide_total_volume_range(query_vol_metric, recep_field, unit_size, unet_depth): 286 | ''' Update the defined resolution so that UNet can process. 287 | 288 | Args: 289 | query_vol_metric (numpy array): query volume size 290 | recep_field (int): defined the receptive field for U-Net 291 | unit_size (float): the defined voxel size 292 | unet_depth (int): U-Net number of layers 293 | ''' 294 | reso = query_vol_metric / unit_size + recep_field - 1 295 | reso = update_reso(int(reso), unet_depth) # make sure input reso can be processed by UNet 296 | input_vol_metric = reso * unit_size 297 | p_c = np.array([0.0, 0.0, 0.0]).astype(np.float32) 298 | lb_input_vol, ub_input_vol = p_c - input_vol_metric/2, p_c + input_vol_metric/2 299 | lb_query_vol, ub_query_vol = p_c - query_vol_metric/2, p_c + query_vol_metric/2 300 | input_vol = [lb_input_vol, ub_input_vol] 301 | query_vol = [lb_query_vol, ub_query_vol] 302 | 303 | # handle the case when resolution is too large 304 | if reso > 10000: 305 | reso = 1 306 | 307 | return input_vol, query_vol, reso 308 | 309 | def add_key(base, new, base_name, new_name, device=None): 310 | ''' Add new keys to the given input 311 | 312 | Args: 313 | base (tensor): inputs 314 | new (tensor): new info for the inputs 315 | base_name (str): name for the input 316 | new_name (str): name for the new info 317 | device (device): pytorch device 318 | ''' 319 | if (new is not None) and (isinstance(new, dict)): 320 | if device is not None: 321 | for key in new.keys(): 322 | new[key] = new[key].to(device) 323 | base = {base_name: base, 324 | new_name: new} 325 | return base 326 | 327 | class map2local(object): 328 | ''' Add new keys to the given input 329 | 330 | Args: 331 | s (float): the defined voxel size 332 | pos_encoding (str): method for the positional encoding, linear|sin_cos 333 | ''' 334 | def __init__(self, s, pos_encoding='linear'): 335 | super().__init__() 336 | self.s = s 337 | self.pe = positional_encoding(basis_function=pos_encoding) 338 | 339 | def __call__(self, p): 340 | p = torch.remainder(p, self.s) / self.s # always possitive 341 | # p = torch.fmod(p, self.s) / self.s # same sign as input p! 
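        # At this point p holds each point's local coordinate within its cell of size s,
        # rescaled to [0, 1); the positional encoding below is applied to that local coordinate.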
342 | p = self.pe(p) 343 | return p 344 | 345 | class positional_encoding(object): 346 | ''' Positional Encoding (presented in NeRF) 347 | 348 | Args: 349 | basis_function (str): basis function 350 | ''' 351 | def __init__(self, basis_function='sin_cos'): 352 | super().__init__() 353 | self.func = basis_function 354 | 355 | L = 10 356 | freq_bands = 2.**(np.linspace(0, L-1, L)) 357 | self.freq_bands = freq_bands * math.pi 358 | 359 | def __call__(self, p): 360 | if self.func == 'sin_cos': 361 | out = [] 362 | p = 2.0 * p - 1.0 # chagne to the range [-1, 1] 363 | for freq in self.freq_bands: 364 | out.append(torch.sin(freq * p)) 365 | out.append(torch.cos(freq * p)) 366 | p = torch.cat(out, dim=2) 367 | return p 368 | -------------------------------------------------------------------------------- /ACID/src/utils/common_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import json 4 | import scipy 5 | import itertools 6 | import numpy as np 7 | from PIL import Image 8 | from scipy.spatial.transform import Rotation 9 | from sklearn.neighbors import NearestNeighbors 10 | 11 | from sklearn.manifold import TSNE 12 | from matplotlib import pyplot as plt 13 | 14 | 15 | def get_color_map(x): 16 | colours = plt.cm.Spectral(x) 17 | return colours[:, :3] 18 | 19 | def embed_tsne(data): 20 | """ 21 | N x D np.array data 22 | """ 23 | tsne = TSNE(n_components=1, verbose=0, perplexity=40, n_iter=300, random_state=0) 24 | tsne_results = tsne.fit_transform(data) 25 | tsne_results = np.squeeze(tsne_results) 26 | tsne_min = np.min(tsne_results) 27 | tsne_max = np.max(tsne_results) 28 | return (tsne_results - tsne_min) / (tsne_max - tsne_min) 29 | 30 | 31 | ######################################################################## 32 | # Viewpoint transform 33 | ######################################################################## 34 | view_to_order = { 35 | 'cam0': ('X', 'Y', 'Z'), 36 | 'cam1': ('-Z', 'Y', 'X'), 37 | 'cam2': ('Z', 'Y', '-X'), 38 | 'cam3': ('-X', 'Y', '-Z'), 39 | } 40 | def get_axis_pt(val, x, y, z): 41 | multiplier = -1 if '-' in val else 1 42 | if "X" in val: 43 | return x * multiplier 44 | elif "Y" in val: 45 | return y * multiplier 46 | elif "Z" in val: 47 | return z * multiplier 48 | 49 | def world_coord_view_augmentation(view, pts): 50 | order = view_to_order[view] 51 | pts = pts.reshape([-1,3]) 52 | x,y,z = np.moveaxis(pts, 1, 0) 53 | return np.array([get_axis_pt(o,x,y,z) for o in order]).T 54 | 55 | ######################################################################## 56 | # partial observation projection / transform / rendering utilities 57 | ######################################################################## 58 | def transform_points_cam_to_world(cam_pts, camera_pose): 59 | world_pts = np.transpose( 60 | np.dot(camera_pose[0:3, 0:3], np.transpose(cam_pts)) + np.tile(camera_pose[0:3, 3:], (1, cam_pts.shape[0]))) 61 | return world_pts 62 | 63 | def transform_points_world_to_cam(world_points, cam_extr): 64 | return np.transpose( 65 | np.dot( 66 | np.linalg.inv( 67 | cam_extr[0:3, 0:3]), 68 | np.transpose(world_points) 69 | - np.tile(cam_extr[0:3, 3:], (1, world_points.shape[0])))) 70 | 71 | def render_points_slowest(world_points, cam_extr, cam_intr): 72 | cam_points = transform_points_world_to_cam(world_points, cam_extr) 73 | cam_pts_x = cam_points[:,0] 74 | cam_pts_y = cam_points[:,1] 75 | cam_pts_z = cam_points[:,2] 76 | cam_pts_x = -cam_pts_x / cam_pts_z * cam_intr[0,0] + cam_intr[1,2] 77 | cam_pts_y 
= cam_pts_y / cam_pts_z * cam_intr[1,1] + cam_intr[0,2] 78 | cam_pts_x = np.rint(cam_pts_x).astype(int) 79 | cam_pts_y = np.rint(cam_pts_y).astype(int) 80 | points = np.stack([cam_pts_y, cam_pts_x, cam_pts_z, np.arange(len(cam_pts_x))]).T 81 | sorted_pts = sorted(points, key=lambda x: (x[0], x[1])) 82 | grouped_pts = [[*j] for i, j in itertools.groupby( 83 | sorted_pts, 84 | key=lambda x: (x[0] // 3, x[1] // 3))] 85 | min_depth = np.array([sorted(p, key=lambda x: -x[2])[0] for p in grouped_pts]) 86 | min_idx = min_depth[:,-1] 87 | min_depth = min_depth[:,:-1] 88 | return world_points[min_idx.astype(int)] 89 | 90 | def render_points_slow(world_points, cam_extr, cam_intr): 91 | cam_points = transform_points_world_to_cam(world_points, cam_extr) 92 | cam_pts_x = cam_points[:,0] 93 | cam_pts_y = cam_points[:,1] 94 | cam_pts_z = cam_points[:,2] 95 | cam_pts_x = -cam_pts_x / cam_pts_z * cam_intr[0,0] + cam_intr[1,2] 96 | cam_pts_y = cam_pts_y / cam_pts_z * cam_intr[1,1] + cam_intr[0,2] 97 | points = np.stack([cam_pts_y, cam_pts_x, cam_pts_z, np.arange(len(cam_pts_x))]).T 98 | points[:,:2] = np.rint(points[:,:2] / 2) 99 | points = points[points[:,1].argsort()] 100 | points = points[points[:,0].argsort(kind='mergesort')] 101 | grouped_pts = np.split(points[:,2:], np.unique(points[:, :2], axis=0, return_index=True)[1][1:]) 102 | min_depth = np.array([p[p[:,0].argsort()][-1] for p in grouped_pts]) 103 | min_idx = min_depth[:,-1].astype(int) 104 | return world_points[min_idx] 105 | 106 | def render_points(world_points, cam_extr, cam_intr, return_index=False): 107 | cam_points = transform_points_world_to_cam(world_points, cam_extr) 108 | cam_pts_x = cam_points[:,0] 109 | cam_pts_y = cam_points[:,1] 110 | cam_pts_z = cam_points[:,2] 111 | cam_pts_x = -cam_pts_x / cam_pts_z * cam_intr[0,0] + cam_intr[1,2] 112 | cam_pts_y = cam_pts_y / cam_pts_z * cam_intr[1,1] + cam_intr[0,2] 113 | idx = np.rint(cam_pts_y / 2) * 1000 + np.rint(cam_pts_x / 2) 114 | val = np.stack([cam_pts_z, np.arange(len(cam_pts_x))]).T 115 | order = idx.argsort() 116 | idx = idx[order] 117 | val = val[order] 118 | grouped_pts = np.split(val, np.unique(idx, return_index=True)[1][1:]) 119 | min_depth = np.array([p[p[:,0].argsort()][-1] for p in grouped_pts]) 120 | min_idx = min_depth[:,-1].astype(int) 121 | if return_index: 122 | return min_idx 123 | return world_points[min_idx] 124 | 125 | def project_depth_world_space(depth_image, camera_intr, camera_pose, keep_dim=False, project_factor=1.): 126 | cam_pts = project_depth_cam_space(depth_image, camera_intr, keep_dim=False,project_factor=project_factor) 127 | world_pts = transform_points_cam_to_world(cam_pts, camera_pose) 128 | W, H = depth_image.shape 129 | if keep_dim: 130 | world_pts = world_pts.reshape([W, H, 3]) 131 | return world_pts 132 | 133 | def project_depth_cam_space(depth_img, camera_intrinsics, keep_dim=True, project_factor=1.): 134 | # Get depth image size 135 | im_h = depth_img.shape[0] 136 | im_w = depth_img.shape[1] 137 | # Project depth into 3D point cloud in camera coordinates 138 | pix_x, pix_y = np.meshgrid(np.linspace(0, im_w - 1, im_w), np.linspace(0, im_h - 1, im_h)) 139 | cam_pts_x = np.multiply(pix_x - im_w / 2., -depth_img / camera_intrinsics[0, 0]) 140 | cam_pts_y = np.multiply(pix_y - im_h / 2., depth_img / camera_intrinsics[1, 1]) 141 | cam_pts_z = depth_img.copy() 142 | cam_pts_x.shape = (im_h * im_w, 1) 143 | cam_pts_y.shape = (im_h * im_w, 1) 144 | cam_pts_z.shape = (im_h * im_w, 1) 145 | cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1) 
* project_factor 146 | if keep_dim: 147 | cam_pts = cam_pts.reshape([im_h, im_w, 3]) 148 | return cam_pts 149 | 150 | def get_trunc_ab(mean, std, a, b): 151 | return (a - mean) / std, (b - mean) /std 152 | 153 | def get_trunc_ab_range(mean_min, mean_max, std, a, b): 154 | return (a - mean_min) / std, (b - mean_max) /std 155 | 156 | def transform_points(pointcloud, from_range, to_range): 157 | if len(pointcloud.shape) == 1: 158 | pointcloud = pointcloud.reshape([1,-1]) 159 | if pointcloud.shape[1] == 6: 160 | xyz = pointcloud[:,:3] 161 | rgb = pointcloud[:,3:] 162 | else: 163 | xyz = pointcloud 164 | rgb = None 165 | from_center = np.mean(from_range, axis=0) 166 | from_size = np.ptp(from_range, axis=0) 167 | to_center = np.mean(to_range, axis=0) 168 | to_size = np.ptp(to_range, axis=0) 169 | xyz = (xyz - from_center) / from_size * to_size + to_center 170 | if rgb is None: 171 | return xyz 172 | else: 173 | return np.concatenate([xyz, rgb], axis=-1) 174 | 175 | def extent_to_cube(extent): 176 | min_x,min_y,min_z = extent[0] 177 | max_x,max_y,max_z = extent[1] 178 | verts = np.array([ 179 | (max_x,max_y,max_z), 180 | (max_x,max_y,min_z), 181 | (max_x,min_y,max_z), 182 | (max_x,min_y,min_z), 183 | (min_x,max_y,max_z), 184 | (min_x,max_y,min_z), 185 | (min_x,min_y,max_z), 186 | (min_x,min_y,min_z),]) 187 | faces = np.array([ 188 | (1,5,7,3), 189 | (4,3,7,8), 190 | (8,7,5,6), 191 | (6,2,4,8), 192 | (2,1,3,4), 193 | (6,5,1,2),]) 194 | return verts, faces 195 | 196 | ######################################################################## 197 | # Visualization 198 | ######################################################################## 199 | import matplotlib.pyplot as plt 200 | from mpl_toolkits.mplot3d import Axes3D 201 | import math 202 | def set_axes_equal(ax): 203 | '''Make axes of 3D plot have equal scale so that spheres appear as spheres, 204 | cubes as cubes, etc.. This is one possible solution to Matplotlib's 205 | ax.set_aspect('equal') and ax.axis('equal') not working for 3D. 206 | 207 | Input 208 | ax: a matplotlib axis, e.g., as output from plt.gca(). 209 | ''' 210 | 211 | x_limits = ax.get_xlim3d() 212 | y_limits = ax.get_ylim3d() 213 | z_limits = ax.get_zlim3d() 214 | 215 | x_range = abs(x_limits[1] - x_limits[0]) 216 | x_middle = np.mean(x_limits) 217 | y_range = abs(y_limits[1] - y_limits[0]) 218 | y_middle = np.mean(y_limits) 219 | z_range = abs(z_limits[1] - z_limits[0]) 220 | z_middle = np.mean(z_limits) 221 | 222 | # The plot bounding box is a sphere in the sense of the infinity 223 | # norm, hence I call half the max range the plot radius. 
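    # A single radius (half of the largest axis range) is applied to all three axes below,
    # so every axis spans the same extent and the aspect ratio stays 1:1:1.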
224 | plot_radius = 0.5*max([x_range, y_range, z_range]) 225 | 226 | ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius]) 227 | ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius]) 228 | ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius]) 229 | 230 | def set_background_blank(ax): 231 | # Hide grid lines 232 | ax.grid(False) 233 | ax.set_axis_off() 234 | # Hide axes ticks 235 | ax.set_xticks([]) 236 | ax.set_yticks([]) 237 | ax.set_zticks([]) 238 | # First remove fill 239 | ax.xaxis.pane.fill = False 240 | ax.yaxis.pane.fill = False 241 | ax.zaxis.pane.fill = False 242 | # Now set color to white (or whatever is "invisible") 243 | ax.xaxis.pane.set_edgecolor((1.0, 1.0, 1.0, 0.0)) 244 | ax.yaxis.pane.set_edgecolor((1.0, 1.0, 1.0, 0.0)) 245 | ax.zaxis.pane.set_edgecolor((1.0, 1.0, 1.0, 0.0)) 246 | 247 | def side_by_side_point_clouds(point_clouds, angle=(90,0)): 248 | fig = plt.figure() 249 | W = int(len(point_clouds) ** 0.5) 250 | H = math.ceil(len(point_clouds) / W) 251 | for i, pcloud in enumerate(point_clouds): 252 | action = None 253 | flow = None 254 | pts = pcloud['pts'] 255 | title = pcloud['title'] 256 | col = pcloud.get('col', None) 257 | flow = pcloud.get('flow', None) 258 | action = pcloud.get('action', None) 259 | ax = fig.add_subplot(W, H, i+1,projection='3d') 260 | ax.set_title(title) 261 | if flow is not None: 262 | flow_norm = np.linalg.norm(flow, axis=1) 263 | viz_idx = flow_norm > 0.0 264 | flow = flow[viz_idx] 265 | ax.quiver( 266 | pts[:,0][viz_idx], 267 | pts[:,1][viz_idx], 268 | pts[:,2][viz_idx], 269 | flow[:,0], flow[:,1], flow[:,2], 270 | color = 'red', linewidth=3, alpha=0.2 271 | ) 272 | if col is None: 273 | col = 'blue' 274 | ax.scatter(pts[:,0], 275 | pts[:,1], 276 | pts[:,2], color=col,s=0.5) 277 | ax.view_init(*angle) 278 | if action is not None: 279 | ax.scatter(action[0], action[1], 0., 280 | edgecolors='tomato', color='turquoise', marker='*',s=80) 281 | set_axes_equal(ax) 282 | set_background_blank(ax) 283 | fig.tight_layout() 284 | return fig 285 | 286 | def write_pointcoud_as_obj(path, xyzrgb, faces=None): 287 | with open(path, 'w') as fp: 288 | if xyzrgb.shape[1] == 6: 289 | for x,y,z,r,g,b in xyzrgb: 290 | fp.write(f"v {x:.3f} {y:.3f} {z:.3f} {r:.3f} {g:.3f} {b:.3f}\n") 291 | else: 292 | for x,y,z in xyzrgb: 293 | fp.write(f"v {x:.3f} {y:.3f} {z:.3f}\n") 294 | if faces is not None: 295 | for f in faces: 296 | f_str = " ".join([str(i) for i in f]) 297 | fp.write(f"f {f_str}\n") 298 | 299 | ################################# 300 | # Distance Metric 301 | ################################# 302 | def subsample_points(points, resolution=0.0125, return_index=True): 303 | if points.shape[1] == 6: 304 | xyz = points[:,:3] 305 | else: 306 | xyz = points 307 | if points.shape[0] == 0: 308 | if return_index: 309 | return np.arange(0) 310 | return points 311 | idx = np.unique(xyz// resolution * resolution, axis=0, return_index=True)[1] 312 | if return_index: 313 | return idx 314 | return points[idx] 315 | 316 | from sklearn.neighbors import NearestNeighbors 317 | def chamfer_distance(x, y, metric='l2', direction='bi'): 318 | x_nn = NearestNeighbors(n_neighbors=1, leaf_size=1, algorithm='kd_tree', metric=metric).fit(x) 319 | min_y_to_x = x_nn.kneighbors(y)[0] 320 | y_nn = NearestNeighbors(n_neighbors=1, leaf_size=1, algorithm='kd_tree', metric=metric).fit(y) 321 | min_x_to_y = y_nn.kneighbors(x)[0] 322 | return np.mean(min_y_to_x) + np.mean(min_x_to_y) 323 | 324 | def f1_score(x, y, metric='l2', th=0.01): 325 | # x is pred 326 | # y is 
gt 327 | if x.shape[0] == 0: 328 | return 0,0,0 329 | x_nn = NearestNeighbors(n_neighbors=1, leaf_size=1, algorithm='kd_tree', metric=metric).fit(x) 330 | d2 = x_nn.kneighbors(y)[0] 331 | y_nn = NearestNeighbors(n_neighbors=1, leaf_size=1, algorithm='kd_tree', metric=metric).fit(y) 332 | d1 = y_nn.kneighbors(x)[0] 333 | recall = float(sum(d < th for d in d2)) / float(len(d2)) 334 | precision = float(sum(d < th for d in d1)) / float(len(d1)) 335 | 336 | if recall+precision > 0: 337 | fscore = 2 * recall * precision / (recall + precision) 338 | else: 339 | fscore = 0 340 | 341 | return fscore, precision, recall -------------------------------------------------------------------------------- /ACID/src/utils/libmise/mise.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | cimport cython 3 | from cython.operator cimport dereference as dref 4 | from libcpp.vector cimport vector 5 | from libcpp.map cimport map 6 | from libc.math cimport isnan, NAN 7 | import numpy as np 8 | 9 | 10 | cdef struct Vector3D: 11 | int x, y, z 12 | 13 | 14 | cdef struct Voxel: 15 | Vector3D loc 16 | unsigned int level 17 | bint is_leaf 18 | unsigned long children[2][2][2] 19 | 20 | 21 | cdef struct GridPoint: 22 | Vector3D loc 23 | double value 24 | bint known 25 | 26 | 27 | cdef inline unsigned long vec_to_idx(Vector3D coord, long resolution): 28 | cdef unsigned long idx 29 | idx = resolution * resolution * coord.x + resolution * coord.y + coord.z 30 | return idx 31 | 32 | 33 | cdef class MISE: 34 | cdef vector[Voxel] voxels 35 | cdef vector[GridPoint] grid_points 36 | cdef map[long, long] grid_point_hash 37 | cdef readonly int resolution_0 38 | cdef readonly int depth 39 | cdef readonly double threshold 40 | cdef readonly int voxel_size_0 41 | cdef readonly int resolution 42 | 43 | def __cinit__(self, int resolution_0, int depth, double threshold): 44 | self.resolution_0 = resolution_0 45 | self.depth = depth 46 | self.threshold = threshold 47 | self.voxel_size_0 = (1 << depth) 48 | self.resolution = resolution_0 * self.voxel_size_0 49 | 50 | # Create initial voxels 51 | self.voxels.reserve(resolution_0 * resolution_0 * resolution_0) 52 | 53 | cdef Voxel voxel 54 | cdef GridPoint point 55 | cdef Vector3D loc 56 | cdef int i, j, k 57 | for i in range(resolution_0): 58 | for j in range(resolution_0): 59 | for k in range (resolution_0): 60 | loc = Vector3D( 61 | i * self.voxel_size_0, 62 | j * self.voxel_size_0, 63 | k * self.voxel_size_0, 64 | ) 65 | voxel = Voxel( 66 | loc=loc, 67 | level=0, 68 | is_leaf=True, 69 | ) 70 | 71 | assert(self.voxels.size() == vec_to_idx(Vector3D(i, j, k), resolution_0)) 72 | self.voxels.push_back(voxel) 73 | 74 | # Create initial grid points 75 | self.grid_points.reserve((resolution_0 + 1) * (resolution_0 + 1) * (resolution_0 + 1)) 76 | for i in range(resolution_0 + 1): 77 | for j in range(resolution_0 + 1): 78 | for k in range(resolution_0 + 1): 79 | loc = Vector3D( 80 | i * self.voxel_size_0, 81 | j * self.voxel_size_0, 82 | k * self.voxel_size_0, 83 | ) 84 | assert(self.grid_points.size() == vec_to_idx(Vector3D(i, j, k), resolution_0 + 1)) 85 | self.add_grid_point(loc) 86 | 87 | def update(self, long[:, :] points, double[:] values): 88 | """Update points and set their values. 
Also determine all active voxels and subdivide them.""" 89 | assert(points.shape[0] == values.shape[0]) 90 | assert(points.shape[1] == 3) 91 | cdef Vector3D loc 92 | cdef long idx 93 | cdef int i 94 | 95 | # Find all indices of point and set value 96 | for i in range(points.shape[0]): 97 | loc = Vector3D(points[i, 0], points[i, 1], points[i, 2]) 98 | idx = self.get_grid_point_idx(loc) 99 | if idx == -1: 100 | raise ValueError('Point not in grid!') 101 | self.grid_points[idx].value = values[i] 102 | self.grid_points[idx].known = True 103 | # Subdivide activate voxels and add new points 104 | self.subdivide_voxels() 105 | 106 | def query(self): 107 | """Query points to evaluate.""" 108 | # Find all points with unknown value 109 | cdef vector[Vector3D] points 110 | cdef int n_unknown = 0 111 | for p in self.grid_points: 112 | if not p.known: 113 | n_unknown += 1 114 | 115 | points.reserve(n_unknown) 116 | for p in self.grid_points: 117 | if not p.known: 118 | points.push_back(p.loc) 119 | 120 | # Convert to numpy 121 | points_np = np.zeros((points.size(), 3), dtype=np.int64) 122 | cdef long[:, :] points_view = points_np 123 | for i in range(points.size()): 124 | points_view[i, 0] = points[i].x 125 | points_view[i, 1] = points[i].y 126 | points_view[i, 2] = points[i].z 127 | 128 | return points_np 129 | 130 | def to_dense(self): 131 | """Output dense matrix at highest resolution.""" 132 | out_array = np.full((self.resolution + 1,) * 3, np.nan) 133 | cdef double[:, :, :] out_view = out_array 134 | cdef GridPoint point 135 | cdef int i, j, k 136 | 137 | for point in self.grid_points: 138 | # Take voxel for which points is upper left corner 139 | # assert(point.known) 140 | out_view[point.loc.x, point.loc.y, point.loc.z] = point.value 141 | 142 | # Complete along x axis 143 | for i in range(1, self.resolution + 1): 144 | for j in range(self.resolution + 1): 145 | for k in range(self.resolution + 1): 146 | if isnan(out_view[i, j, k]): 147 | out_view[i, j, k] = out_view[i-1, j, k] 148 | 149 | # Complete along y axis 150 | for i in range(self.resolution + 1): 151 | for j in range(1, self.resolution + 1): 152 | for k in range(self.resolution + 1): 153 | if isnan(out_view[i, j, k]): 154 | out_view[i, j, k] = out_view[i, j-1, k] 155 | 156 | 157 | # Complete along z axis 158 | for i in range(self.resolution + 1): 159 | for j in range(self.resolution + 1): 160 | for k in range(1, self.resolution + 1): 161 | if isnan(out_view[i, j, k]): 162 | out_view[i, j, k] = out_view[i, j, k-1] 163 | assert(not isnan(out_view[i, j, k])) 164 | return out_array 165 | 166 | def get_points(self): 167 | points_np = np.zeros((self.grid_points.size(), 3), dtype=np.int64) 168 | values_np = np.zeros((self.grid_points.size()), dtype=np.float64) 169 | 170 | cdef long[:, :] points_view = points_np 171 | cdef double[:] values_view = values_np 172 | cdef Vector3D loc 173 | cdef int i 174 | 175 | for i in range(self.grid_points.size()): 176 | loc = self.grid_points[i].loc 177 | points_view[i, 0] = loc.x 178 | points_view[i, 1] = loc.y 179 | points_view[i, 2] = loc.z 180 | values_view[i] = self.grid_points[i].value 181 | 182 | return points_np, values_np 183 | 184 | cdef void subdivide_voxels(self) except +: 185 | cdef vector[bint] next_to_positive 186 | cdef vector[bint] next_to_negative 187 | cdef int i, j, k 188 | cdef long idx 189 | cdef Vector3D loc, adj_loc 190 | 191 | # Initialize vectors 192 | next_to_positive.resize(self.voxels.size(), False) 193 | next_to_negative.resize(self.voxels.size(), False) 194 | 195 | # Iterate over 
grid points and mark voxels active 196 | # TODO: can move this to update operation and add attibute to voxel 197 | for grid_point in self.grid_points: 198 | loc = grid_point.loc 199 | if not grid_point.known: 200 | continue 201 | 202 | # Iterate over the 8 adjacent voxels 203 | for i in range(-1, 1): 204 | for j in range(-1, 1): 205 | for k in range(-1, 1): 206 | adj_loc = Vector3D( 207 | x=loc.x + i, 208 | y=loc.y + j, 209 | z=loc.z + k, 210 | ) 211 | idx = self.get_voxel_idx(adj_loc) 212 | if idx == -1: 213 | continue 214 | 215 | if grid_point.value >= self.threshold: 216 | next_to_positive[idx] = True 217 | if grid_point.value <= self.threshold: 218 | next_to_negative[idx] = True 219 | 220 | cdef int n_subdivide = 0 221 | 222 | for idx in range(self.voxels.size()): 223 | if not self.voxels[idx].is_leaf or self.voxels[idx].level == self.depth: 224 | continue 225 | if next_to_positive[idx] and next_to_negative[idx]: 226 | n_subdivide += 1 227 | 228 | self.voxels.reserve(self.voxels.size() + 8 * n_subdivide) 229 | self.grid_points.reserve(self.voxels.size() + 19 * n_subdivide) 230 | 231 | for idx in range(self.voxels.size()): 232 | if not self.voxels[idx].is_leaf or self.voxels[idx].level == self.depth: 233 | continue 234 | if next_to_positive[idx] and next_to_negative[idx]: 235 | self.subdivide_voxel(idx) 236 | 237 | cdef void subdivide_voxel(self, long idx): 238 | cdef Voxel voxel 239 | cdef GridPoint point 240 | cdef Vector3D loc0 = self.voxels[idx].loc 241 | cdef Vector3D loc 242 | cdef int new_level = self.voxels[idx].level + 1 243 | cdef int new_size = 1 << (self.depth - new_level) 244 | assert(new_level <= self.depth) 245 | assert(1 <= new_size <= self.voxel_size_0) 246 | 247 | # Current voxel is not leaf anymore 248 | self.voxels[idx].is_leaf = False 249 | # Add new voxels 250 | cdef int i, j, k 251 | for i in range(2): 252 | for j in range(2): 253 | for k in range(2): 254 | loc = Vector3D( 255 | x=loc0.x + i * new_size, 256 | y=loc0.y + j * new_size, 257 | z=loc0.z + k * new_size, 258 | ) 259 | voxel = Voxel( 260 | loc=loc, 261 | level=new_level, 262 | is_leaf=True 263 | ) 264 | 265 | self.voxels[idx].children[i][j][k] = self.voxels.size() 266 | self.voxels.push_back(voxel) 267 | 268 | # Add new grid points 269 | for i in range(3): 270 | for j in range(3): 271 | for k in range(3): 272 | loc = Vector3D( 273 | loc0.x + i * new_size, 274 | loc0.y + j * new_size, 275 | loc0.z + k * new_size, 276 | ) 277 | 278 | # Only add new grid points 279 | if self.get_grid_point_idx(loc) == -1: 280 | self.add_grid_point(loc) 281 | 282 | 283 | @cython.cdivision(True) 284 | cdef long get_voxel_idx(self, Vector3D loc) except +: 285 | """Utility function for getting voxel index corresponding to 3D coordinates.""" 286 | # Shorthands 287 | cdef long resolution = self.resolution 288 | cdef long resolution_0 = self.resolution_0 289 | cdef long depth = self.depth 290 | cdef long voxel_size_0 = self.voxel_size_0 291 | 292 | # Return -1 if point lies outside bounds 293 | if not (0 <= loc.x < resolution and 0<= loc.y < resolution and 0 <= loc.z < resolution): 294 | return -1 295 | 296 | # Coordinates in coarse voxel grid 297 | cdef Vector3D loc0 = Vector3D( 298 | x=loc.x >> depth, 299 | y=loc.y >> depth, 300 | z=loc.z >> depth, 301 | ) 302 | 303 | # Initial voxels 304 | cdef int idx = vec_to_idx(loc0, resolution_0) 305 | cdef Voxel voxel = self.voxels[idx] 306 | assert(voxel.loc.x == loc0.x * voxel_size_0) 307 | assert(voxel.loc.y == loc0.y * voxel_size_0) 308 | assert(voxel.loc.z == loc0.z * voxel_size_0) 
309 | 310 | # Relative coordinates 311 | cdef Vector3D loc_rel = Vector3D( 312 | x=loc.x - (loc0.x << depth), 313 | y=loc.y - (loc0.y << depth), 314 | z=loc.z - (loc0.z << depth), 315 | ) 316 | 317 | cdef Vector3D loc_offset 318 | cdef long voxel_size = voxel_size_0 319 | 320 | while not voxel.is_leaf: 321 | voxel_size = voxel_size >> 1 322 | assert(voxel_size >= 1) 323 | 324 | # Determine child 325 | loc_offset = Vector3D( 326 | x=1 if (loc_rel.x >= voxel_size) else 0, 327 | y=1 if (loc_rel.y >= voxel_size) else 0, 328 | z=1 if (loc_rel.z >= voxel_size) else 0, 329 | ) 330 | # New voxel 331 | idx = voxel.children[loc_offset.x][loc_offset.y][loc_offset.z] 332 | voxel = self.voxels[idx] 333 | 334 | # New relative coordinates 335 | loc_rel = Vector3D( 336 | x=loc_rel.x - loc_offset.x * voxel_size, 337 | y=loc_rel.y - loc_offset.y * voxel_size, 338 | z=loc_rel.z - loc_offset.z * voxel_size, 339 | ) 340 | 341 | assert(0 <= loc_rel.x < voxel_size) 342 | assert(0 <= loc_rel.y < voxel_size) 343 | assert(0 <= loc_rel.z < voxel_size) 344 | 345 | 346 | # Return idx 347 | return idx 348 | 349 | 350 | cdef inline void add_grid_point(self, Vector3D loc): 351 | cdef GridPoint point = GridPoint( 352 | loc=loc, 353 | value=0., 354 | known=False, 355 | ) 356 | self.grid_point_hash[vec_to_idx(loc, self.resolution + 1)] = self.grid_points.size() 357 | self.grid_points.push_back(point) 358 | 359 | cdef inline int get_grid_point_idx(self, Vector3D loc): 360 | p_idx = self.grid_point_hash.find(vec_to_idx(loc, self.resolution + 1)) 361 | if p_idx == self.grid_point_hash.end(): 362 | return -1 363 | 364 | cdef int idx = dref(p_idx).second 365 | assert(self.grid_points[idx].loc.x == loc.x) 366 | assert(self.grid_points[idx].loc.y == loc.y) 367 | assert(self.grid_points[idx].loc.z == loc.z) 368 | 369 | return idx -------------------------------------------------------------------------------- /ACID/src/conv_onet/generation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.optim as optim 3 | from torch import autograd 4 | import numpy as np 5 | from tqdm import trange, tqdm 6 | import trimesh 7 | from src.utils import libmcubes, common_util 8 | from src.common import make_3d_grid, normalize_coord, add_key, coord2index 9 | from src.utils.libmise import MISE 10 | import time 11 | import math 12 | 13 | counter = 0 14 | 15 | 16 | class Generator3D(object): 17 | ''' Generator class for Occupancy Networks. 18 | 19 | It provides functions to generate the final mesh as well as refining options.
20 | 21 | Args: 22 | model (nn.Module): trained Occupancy Network model 23 | points_batch_size (int): batch size for points evaluation 24 | threshold (float): threshold value 25 | refinement_step (int): number of refinement steps 26 | device (device): pytorch device 27 | resolution0 (int): start resolution for MISE 28 | upsampling_steps (int): number of upsampling steps 29 | with_normals (bool): whether normals should be estimated 30 | padding (float): how much padding should be used for MISE 31 | sample (bool): whether z should be sampled 32 | input_type (str): type of input 33 | vol_info (dict): volume information 34 | vol_bound (dict): volume boundary 35 | simplify_nfaces (int): number of faces the mesh should be simplified to 36 | ''' 37 | 38 | def __init__(self, model, points_batch_size=100000, 39 | threshold=0.5, refinement_step=0, device=None, 40 | resolution0=16, upsampling_steps=3, 41 | with_normals=False, padding=0.1, sample=False, 42 | input_type = None, 43 | vol_info = None, 44 | vol_bound = None, 45 | simplify_nfaces=None): 46 | self.model = model.to(device) 47 | self.points_batch_size = points_batch_size 48 | self.refinement_step = refinement_step 49 | self.threshold = threshold 50 | self.device = device 51 | self.resolution0 = resolution0 52 | self.upsampling_steps = upsampling_steps 53 | self.with_normals = with_normals 54 | self.input_type = input_type 55 | self.padding = padding 56 | self.sample = sample 57 | self.simplify_nfaces = simplify_nfaces 58 | 59 | # for pointcloud_crop 60 | self.vol_bound = vol_bound 61 | if vol_info is not None: 62 | self.input_vol, _, _ = vol_info 63 | 64 | def generate_mesh(self, data, return_stats=True): 65 | ''' Generates the output mesh. 66 | 67 | Args: 68 | data (dict): dictionary of input tensors 69 | return_stats (bool): whether stats should be returned 70 | ''' 71 | self.model.eval() 72 | device = self.device 73 | for k,v in data.items(): 74 | data[k] = v.to(device) 75 | stats_dict = {} 76 | 77 | t0 = time.time() 78 | 79 | # obtain features for all crops 80 | with torch.no_grad(): 81 | c = self.model.encode_inputs(data) 82 | if type(c) is tuple: 83 | for cs in c: 84 | for k,v in cs.items(): 85 | cs[k] = v[0].unsqueeze(0) 86 | else: 87 | for k,v in c.items(): 88 | c[k] = v[0].unsqueeze(0) 89 | stats_dict['time (encode inputs)'] = time.time() - t0 90 | 91 | mesh = self.generate_from_latent(c, stats_dict=stats_dict) 92 | 93 | if return_stats: 94 | return mesh, stats_dict 95 | else: 96 | return mesh 97 | 98 | def generate_from_latent(self, c=None, stats_dict={}, **kwargs): 99 | ''' Generates mesh from latent. 100 | Works for shapes normalized to a unit cube 101 | 102 | Args: 103 | c (tensor): latent conditioned code c 104 | stats_dict (dict): stats dictionary 105 | ''' 106 | threshold = np.log(self.threshold) - np.log(1.
- self.threshold) 107 | 108 | t0 = time.time() 109 | # Compute bounding box size 110 | box_size = 1 + self.padding 111 | 112 | # Shortcut 113 | if self.upsampling_steps == 0: 114 | nx = self.resolution0 115 | pointsf = box_size * make_3d_grid( 116 | (-0.5,)*3, (0.5,)*3, (nx,)*3 117 | ) 118 | 119 | values = self.eval_points(pointsf, c, **kwargs).cpu().numpy() 120 | value_grid = values.reshape(nx, nx, nx) 121 | else: 122 | mesh_extractor = MISE( 123 | self.resolution0, self.upsampling_steps, threshold) 124 | 125 | points = mesh_extractor.query() 126 | while points.shape[0] != 0: 127 | # Query points 128 | pointsf = points / mesh_extractor.resolution 129 | # Normalize to bounding box 130 | pointsf = box_size * (pointsf - 0.5) 131 | pointsf = torch.FloatTensor(pointsf).to(self.device) 132 | # Evaluate model and update 133 | values = self.eval_points(pointsf, c, **kwargs).cpu().numpy() 134 | values = values.astype(np.float64) 135 | mesh_extractor.update(points, values) 136 | points = mesh_extractor.query() 137 | 138 | value_grid = mesh_extractor.to_dense() 139 | 140 | 141 | # Extract mesh 142 | stats_dict['time (eval points)'] = time.time() - t0 143 | 144 | mesh = self.extract_mesh(value_grid, c, stats_dict=stats_dict) 145 | return mesh 146 | 147 | def eval_points(self, p, c=None, vol_bound=None, **kwargs): 148 | ''' Evaluates the occupancy values for the points. 149 | 150 | Args: 151 | p (tensor): points 152 | c (tensor): encoded feature volumes 153 | ''' 154 | p_split = torch.split(p, self.points_batch_size) 155 | occ_hats = [] 156 | for pi in p_split: 157 | pi = pi.unsqueeze(0).to(self.device) 158 | with torch.no_grad(): 159 | occ_hat = self.model.eval_points(pi, c, **kwargs)['occ'].logits 160 | occ_hats.append(occ_hat.squeeze(0).detach().cpu()) 161 | 162 | occ_hat = torch.cat(occ_hats, dim=0) 163 | return occ_hat 164 | 165 | def extract_mesh(self, occ_hat, c=None, stats_dict=dict()): 166 | ''' Extracts the mesh from the predicted occupancy grid. 167 | 168 | Args: 169 | occ_hat (tensor): value grid of occupancies 170 | c (tensor): encoded feature volumes 171 | stats_dict (dict): stats dictionary 172 | ''' 173 | # Some short hands 174 | n_x, n_y, n_z = occ_hat.shape 175 | box_size = 1 + self.padding 176 | threshold = np.log(self.threshold) - np.log(1. 
- self.threshold) 177 | # Make sure that mesh is watertight 178 | t0 = time.time() 179 | occ_hat_padded = np.pad( 180 | occ_hat, 1, 'constant', constant_values=-1e6) 181 | vertices, triangles = libmcubes.marching_cubes( 182 | occ_hat_padded, threshold) 183 | stats_dict['time (marching cubes)'] = time.time() - t0 184 | # Strange behaviour in libmcubes: vertices are shifted by 0.5 185 | vertices -= 0.5 186 | # # Undo padding 187 | vertices -= 1 188 | 189 | if self.vol_bound is not None: 190 | # Scale the mesh back to its original metric 191 | bb_min = self.vol_bound['query_vol'][:, 0].min(axis=0) 192 | bb_max = self.vol_bound['query_vol'][:, 1].max(axis=0) 193 | mc_unit = max(bb_max - bb_min) / (self.vol_bound['axis_n_crop'].max() * self.resolution0*2**self.upsampling_steps) 194 | vertices = vertices * mc_unit + bb_min 195 | else: 196 | # Normalize to bounding box 197 | vertices /= np.array([n_x-1, n_y-1, n_z-1]) 198 | vertices = box_size * (vertices - 0.5) 199 | 200 | # Create mesh 201 | mesh = trimesh.Trimesh(vertices / (1., 1., 3), triangles, 202 | vertex_normals=None, 203 | process=False) 204 | 205 | # Directly return if mesh is empty 206 | if vertices.shape[0] == 0: 207 | return mesh 208 | 209 | # TODO: normals are lost here 210 | if self.simplify_nfaces is not None: 211 | t0 = time.time() 212 | from src.utils.libsimplify import simplify_mesh 213 | mesh = simplify_mesh(mesh, self.simplify_nfaces, 5.) 214 | stats_dict['time (simplify)'] = time.time() - t0 215 | 216 | # Refine mesh 217 | if self.refinement_step > 0: 218 | t0 = time.time() 219 | self.refine_mesh(mesh, occ_hat, c) 220 | stats_dict['time (refine)'] = time.time() - t0 221 | 222 | return mesh 223 | 224 | def generate_pointcloud(self, data, threshold=0.75, use_gt_occ=False): 225 | self.model.eval() 226 | device = self.device 227 | self.model.eval() 228 | device = self.device 229 | for k,v in data.items(): 230 | data[k] = v.to(device) 231 | stats_dict = {} 232 | 233 | t0 = time.time() 234 | 235 | # obtain features for all crops 236 | with torch.no_grad(): 237 | c = self.model.encode_inputs(data) 238 | pts = data['sampled_pts'] 239 | B,_,N,C = pts.shape 240 | pts = pts.reshape([B*2,N,C]) 241 | p_split = torch.split(pts, self.points_batch_size, dim=-1) 242 | occ_hats = [] 243 | features = [] 244 | flows = [] 245 | for pi in p_split: 246 | with torch.no_grad(): 247 | outputs = self.model.eval_points(pi, c) 248 | occ_hats.append((outputs['occ'].probs > threshold).detach().cpu()) 249 | if 'corr' in outputs: 250 | features.append(outputs['corr'].detach().cpu()) 251 | if 'flow' in outputs: 252 | flows.append(outputs['flow'].detach().cpu()) 253 | pts = pts.cpu().numpy() 254 | occ_hat = torch.cat(occ_hats, dim=1).numpy() 255 | if use_gt_occ: 256 | occ_hat = data['sampled_occ'].reshape([B*2, N]).cpu().numpy() 257 | pos_pts0 = pts[0][occ_hat[0] == 1.].reshape((-1,3)) 258 | pos_idx0 = common_util.subsample_points(pos_pts0, resolution=0.013) 259 | pos_pts0 = pos_pts0[pos_idx0] 260 | pos_pts1 = pts[1][occ_hat[1] == 1.].reshape((-1,3)) 261 | pos_idx1 = common_util.subsample_points(pos_pts1, resolution=0.013) 262 | pos_pts1 = pos_pts1[pos_idx1] 263 | pos_pts = np.concatenate([pos_pts0, pos_pts1], axis=0) / (1.,1.,3.) 
264 | if len(features) != 0: 265 | feature = torch.cat(features, dim=1).numpy() 266 | f_dim = feature.shape[-1] 267 | pos_f0 = feature[0][occ_hat[0] == 1.].reshape((-1,f_dim)) 268 | pos_f1 = feature[1][occ_hat[1] == 1.].reshape((-1,f_dim)) 269 | pos_f0 = pos_f0[pos_idx0] 270 | pos_f1 = pos_f1[pos_idx1] 271 | pos_f = np.concatenate([pos_f0, pos_f1], axis=0) 272 | if pos_f.shape[0] < 100: 273 | pcloud_both = pos_pts 274 | else: 275 | tsne_result = common_util.embed_tsne(pos_f) 276 | colors = common_util.get_color_map(tsne_result) 277 | pcloud_both = np.concatenate([pos_pts, colors], axis=1) 278 | else: 279 | pcloud_both = pos_pts 280 | pcloud0 = pcloud_both[:pos_pts0.shape[0]] 281 | pcloud1 = pcloud_both[pos_pts0.shape[0]:] 282 | if len(flows) != 0: 283 | flow = torch.cat(flows, dim=1).numpy() / 10. 284 | pos_f0 = flow[0][occ_hat[0] == 1.].reshape((-1,3)) 285 | pos_f1 = flow[1][occ_hat[1] == 1.].reshape((-1,3)) 286 | pos_f0 = pos_f0[pos_idx0] 287 | pos_f1 = pos_f1[pos_idx1] 288 | pcloud_unroll_0 = pcloud0.copy() 289 | pcloud_unroll_0[:,:3] += pos_f0 / (1.,1.,3.) 290 | pcloud_unroll_1 = pcloud1.copy() 291 | pcloud_unroll_1[:,:3] += pos_f1 / (1.,1.,3.) 292 | return pcloud0, pcloud1,pcloud_unroll_0,pcloud_unroll_1 293 | return pcloud0, pcloud1 294 | 295 | 296 | def refine_mesh(self, mesh, occ_hat, c=None): 297 | ''' Refines the predicted mesh. 298 | 299 | Args: 300 | mesh (trimesh object): predicted mesh 301 | occ_hat (tensor): predicted occupancy grid 302 | c (tensor): latent conditioned code c 303 | ''' 304 | 305 | self.model.eval() 306 | 307 | # Some shorthands 308 | n_x, n_y, n_z = occ_hat.shape 309 | assert(n_x == n_y == n_z) 310 | # threshold = np.log(self.threshold) - np.log(1. - self.threshold) 311 | threshold = self.threshold 312 | 313 | # Vertex parameter 314 | v0 = torch.FloatTensor(mesh.vertices).to(self.device) 315 | v = torch.nn.Parameter(v0.clone()) 316 | 317 | # Faces of mesh 318 | faces = torch.LongTensor(mesh.faces).to(self.device) 319 | 320 | # Start optimization 321 | optimizer = optim.RMSprop([v], lr=1e-4) 322 | 323 | for it_r in trange(self.refinement_step): 324 | optimizer.zero_grad() 325 | 326 | # Loss 327 | face_vertex = v[faces] 328 | eps = np.random.dirichlet((0.5, 0.5, 0.5), size=faces.shape[0]) 329 | eps = torch.FloatTensor(eps).to(self.device) 330 | face_point = (face_vertex * eps[:, :, None]).sum(dim=1) 331 | 332 | face_v1 = face_vertex[:, 1, :] - face_vertex[:, 0, :] 333 | face_v2 = face_vertex[:, 2, :] - face_vertex[:, 1, :] 334 | face_normal = torch.cross(face_v1, face_v2) 335 | face_normal = face_normal / \ 336 | (face_normal.norm(dim=1, keepdim=True) + 1e-10) 337 | face_value = torch.sigmoid( 338 | self.model.eval_points(face_point.unsqueeze(0), c)['occ'].logits 339 | ) 340 | normal_target = -autograd.grad( 341 | [face_value.sum()], [face_point], create_graph=True)[0] 342 | 343 | normal_target = \ 344 | normal_target / \ 345 | (normal_target.norm(dim=1, keepdim=True) + 1e-10) 346 | loss_target = (face_value - threshold).pow(2).mean() 347 | loss_normal = \ 348 | (face_normal - normal_target).pow(2).sum(dim=1).mean() 349 | 350 | loss = loss_target + 0.01 * loss_normal 351 | 352 | # Update 353 | loss.backward() 354 | optimizer.step() 355 | 356 | mesh.vertices = v.data.cpu().numpy() 357 | 358 | return mesh 359 | 360 | def generate_occ_grid(self, c=None, stats_dict={}, **kwargs): 361 | ''' Generates mesh from latent. 
362 | Works for shapes normalized to a unit cube 363 | 364 | Args: 365 | c (tensor): latent conditioned code c 366 | stats_dict (dict): stats dictionary 367 | ''' 368 | threshold = np.log(self.threshold) - np.log(1. - self.threshold) 369 | 370 | t0 = time.time() 371 | # Compute bounding box size 372 | box_size = 1 + self.padding 373 | 374 | # Shortcut 375 | if self.upsampling_steps == 0: 376 | nx = self.resolution0 377 | pointsf = box_size * make_3d_grid( 378 | (-0.5,)*3, (0.5,)*3, (nx,)*3 379 | ) 380 | 381 | values = self.eval_points(pointsf, c, **kwargs).cpu().numpy() 382 | value_grid = values.reshape(nx, nx, nx) 383 | else: 384 | mesh_extractor = MISE( 385 | self.resolution0, self.upsampling_steps, threshold) 386 | 387 | points = mesh_extractor.query() 388 | while points.shape[0] != 0: 389 | # Query points 390 | pointsf = points / mesh_extractor.resolution 391 | # Normalize to bounding box 392 | pointsf = box_size * (pointsf - 0.5) 393 | pointsf = torch.FloatTensor(pointsf).to(self.device) 394 | # Evaluate model and update 395 | values = self.eval_points(pointsf, c, **kwargs).cpu().numpy() 396 | values = values.astype(np.float64) 397 | mesh_extractor.update(points, values) 398 | points = mesh_extractor.query() 399 | 400 | value_grid = mesh_extractor.to_dense() 401 | 402 | return value_grid 403 | --------------------------------------------------------------------------------
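The multiresolution extraction loop used by generate_from_latent and generate_occ_grid above (query the MISE grid for unknown points, evaluate them, feed the values back so that active voxels are subdivided, repeat until no points remain, then densify and run marching cubes) can be exercised without a trained network. Below is a minimal sketch that drives MISE with an analytic sphere occupancy in place of eval_points; the resolution, depth, and threshold values are illustrative assumptions, not settings taken from the released configs.

import numpy as np
from src.utils.libmise import MISE
from src.utils import libmcubes

def sphere_occupancy(points01):
    # Signed occupancy stand-in: positive inside a sphere of radius 0.35
    # centred in the unit cube, negative outside.
    return 0.35 - np.linalg.norm(points01 - 0.5, axis=-1)

extractor = MISE(16, 2, 0.0)  # resolution_0=16, depth=2, threshold=0.0 (assumed values)
points = extractor.query()    # int64 grid coordinates of points with unknown value
while points.shape[0] != 0:
    # Map integer grid coordinates to [0, 1]^3 before evaluating the field.
    values = sphere_occupancy(points / extractor.resolution).astype(np.float64)
    extractor.update(points, values)  # store values; straddling voxels get subdivided
    points = extractor.query()

value_grid = extractor.to_dense()  # dense (resolution + 1)^3 grid
vertices, triangles = libmcubes.marching_cubes(value_grid, 0.0)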
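Similarly, the Chamfer distance and F-score helpers under the "Distance Metric" header earlier in this section operate on plain (N, 3) NumPy arrays, so they can be checked in isolation. A minimal sketch, assuming the file is importable as src.utils.common_util (the import path generation.py uses) and substituting random clouds for real predictions:

import numpy as np
from src.utils import common_util

pred = np.random.rand(2048, 3)  # stand-in for a predicted point cloud
gt = np.random.rand(2048, 3)    # stand-in for the ground-truth point cloud

# Optionally thin both clouds to a common grid resolution before comparison.
pred = pred[common_util.subsample_points(pred, resolution=0.0125)]
gt = gt[common_util.subsample_points(gt, resolution=0.0125)]

cd = common_util.chamfer_distance(pred, gt)  # symmetric mean nearest-neighbour distance
fscore, precision, recall = common_util.f1_score(pred, gt, th=0.01)
print(f"chamfer={cd:.4f} f1={fscore:.3f} precision={precision:.3f} recall={recall:.3f}")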