├── README.md ├── data ├── test.json ├── train.json └── validation.json ├── models ├── __init__.py ├── encoder.py ├── fc_classifier.py └── idecoder.py ├── nerf ├── __init__.py ├── intant_ngp.py ├── loader.py ├── loader_gt.py └── utils.py ├── nerf2vec ├── __init__.py ├── config.py ├── export_embeddings.py ├── train_nerf2vec.py └── utils.py ├── settings.py ├── task_classification ├── __init__.py ├── config.py └── train_classifier.py ├── task_generation ├── __init__.py ├── export_embeddings.py ├── latent_3d_points │ ├── .gitignore │ ├── .gitmodules │ ├── LICENSE.md │ ├── README.md │ ├── __init__.py │ ├── doc │ │ └── images │ │ │ └── teaser.jpg │ ├── download_data.sh │ ├── external │ │ ├── __init__.py │ │ ├── python_plyfile │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ └── plyfile.py │ │ └── structural_losses │ │ │ ├── __init__.py │ │ │ ├── approxmatch.cpp │ │ │ ├── approxmatch.cu │ │ │ ├── makefile │ │ │ ├── tf_approxmatch.cpp │ │ │ ├── tf_approxmatch.py │ │ │ ├── tf_approxmatch_compile.sh │ │ │ ├── tf_approxmatch_g.cu │ │ │ ├── tf_nndistance.cpp │ │ │ ├── tf_nndistance.py │ │ │ ├── tf_nndistance_compile.sh │ │ │ └── tf_nndistance_g.cu │ ├── notebooks │ │ ├── __init__.py │ │ ├── compute_evaluation_metrics.ipynb │ │ ├── train_latent_gan.ipynb │ │ ├── train_raw_gan.ipynb │ │ └── train_single_class_ae.ipynb │ └── src │ │ ├── __init__.py │ │ ├── ae_templates.py │ │ ├── autoencoder.py │ │ ├── encoders_decoders.py │ │ ├── evaluation_metrics.py │ │ ├── gan.py │ │ ├── general_utils.py │ │ ├── generators_discriminators.py │ │ ├── in_out.py │ │ ├── latent_gan.py │ │ ├── neural_net.py │ │ ├── point_net_ae.py │ │ ├── raw_gan.py │ │ ├── tf_utils.py │ │ ├── vanilla_gan.py │ │ └── w_gan_gp.py ├── train_latent_gan.py └── viz_nerf.py ├── task_interp_and_retrieval ├── interp.py └── retrieval.py └── task_mapping_network ├── README.md ├── cfg ├── completion.yaml ├── export_embeddings.yaml ├── inr2vec.yaml ├── inrs_dataset.yaml └── pcd_dataset.yaml ├── export_inrs_embeddings.py ├── export_nerfs_embeddings.py ├── inits └── in3_out1_h512_l4.pt ├── inr2vec ├── create_inrs_dataset.py ├── create_point_clouds_dataset.py ├── models │ ├── __init__.py │ ├── encoder.py │ ├── idecoder.py │ └── transfer.py ├── train_inr2vec.py └── utils.py ├── train_completion.py └── viz.py /README.md: -------------------------------------------------------------------------------- 1 | # nf2vec 2 | 3 | This repository contains the code related to **nf2vec** framework, which is detailed in the paper [Deep Learning on Object-centric 3D Neural Fields](https://arxiv.org/abs/2312.13277). In particular, here you can find the code regarding processing NeRFs. If you want to use the previous version of this framework for processing shapes, refer to [inr2vec](https://github.com/CVLAB-Unibo/inr2vec). 4 | 5 | 6 | ## MACHINE CONFIGURATION 7 | 8 | Before running the code, ensure that your machine is properly configured. 9 | This project was developed with the following main dependencies: 10 | * python==3.8.18 11 | * torch==1.12.0+cu113 12 | * torchvision==0.13.0+cu113 13 | * nerfacc==0.3.5 (with the proper CUDA version set) 14 | * wandb==0.16.0 15 | 16 | ### nf2vec 17 | 18 | What follows are commands that you can execute to replicate the environment in which *nf2vec* was originally trained: 19 | 20 | 1. Install Python 3.8.18: 21 | ```bash 22 | conda install python=3.8.18 23 | ``` 24 | 25 | 2. Install pip: 26 | ```bash 27 | conda install -c anaconda pip 28 | ``` 29 | 30 | 3. 
Install PyTorch and torchvision: 31 | ```bash 32 | pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113 33 | ``` 34 | 35 | 4. Install CUDA Toolkit: 36 | ```bash 37 | conda install -c "nvidia/label/cuda-11.7.1" cuda-toolkit 38 | ``` 39 | 40 | 5. Install Ninja and Tiny CUDA NN: 41 | ```bash 42 | pip install ninja git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 43 | ``` 44 | 45 | 6. Install NerfAcc: 46 | ```bash 47 | pip install nerfacc==0.3.5 -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.12.0_cu113.html 48 | ``` 49 | 50 | 7. Install Einops: 51 | ```bash 52 | conda install -c conda-forge einops 53 | ``` 54 | 55 | 8. Install ImageIO: 56 | ```bash 57 | conda install -c conda-forge imageio 58 | ``` 59 | 60 | 9. Install WanDB: 61 | ```bash 62 | pip install wandb==0.16.0 63 | ``` 64 | 10. Install h5py: 65 | ```bash 66 | conda install -c anaconda h5py 67 | ``` 68 | 11. Install TorchMetrics: 69 | ```bash 70 | pip install torchmetrics 71 | ``` 72 | 73 | ### Generation 74 | The generation task is based on a *Latent GAN* model detailed at [THIS](https://github.com/optas/latent_3d_points) link. Please, follow the instructions provided at that link to properly configure your environment. 75 | 76 | ### Mapping Network 77 | The mapping network task requires the training of the *inr2vec* framework. Please, refer to [THIS](https://github.com/CVLAB-Unibo/inr2vec?tab=readme-ov-file#setup) page to properly configure your environment. 78 | 79 | ## TRAINING AND EXPERIMENTS 80 | This section contains the details required to run the code. 81 | 82 | **IMPORTANT NOTES**: 83 | 1. each module cited below *must* be executed from the root of the project, and not within the corresponding packages. This will ensure that all the paths used can properly work. 84 | 85 | 2. the file *settings.py* contains all the paths (e.g., dataset location, model weights, etc...) and generic configurations that are used from each module explained below. 86 | 87 | 3. Some training and experiments, such as the training of the *nf2vec* framework and the classification task, use the *wandb* library. If you want to use it, then you need to change the following two variables: ``` os.environ["WANDB_SILENT"]``` and ```os.environ["WANDB_MODE"]```, which are located at the beginning of the *settings.py* module. 88 | 89 | ## Train *nf2vec* 90 | 91 | To train *nf2vec* you need to have a dataset of trained NeRFs. The implemented code expects that there exist the following files: 92 | * data/train.json 93 | * data/validation.json 94 | * data/test.json 95 | 96 | These JSONs hold a list of file paths, with each path corresponding to a NeRF model that has been trained, and then used in a specific data split. In particular, each path corresponds to a folder, and each folder contains the following relevant files: 97 | * the trained NeRF's weights 98 | * the NeRF's occupancy grid 99 | * JSON files with transform matrices and other paramters necessary to train NeRFs. 100 | 101 | The name of the files contained in these folders should not be changed. Within the repository, you can find the JSON files used to originally train the framework. 102 | 103 | Execute the following command to train *nf2vec*: 104 | ```bash 105 | python nerf2vec/train_nerf2vec.py 106 | ``` 107 | If you have enabled *wandb*, then you should update its settings located in the *config_wandb* method, which is localed in the *train_nerf2vec.py* module. 
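As a reference for the dataset format described above, each split file is simply a JSON list of NeRF folder paths. A minimal sketch of how such a file is consumed, mirroring the dataset classes in this repository (it only assumes that `data/train.json` exists):

```python
import json
import os

# Each split file (e.g. data/train.json) is a plain JSON list of folder paths,
# one per trained NeRF used in that split.
with open(os.path.join("data", "train.json")) as f:
    nerf_paths = json.load(f)

assert isinstance(nerf_paths, list), "The json file provided is not a list."

# Every entry points to a folder containing the trained weights, the occupancy
# grid and the transform JSONs; the weights file name must stay 'nerf_weights.pth'.
first_nerf_dir = nerf_paths[0]
weights_path = os.path.join(first_nerf_dir, "nerf_weights.pth")
print(first_nerf_dir, weights_path)
```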
108 | 109 | ## Export *nerf2vec* embeddings 110 | Execute the following command to export the *nerf2vec*'s embeddings: 111 | ```bash 112 | python nerf2vec/export_embeddings.py 113 | ``` 114 | Note that these embeddings are **necessary** for other tasks, such as classification, retrieval and generation. 115 | 116 | ## Retrieval task 117 | Execute the following command to perform the retrieval task: 118 | ```bash 119 | python task_interp_and_retrieval/retrieval.py 120 | ``` 121 | The results will be shown in the *task_interp_and_retrieval/retrieval_plots_X* folder, where X depends on the chosen split (i.e., train, validation or test). The split can be set in the *main* method of the *retrieval.py* module. 122 | 123 | Each file created during a specific retrieval iteration will be named using the same prefix represented by a randomly generated UUID. 124 | 125 | 126 | ## Interpolation task 127 | Execute the following command to perform the interpolation task: 128 | ```bash 129 | python task_interp_and_retrieval/interp.py 130 | ``` 131 | The results will be shown in the *task_interp_and_retrieval/interp_plots_X* folder, where X depends on the chosen split (i.e., train, validation or test). The split can be set in the *main* method of the *retrieval.py* module. 132 | 133 | ## Classification task 134 | Execute the following command to perform the classification task: 135 | ```bash 136 | python task_classification/train_classifier.py 137 | ``` 138 | If you have enabled *wandb*, then you should update its settings located in the *config_wandb* method, which is localed in the *train_classifier.py* module. 139 | 140 | ## Generation task 141 | In order to generate and visualize the new embeddings, it is necessary to execute some operations following a specific order. 142 | 143 | ### 1) Export embeddings 144 | The following command creates the folder *task_generation/latent_embeddings*, which will contain the *nerf2vec*'s embedding properly organized for this task. 145 | ```bash 146 | python task_generation/export_embeddings.py 147 | ``` 148 | 149 | ### 2) Train GANs 150 | The following command creates the folder *task_generation/experiments*, which will contain both the weights of the trained models and the generated embeddings: 151 | ```bash 152 | python task_generation/train_latent_gan.py 153 | ``` 154 | All the hyperparameters used to train the *Latent GANs* can be found inside the *train_latent_gan.py* module. 155 | 156 | ### 3) Create renderings 157 | The following command creates renderings from the embeddings generated during the previous step: 158 | ```bash 159 | python task_generation/viz_nerf.py 160 | ``` 161 | The renderings will be created in the *GAN_plots_X* folder, where X is the ID of a specific class. 162 | 163 | ## Mapping network map 164 | Please refer to [THIS](task_mapping_network/README.md) README for this task. 165 | 166 | # Datasets and model weights 167 | Please contact us if you need access to the datasets, exported embeddings, and weights of the trained models used in all experiments. 
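If you obtain the pre-trained weights, the hedged sketch below shows how a *nf2vec* checkpoint plugs into this codebase; it mirrors `load_nerf2vec_checkpoint` in `nerf2vec/export_embeddings.py`, and the checkpoint file name used here is purely illustrative:

```python
import torch

from models.encoder import Encoder
from nerf2vec import config as nerf2vec_config

encoder = Encoder(
    nerf2vec_config.MLP_UNITS,              # 64: feature size of each row of the flattened NeRF weight matrix
    nerf2vec_config.ENCODER_HIDDEN_DIM,     # [512, 512, 1024, 1024]
    nerf2vec_config.ENCODER_EMBEDDING_DIM,  # 1024
)

# Illustrative file name; checkpoints live under settings.NERF2VEC_CKPTS_PATH.
ckpt = torch.load("nerf2vec/train/ckpts/499.pt", map_location="cpu")
encoder.load_state_dict(ckpt["encoder"])
encoder.eval()
```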
168 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/models/__init__.py -------------------------------------------------------------------------------- /models/encoder.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class Encoder(nn.Module): 8 | def __init__(self, input_dim: int, hidden_dims: List[int], embed_dim: int) -> None: 9 | super().__init__() 10 | 11 | layers = [] 12 | for idx in range(len(hidden_dims)): 13 | in_ch = input_dim if idx == 0 else hidden_dims[idx - 1] 14 | out_ch = hidden_dims[idx] 15 | layers.append(nn.Conv1d(in_ch, out_ch, 1)) 16 | layers.append(nn.BatchNorm1d(out_ch)) 17 | layers.append(nn.ReLU()) 18 | 19 | layers.append(nn.Conv1d(hidden_dims[-1], embed_dim, 1)) 20 | 21 | self.layers = nn.Sequential(*layers) 22 | self.embed_dim = embed_dim 23 | 24 | def forward(self, x: torch.Tensor) -> torch.Tensor: 25 | x_channels_first = torch.transpose(x, 2, 1) 26 | x = self.layers(x_channels_first) 27 | x, _ = torch.max(x, 2) 28 | 29 | return x 30 | -------------------------------------------------------------------------------- /models/fc_classifier.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from torch import Tensor, nn 4 | 5 | 6 | class FcClassifier(nn.Module): 7 | def __init__(self, layers_dim: List[int], num_classes: int) -> None: 8 | super().__init__() 9 | 10 | layers = [] 11 | if len(layers_dim) > 1: 12 | for i in range(len(layers_dim) - 1): 13 | layers.append(nn.Linear(layers_dim[i], layers_dim[i + 1])) 14 | layers.append(nn.BatchNorm1d(layers_dim[i + 1])) 15 | layers.append(nn.ReLU()) 16 | layers.append(nn.Dropout()) 17 | layers.append(nn.Linear(layers_dim[-1], num_classes)) 18 | 19 | self.net = nn.Sequential(*layers) 20 | 21 | def forward(self, x: Tensor) -> Tensor: 22 | return self.net(x) 23 | -------------------------------------------------------------------------------- /models/idecoder.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple, List, Union 2 | 3 | import torch 4 | from einops import repeat 5 | from torch import Tensor, nn 6 | import tinycudann as tcnn 7 | 8 | from nerf.intant_ngp import _TruncExp 9 | 10 | class CoordsEncoder: 11 | def __init__( 12 | self, 13 | encoding_conf: dict, 14 | input_dims: int = 3 15 | ) -> None: 16 | self.input_dims = input_dims 17 | 18 | self.coords_enc = tcnn.Encoding(input_dims, encoding_conf, seed=999) 19 | self.out_dim = self.coords_enc.n_output_dims 20 | 21 | def apply_encoding(self, x): 22 | return self.coords_enc(x) 23 | 24 | def embed(self, inputs: Tensor) -> Tensor: 25 | # return torch.cat([fn(inputs) for fn in self.embed_fns], -1) 26 | result_encoding = self.apply_encoding(inputs.view(-1, 3)) 27 | result_encoding = result_encoding.view(inputs.size()[0],inputs.size()[1],-1) 28 | return result_encoding 29 | 30 | class ImplicitDecoder(nn.Module): 31 | def __init__( 32 | self, 33 | embed_dim: int, 34 | in_dim: int, 35 | hidden_dim: int, 36 | num_hidden_layers_before_skip: int, 37 | num_hidden_layers_after_skip: int, 38 | out_dim: int, 39 | encoding_conf: dict, # Added for NerfAcc 40 | aabb: Union[torch.Tensor, List[float]] # 
Added for NerfAcc 41 | ) -> None: 42 | super().__init__() 43 | 44 | self.coords_enc = CoordsEncoder(encoding_conf=encoding_conf, input_dims=in_dim) 45 | coords_dim = self.coords_enc.out_dim 46 | 47 | # ################################################################################ 48 | # Added for NerfAcc 49 | # ################################################################################ 50 | trunc_exp = _TruncExp.apply 51 | self.density_activation = lambda x: trunc_exp(x - 1) 52 | self.aabb = aabb 53 | self.in_dim = in_dim 54 | # ################################################################################ 55 | 56 | self.in_layer = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU()) 57 | 58 | self.skip_proj = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU()) 59 | 60 | before_skip = [] 61 | for _ in range(num_hidden_layers_before_skip): 62 | before_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())) 63 | self.before_skip = nn.Sequential(*before_skip) 64 | 65 | after_skip = [] 66 | for _ in range(num_hidden_layers_after_skip): 67 | after_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())) 68 | after_skip.append(nn.Linear(hidden_dim, out_dim)) 69 | self.after_skip = nn.Sequential(*after_skip) 70 | 71 | def forward(self, embeddings: Tensor, coords: Tensor) -> Tensor: 72 | 73 | 74 | # Sometimes the ray march algorithm calls the model with an input with 0 length. 75 | # The CutlassMLP crashes in these cases, therefore this fix has been applied. 76 | batch_size, n_coords, _ = coords.size() 77 | if n_coords == 0: 78 | rgb = torch.zeros([batch_size, 0, 3], device=coords.device) 79 | density = torch.zeros([batch_size, 0, 1], device=coords.device) 80 | return rgb, density 81 | 82 | # ################################################################################ 83 | # Added for NerfAcc 84 | # ################################################################################ 85 | aabb_min, aabb_max = torch.split(self.aabb, self.in_dim, dim=-1) 86 | coords = (coords - aabb_min) / (aabb_max - aabb_min) 87 | selector = ((coords > 0.0) & (coords < 1.0)).all(dim=-1) 88 | # ################################################################################ 89 | 90 | coords = self.coords_enc.embed(coords) 91 | 92 | repeated_embeddings = repeat(embeddings, "b d -> b n d", n=coords.shape[1]) 93 | 94 | emb_and_coords = torch.cat([repeated_embeddings, coords], dim=-1) 95 | 96 | x = self.in_layer(emb_and_coords) 97 | x = self.before_skip(x) 98 | 99 | inp_proj = self.skip_proj(emb_and_coords) 100 | x = x + inp_proj 101 | 102 | x = self.after_skip(x) 103 | # return x.squeeze(-1) # ORIGINAL INR2VEC IMPLEMENTATION 104 | 105 | # ################################################################################ 106 | # Added for NerfAcc 107 | # ################################################################################ 108 | rgb, density_before_activation = x[..., :3], x[..., 3] 109 | density_before_activation = density_before_activation[:, :, None] 110 | 111 | # Be sure that the density is non-negative 112 | density = ( 113 | self.density_activation(density_before_activation) 114 | * selector[..., None] 115 | ) 116 | 117 | rgb = torch.nn.Sigmoid()(rgb) 118 | 119 | return rgb, density 120 | # ################################################################################ 121 | 122 | -------------------------------------------------------------------------------- /nerf/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/nerf/__init__.py -------------------------------------------------------------------------------- /nerf/intant_ngp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 3 | """ 4 | 5 | from typing import Callable, List, Union 6 | 7 | import torch 8 | from torch.autograd import Function 9 | from torch.cuda.amp import custom_bwd, custom_fwd 10 | 11 | try: 12 | import tinycudann as tcnn 13 | except ImportError as e: 14 | print( 15 | f"Error: {e}! " 16 | "Please install tinycudann by: " 17 | "pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch" 18 | ) 19 | exit() 20 | 21 | 22 | class _TruncExp(Function): # pylint: disable=abstract-method 23 | # Implementation from torch-ngp: 24 | # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py 25 | @staticmethod 26 | @custom_fwd(cast_inputs=torch.float32) 27 | def forward(ctx, x): # pylint: disable=arguments-differ 28 | ctx.save_for_backward(x) 29 | return torch.exp(x) 30 | 31 | @staticmethod 32 | @custom_bwd 33 | def backward(ctx, g): # pylint: disable=arguments-differ 34 | x = ctx.saved_tensors[0] 35 | return g * torch.exp(torch.clamp(x, max=15)) 36 | 37 | 38 | trunc_exp = _TruncExp.apply 39 | 40 | 41 | def contract_to_unisphere( 42 | x: torch.Tensor, 43 | aabb: torch.Tensor, 44 | eps: float = 1e-6, 45 | derivative: bool = False, 46 | ): 47 | aabb_min, aabb_max = torch.split(aabb, 3, dim=-1) 48 | x = (x - aabb_min) / (aabb_max - aabb_min) 49 | x = x * 2 - 1 # aabb is at [-1, 1] 50 | mag = x.norm(dim=-1, keepdim=True) 51 | mask = mag.squeeze(-1) > 1 52 | 53 | if derivative: 54 | dev = (2 * mag - 1) / mag**2 + 2 * x**2 * ( 55 | 1 / mag**3 - (2 * mag - 1) / mag**4 56 | ) 57 | dev[~mask] = 1.0 58 | dev = torch.clamp(dev, min=eps) 59 | return dev 60 | else: 61 | x[mask] = (2 - 1 / mag[mask]) * (x[mask] / mag[mask]) 62 | x = x / 4 + 0.5 # [-inf, inf] is at [0, 1] 63 | return x 64 | 65 | 66 | class NGPradianceField(torch.nn.Module): 67 | """Instance-NGP radiance Field""" 68 | 69 | def __init__( 70 | self, 71 | aabb: Union[torch.Tensor, List[float]], 72 | num_dim: int = 3, 73 | use_viewdirs: bool = False, 74 | density_activation: Callable = lambda x: trunc_exp(x - 1), 75 | unbounded: bool = False, 76 | geo_feat_dim: int = 3, 77 | encoding='Frequency', 78 | mlp='CutlassMLP', 79 | activation='Sine', 80 | n_hidden_layers=4, 81 | n_neurons=256, 82 | encoding_size=24 83 | ) -> None: 84 | super().__init__() 85 | if not isinstance(aabb, torch.Tensor): 86 | aabb = torch.tensor(aabb, dtype=torch.float32,) 87 | # NERF2VEC: Added persisten=False 88 | self.register_buffer("aabb", aabb, persistent=False) 89 | self.num_dim = num_dim 90 | self.use_viewdirs = use_viewdirs 91 | self.density_activation = density_activation 92 | self.unbounded = unbounded 93 | 94 | self.geo_feat_dim = geo_feat_dim if use_viewdirs else 0 95 | 96 | if self.use_viewdirs: 97 | single_mlp_encoding_config = { 98 | "otype": "Composite", 99 | "nested": [ 100 | # POSITION ENCODING 101 | { 102 | "n_dims_to_encode": 3, 103 | "otype": "Frequency", 104 | "n_frequencies": 6, 105 | 106 | }, 107 | # DIRECTION ENCODING 108 | { 109 | "n_dims_to_encode": 3, 110 | "otype": "SphericalHarmonics", 111 | "degree": 1, # Determines the output's dimension, which is degree^2 112 | }, 113 | 
# {"otype": "Identity", "n_bins": 4, "degree": 4}, 114 | ] 115 | } 116 | else: 117 | if encoding == 'Frequency': 118 | single_mlp_encoding_config = { 119 | "otype": "Frequency", 120 | "n_frequencies": encoding_size 121 | } 122 | else: 123 | single_mlp_encoding_config = { 124 | "otype": "Identity" 125 | } 126 | 127 | # print(f'*'*40) 128 | # print(f'Initializing model: \n- mlp: {mlp} - {n_hidden_layers} hidden layers - {n_neurons} neurons\n- activation: {activation.upper()}\n- encoding: {encoding.upper()} - size: {encoding_size}') 129 | # print(f'*'*40) 130 | self.mlp_base = tcnn.NetworkWithInputEncoding( 131 | seed=999, 132 | n_input_dims=self.num_dim+self.geo_feat_dim, 133 | n_output_dims=4, 134 | encoding_config=single_mlp_encoding_config, 135 | network_config={ 136 | "otype": mlp, # FullyFusedMLP, CutlassMLP 137 | "activation": activation, 138 | "output_activation": "None", 139 | "n_neurons": n_neurons, 140 | "n_hidden_layers": n_hidden_layers 141 | }, 142 | ) 143 | 144 | def query_density(self, x, return_feat: bool = False): 145 | if self.unbounded: 146 | x = contract_to_unisphere(x, self.aabb) 147 | else: 148 | aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1) 149 | x = (x - aabb_min) / (aabb_max - aabb_min) 150 | selector = ((x > 0.0) & (x < 1.0)).all(dim=-1) 151 | x = ( 152 | # This view actually seems to do nothing 153 | self.mlp_base(x.view(-1, self.num_dim)) 154 | # change the shape of the tensor to [all dimension of x but last, 1 + the feature dimension] 155 | .view(list(x.shape[:-1]) + [1 + self.geo_feat_dim]) 156 | .to(x) # Same dtype as x (the input) 157 | ) 158 | 159 | density_before_activation, base_mlp_out = torch.split( 160 | x, [1, self.geo_feat_dim], dim=-1 161 | ) 162 | density = ( 163 | self.density_activation(density_before_activation) 164 | * selector[..., None] 165 | ) 166 | if return_feat: 167 | return density, base_mlp_out 168 | else: 169 | return density 170 | 171 | def _query_rgb(self, dir, embedding): 172 | # tcnn requires directions in the range [0, 1] 173 | if self.use_viewdirs: 174 | dir = (dir + 1.0) / 2.0 175 | d = self.direction_encoding(dir.view(-1, dir.shape[-1])) 176 | 177 | # Concatenation of the DENSITIY MLP and the encoded view direction 178 | h = torch.cat([d, embedding.view(-1, self.geo_feat_dim)], dim=-1) 179 | else: 180 | h = embedding.view(-1, self.geo_feat_dim) 181 | rgb = ( 182 | self.mlp_head(h) 183 | .view(list(embedding.shape[:-1]) + [3]) 184 | .to(embedding) 185 | ) 186 | return rgb 187 | 188 | def _query_density_and_rgb(self, x, dir=None): 189 | 190 | if self.unbounded: 191 | x = contract_to_unisphere(x, self.aabb) 192 | else: 193 | aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1) 194 | x = (x - aabb_min) / (aabb_max - aabb_min) 195 | selector = ((x > 0.0) & (x < 1.0)).all(dim=-1) 196 | 197 | if self.use_viewdirs: 198 | if dir is not None: 199 | dir = (dir + 1.0) / 2.0 200 | # d = self.direction_encoding(dir.view(-1, dir.shape[-1])) 201 | 202 | x = torch.cat([x, dir], dim=-1) 203 | else: 204 | # if self.random_tensor == None: 205 | # random = torch.ones(x.shape[0], self.geo_feat_dim, device=x.device).to(x) 206 | # all ones or zeros are detrimental for the loss. It is much better a random tensor. 
207 | # random = self.random_tensor.repeat(x.shape[0], 1).to(device=x.device) 208 | random = torch.rand( 209 | x.shape[0], self.geo_feat_dim, device=x.device) 210 | # random = torch.zeros(x.shape[0], self.geo_feat_dim, device=x.device).to(x) 211 | x = torch.cat([x, random], dim=-1) 212 | 213 | # Sometimes the ray march algorithm calls the model with an input with 0 length. 214 | # The CutlassMLP crashes in these cases, therefore this fix has been applied. 215 | if len(x) == 0: 216 | rgb = torch.zeros([0, 3], device=x.device) 217 | density = torch.zeros([0, 1], device=x.device) 218 | return rgb, density 219 | 220 | out = ( 221 | # self.mlp_base(x.view(-1, self.num_dim)) # This view actually seems to do nothing 222 | # This view actually seems to do nothing 223 | self.mlp_base(x.view(-1, self.num_dim+self.geo_feat_dim)) 224 | # change the shape of the tensor to [all dimension of x but last, 1 + the feature dimension] 225 | # .view(list(x.shape[:-1]) + [1 + self.geo_feat_dim]) 226 | .to(x) # Same dtype as x (the input) 227 | ) 228 | 229 | rgb, density_before_activation = out[..., :3], out[..., 3] 230 | density_before_activation = density_before_activation[:, None] 231 | 232 | # Be sure that the density is non-negative 233 | density = ( 234 | self.density_activation(density_before_activation) 235 | * selector[..., None] 236 | ) 237 | 238 | rgb = torch.nn.Sigmoid()(rgb) 239 | 240 | return rgb, density 241 | 242 | def forward( 243 | self, 244 | positions: torch.Tensor, 245 | directions: torch.Tensor = None, 246 | ): 247 | """ 248 | if self.use_viewdirs and (directions is not None): 249 | assert ( 250 | positions.shape == directions.shape 251 | ), f"{positions.shape} v.s. {directions.shape}" 252 | 253 | # density, embedding = self.query_density(positions, return_feat=True) 254 | 255 | # rgb = self._query_rgb(directions, embedding=embedding) 256 | """ 257 | 258 | rgb, density = self._query_density_and_rgb(positions, directions) 259 | 260 | # print(f'rgb.shape: {rgb.shape}') 261 | # print(f'density.shape: {density.shape}') 262 | 263 | return rgb, density 264 | -------------------------------------------------------------------------------- /nerf/loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | The NeRFLoder class inherits from Dataset, but it's not used as a Dataset in the training loop. This because the current 3 | implementation was inherited from the original NerfAcc implementation. In the future, it could be useful to remove this dependency. 
4 | """ 5 | import json 6 | import os 7 | import torch 8 | import concurrent 9 | import numpy as np 10 | import imageio.v2 as imageio 11 | import torch.nn.functional as F 12 | 13 | from nerf.utils import Rays 14 | 15 | 16 | def read_image(file_path): 17 | return imageio.imread(file_path) 18 | 19 | 20 | def _load_renderings(data_dir: str, split: str): 21 | 22 | with open( 23 | os.path.join(data_dir, "transforms_{}.json".format(split)), "r" 24 | ) as fp: 25 | meta = json.load(fp) 26 | images = [] 27 | camtoworlds = [] 28 | 29 | file_paths = [] 30 | for i in range(len(meta["frames"])): 31 | frame = meta["frames"][i] 32 | fname = os.path.join(data_dir, frame["file_path"] + ".png") 33 | 34 | file_paths.append(fname) 35 | 36 | camtoworlds.append(frame["transform_matrix"]) 37 | 38 | with concurrent.futures.ThreadPoolExecutor() as executor: 39 | results = executor.map(read_image, file_paths) 40 | images = list(results) 41 | 42 | 43 | images = np.stack(images, axis=0) 44 | camtoworlds = np.stack(camtoworlds, axis=0) 45 | 46 | h, w = images.shape[1:3] 47 | camera_angle_x = float(meta["camera_angle_x"]) 48 | focal = 0.5 * w / np.tan(0.5 * camera_angle_x) 49 | 50 | return images, camtoworlds, focal 51 | 52 | 53 | class NeRFLoader(torch.utils.data.Dataset): 54 | 55 | WIDTH, HEIGHT = 224, 224 56 | NEAR, FAR = 2.0, 6.0 57 | OPENGL_CAMERA = True 58 | 59 | def __init__( 60 | self, 61 | data_dir: str, 62 | split: str = "train", 63 | color_bkgd_aug: str = "random", 64 | num_rays: int = None, 65 | near: float = None, 66 | far: float = None, 67 | device: str = "cuda:0", 68 | weights_file_name: str = "nerf_weights.pth", 69 | training: bool = True, 70 | images_RAM = None 71 | ): 72 | super().__init__() 73 | assert color_bkgd_aug in ["white", "black", "random"] 74 | self.num_rays = num_rays 75 | self.near = self.NEAR if near is None else near 76 | self.far = self.FAR if far is None else far 77 | 78 | self.training = training 79 | 80 | self.images_RAM = images_RAM 81 | 82 | self.color_bkgd_aug = color_bkgd_aug 83 | 84 | self.weights_file_path = os.path.join(data_dir, weights_file_name) 85 | 86 | self.images, self.camtoworlds, self.focal = _load_renderings(#_from_RAM( 87 | data_dir, split#, self.images_RAM 88 | ) 89 | self.images = torch.from_numpy(self.images).to(device).to(torch.uint8) 90 | self.camtoworlds = ( 91 | torch.from_numpy(self.camtoworlds).to(device).to(torch.float32) 92 | ) 93 | self.K = torch.tensor( 94 | [ 95 | [self.focal, 0, self.WIDTH / 2.0], 96 | [0, self.focal, self.HEIGHT / 2.0], 97 | [0, 0, 1], 98 | ], 99 | dtype=torch.float32, 100 | device=device, 101 | ) # (3, 3) 102 | 103 | assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH) 104 | 105 | def __len__(self): 106 | return len(self.images) 107 | 108 | @torch.no_grad() 109 | def __getitem__(self, index): 110 | data = self.fetch_data(index) 111 | data = self.preprocess(data) 112 | return data 113 | 114 | def preprocess(self, data): 115 | """Process the fetched / cached data with randomness.""" 116 | rgba, rays = data["rgba"], data["rays"] 117 | pixels, alpha = torch.split(rgba, [3, 1], dim=-1) 118 | 119 | if self.training: 120 | if self.color_bkgd_aug == "random": 121 | color_bkgd = torch.rand(3, device=self.images.device) 122 | elif self.color_bkgd_aug == "white": 123 | color_bkgd = torch.ones(3, device=self.images.device) 124 | elif self.color_bkgd_aug == "black": 125 | color_bkgd = torch.zeros(3, device=self.images.device) 126 | else: 127 | color_bkgd = torch.zeros(3, device=self.images.device) 128 | 129 | pixels = pixels * alpha + 
color_bkgd * (1.0 - alpha) 130 | return { 131 | "pixels": pixels, # [n_rays, 3] or [h, w, 3] 132 | "rays": rays, # [n_rays,] or [h, w] 133 | "color_bkgd": color_bkgd, # [3,] 134 | **{k: v for k, v in data.items() if k not in ["rgba", "rays"]}, 135 | } 136 | 137 | def update_num_rays(self, num_rays): 138 | self.num_rays = num_rays 139 | 140 | def fetch_data(self, index): 141 | """Fetch the data (it maybe cached for multiple batches).""" 142 | 143 | num_rays = self.num_rays 144 | 145 | if self.training: 146 | image_id = torch.randint( 147 | 0, 148 | len(self.images), 149 | size=(num_rays,), 150 | device=self.images.device, 151 | ) 152 | 153 | x = torch.randint( 154 | 0, self.WIDTH, size=(num_rays,), device=self.images.device 155 | ) 156 | y = torch.randint( 157 | 0, self.HEIGHT, size=(num_rays,), device=self.images.device 158 | ) 159 | else: 160 | image_id = [index] 161 | x, y = torch.meshgrid( 162 | torch.arange(self.WIDTH, device=self.images.device), 163 | torch.arange(self.HEIGHT, device=self.images.device), 164 | indexing="xy", 165 | ) 166 | x = x.flatten() 167 | y = y.flatten() 168 | 169 | # generate rays 170 | rgba = self.images[image_id, y, x] / 255.0 # (num_rays, 4) 171 | c2w = self.camtoworlds[image_id] # (num_rays, 3, 4) 172 | 173 | camera_dirs = F.pad( 174 | torch.stack( 175 | [ 176 | (x - self.K[0, 2] + 0.5) / self.K[0, 0], 177 | (y - self.K[1, 2] + 0.5) 178 | / self.K[1, 1] 179 | * (-1.0 if self.OPENGL_CAMERA else 1.0), 180 | ], 181 | dim=-1, 182 | ), 183 | (0, 1), 184 | value=(-1.0 if self.OPENGL_CAMERA else 1.0), 185 | ) # [num_rays, 3] 186 | 187 | directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1) 188 | origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape) 189 | viewdirs = directions / torch.linalg.norm( 190 | directions, dim=-1, keepdims=True 191 | ) 192 | 193 | if self.training: 194 | origins = torch.reshape(origins, (num_rays, 3)) 195 | viewdirs = torch.reshape(viewdirs, (num_rays, 3)) 196 | rgba = torch.reshape(rgba, (num_rays, 4)) 197 | else: 198 | origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3)) 199 | viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3)) 200 | rgba = torch.reshape(rgba, (self.HEIGHT, self.WIDTH, 4)) 201 | 202 | rays = Rays(origins=origins, viewdirs=viewdirs) 203 | 204 | return { 205 | "rgba": rgba, # [h, w, 4] 206 | "rays": rays, # [h, w, 3] 207 | } 208 | -------------------------------------------------------------------------------- /nerf/loader_gt.py: -------------------------------------------------------------------------------- 1 | """ 2 | The NeRFLoderGT class inherits from Dataset, but it's not used as a Dataset in the training loop. This because the current 3 | implementation was inherited from the original NerfAcc implementation. In the future, it could be useful to remove this dependency. 
4 | """ 5 | import json 6 | import os 7 | 8 | import imageio.v2 as imageio 9 | import numpy as np 10 | import torch 11 | import torch.nn.functional as F 12 | 13 | from nerf.utils import Rays 14 | 15 | def _load_renderings(data_dir: str, split: str, h: int, w: int): 16 | 17 | with open( 18 | os.path.join(data_dir, "transforms_{}_compressed.json".format(split)), "r" 19 | ) as fp: 20 | meta = json.load(fp) 21 | 22 | camtoworlds = [] 23 | for i in range(len(meta["frames"])): 24 | frame = meta["frames"][i] 25 | # fname = os.path.join(data_dir, frame["file_path"] + ".png") 26 | camtoworlds.append(frame["transform_matrix"]) 27 | 28 | camtoworlds = np.stack(camtoworlds, axis=0) 29 | 30 | camera_angle_x = float(meta["camera_angle_x"]) 31 | focal = 0.5 * w / np.tan(0.5 * camera_angle_x) 32 | 33 | return camtoworlds, focal 34 | 35 | 36 | class NeRFLoaderGT(torch.utils.data.Dataset): 37 | 38 | WIDTH, HEIGHT = 224, 224 39 | NEAR, FAR = 2.0, 6.0 40 | OPENGL_CAMERA = True 41 | 42 | def __init__( 43 | self, 44 | data_dir: str, 45 | split: str = "train", 46 | color_bkgd_aug: str = "random", 47 | num_rays: int = None, 48 | near: float = None, 49 | far: float = None, 50 | device: str = "cuda:0", 51 | weights_file_name: str = "nerf_weights.pth", 52 | training: bool = True 53 | ): 54 | super().__init__() 55 | assert color_bkgd_aug in ["white", "black", "random"] 56 | self.num_rays = num_rays 57 | self.near = self.NEAR if near is None else near 58 | self.far = self.FAR if far is None else far 59 | 60 | self.training = training 61 | 62 | self.device = device 63 | 64 | self.color_bkgd_aug = color_bkgd_aug 65 | 66 | self.weights_file_path = os.path.join(data_dir, weights_file_name) 67 | 68 | self.camtoworlds, self.focal = _load_renderings(#_from_RAM( 69 | data_dir, split, self.HEIGHT, self.WIDTH 70 | ) 71 | self.camtoworlds = ( 72 | torch.from_numpy(self.camtoworlds).to(self.device).to(torch.float32) 73 | ) 74 | self.K = torch.tensor( 75 | [ 76 | [self.focal, 0, self.WIDTH / 2.0], 77 | [0, self.focal, self.HEIGHT / 2.0], 78 | [0, 0, 1], 79 | ], 80 | dtype=torch.float32, 81 | device=device, 82 | ) # (3, 3) 83 | 84 | def __len__(self): 85 | return len(self.camtoworlds) 86 | 87 | @torch.no_grad() 88 | def __getitem__(self, index): 89 | data = self.fetch_data(index) 90 | data = self.preprocess(data) 91 | return data 92 | 93 | def preprocess(self, data): 94 | """Process the fetched / cached data with randomness.""" 95 | rays = data["rays"] 96 | # pixels, alpha = torch.split(rgba, [3, 1], dim=-1) 97 | 98 | if self.training: 99 | if self.color_bkgd_aug == "random": 100 | color_bkgd = torch.rand(3, device=self.device) 101 | elif self.color_bkgd_aug == "white": 102 | color_bkgd = torch.ones(3, device=self.device) 103 | elif self.color_bkgd_aug == "black": 104 | color_bkgd = torch.zeros(3, device=self.device) 105 | else: 106 | color_bkgd = torch.zeros(3, device=self.device) 107 | 108 | # pixels = pixels * alpha + color_bkgd * (1.0 - alpha) 109 | return { 110 | "rays": rays, # [n_rays,] or [h, w] 111 | "color_bkgd": color_bkgd, # [3,] 112 | **{k: v for k, v in data.items() if k not in ["rgba", "rays"]}, 113 | } 114 | 115 | def update_num_rays(self, num_rays): 116 | self.num_rays = num_rays 117 | 118 | def fetch_data(self, index): 119 | """Fetch the data (it maybe cached for multiple batches).""" 120 | 121 | num_rays = self.num_rays 122 | 123 | if self.training: 124 | camtoworld_id = torch.randint( 125 | 0, 126 | len(self.camtoworlds), 127 | size=(num_rays,), 128 | device=self.device, 129 | ) 130 | 131 | x = torch.randint( 
132 | 0, self.WIDTH, size=(num_rays,), device=self.device 133 | ) 134 | y = torch.randint( 135 | 0, self.HEIGHT, size=(num_rays,), device=self.device 136 | ) 137 | else: 138 | camtoworld_id = [index] 139 | x, y = torch.meshgrid( 140 | torch.arange(self.WIDTH, device=self.device), 141 | torch.arange(self.HEIGHT, device=self.device), 142 | indexing="xy", 143 | ) 144 | x = x.flatten() 145 | y = y.flatten() 146 | 147 | # generate rays 148 | c2w = self.camtoworlds[camtoworld_id] # (num_rays, 3, 4) 149 | 150 | camera_dirs = F.pad( 151 | torch.stack( 152 | [ 153 | (x - self.K[0, 2] + 0.5) / self.K[0, 0], 154 | (y - self.K[1, 2] + 0.5) 155 | / self.K[1, 1] 156 | * (-1.0 if self.OPENGL_CAMERA else 1.0), 157 | ], 158 | dim=-1, 159 | ), 160 | (0, 1), 161 | value=(-1.0 if self.OPENGL_CAMERA else 1.0), 162 | ) # [num_rays, 3] 163 | 164 | directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1) 165 | origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape) 166 | viewdirs = directions / torch.linalg.norm( 167 | directions, dim=-1, keepdims=True 168 | ) 169 | 170 | if self.training: 171 | origins = torch.reshape(origins, (num_rays, 3)) 172 | viewdirs = torch.reshape(viewdirs, (num_rays, 3)) 173 | else: 174 | origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3)) 175 | viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3)) 176 | 177 | rays = Rays(origins=origins, viewdirs=viewdirs) 178 | 179 | return { 180 | "rays": rays, # [h, w, 3] 181 | } 182 | -------------------------------------------------------------------------------- /nerf2vec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/nerf2vec/__init__.py -------------------------------------------------------------------------------- /nerf2vec/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | # #################### 3 | # NERF2VEC 4 | # #################### 5 | """ 6 | # 7 | # DIMENSIONS 8 | # 9 | ENCODER_EMBEDDING_DIM = 1024 10 | ENCODER_HIDDEN_DIM = [512, 512, 1024, 1024] 11 | 12 | 13 | DECODER_INPUT_DIM = 3 14 | DECODER_HIDDEN_DIM = 1024 15 | DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP = 2 16 | DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP = 2 17 | DECODER_OUT_DIM = 4 18 | 19 | # 20 | # TRAIN 21 | # 22 | NUM_EPOCHS = 501 23 | BATCH_SIZE = 16 24 | LR = 1e-4 25 | WD = 1e-2 26 | BG_WEIGHT = 0.2 27 | FG_WEIGHT = 1 - BG_WEIGHT 28 | 29 | """ 30 | # #################### 31 | # NERFACC 32 | # #################### 33 | """ 34 | # 35 | # GRID 36 | # 37 | import os 38 | try: 39 | from nerfacc import ContractionType 40 | GRID_CONTRACTION_TYPE = ContractionType.AABB 41 | except ImportError: 42 | pass 43 | GRID_AABB = [-0.7, -0.7, -0.7, 0.7, 0.7, 0.7] 44 | GRID_RESOLUTION = 96 45 | GRID_CONFIG_N_SAMPLES = 1024 46 | 47 | GRID_RECONSTRUCTION_TOTAL_ITERATIONS = 20 48 | GRID_RECONSTRUCTION_WARMUP_ITERATIONS = 5 49 | GRID_NUMBER_OF_CELLS = 884736 # (884736 if resolution == 96, 2097152 if resolution == 128) 50 | GRID_BACKGROUND_CELLS_TO_SAMPLE = 32000 51 | 52 | # 53 | # RAYS 54 | # 55 | NUM_RAYS = 55000 56 | MAX_FOREGROUND_COORDINATES = 25000 57 | MAX_BACKGROUND_COORDINATES = 10000 58 | 59 | # 60 | # INSTANT-NGP 61 | # 62 | MLP_INPUT_SIZE = 3 63 | MLP_ENCODING_SIZE = 24 64 | MLP_INPUT_SIZE_AFTER_ENCODING = MLP_INPUT_SIZE * MLP_ENCODING_SIZE * 2 65 | MLP_OUTPUT_SIZE = 4 66 | MLP_HIDDEN_LAYERS = 3 67 | MLP_UNITS = 64 68 | 69 | INSTANT_NGP_MLP_CONF = { 70 | 'aabb': 
GRID_AABB, 71 | 'unbounded':False, 72 | 'encoding':'Frequency', 73 | 'mlp':'FullyFusedMLP', 74 | 'activation':'ReLU', 75 | 'n_hidden_layers':MLP_HIDDEN_LAYERS, 76 | 'n_neurons':MLP_UNITS, 77 | 'encoding_size':MLP_ENCODING_SIZE 78 | } 79 | 80 | INSTANT_NGP_ENCODING_CONF = { 81 | "otype": "Frequency", 82 | "n_frequencies": 24 83 | } 84 | 85 | NERF_WEIGHTS_FILE_NAME = 'nerf_weights.pth' 86 | 87 | # 88 | # TINY-CUDA 89 | # 90 | TINY_CUDA_MIN_SIZE = 16 91 | 92 | """ 93 | # #################### 94 | # LOGGING 95 | # #################### 96 | """ 97 | WANDB_CONFIG = { 98 | 'ENCODER_EMBEDDING_DIM': ENCODER_EMBEDDING_DIM, 99 | 'ENCODER_HIDDEN_DIM': ENCODER_HIDDEN_DIM, 100 | 'DECODER_INPUT_DIM': DECODER_INPUT_DIM, 101 | 'DECODER_HIDDEN_DIM': DECODER_HIDDEN_DIM, 102 | 'DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP': DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP, 103 | 'DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP': DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP, 104 | 'DECODER_OUT_DIM': DECODER_OUT_DIM, 105 | 'NUM_EPOCHS': NUM_EPOCHS, 106 | 'BATCH_SIZE': BATCH_SIZE, 107 | 'LR': LR, 108 | 'WD': WD, 109 | "NUM_RAYS": NUM_RAYS, 110 | "GRID_RESOLUTION": GRID_RESOLUTION 111 | } 112 | 113 | 114 | """ 115 | # #################### 116 | # DATASET 117 | # #################### 118 | """ 119 | TRAIN_SPLIT = 'train' 120 | VAL_SPLIT = 'val' 121 | TEST_SPLIT = 'test' 122 | 123 | 124 | LABELS_TO_IDS = { 125 | "02691156": 0, # airplane 126 | "02828884": 1, # bench 127 | "02933112": 2, # cabinet 128 | "02958343": 3, # car 129 | "03001627": 4, # chair 130 | "03211117": 5, # display 131 | "03636649": 6, # lamp 132 | "03691459": 7, # speaker 133 | "04090263": 8, # rifle 134 | "04256520": 9, # sofa 135 | "04379243": 10, # table 136 | "04401088": 11, # phone 137 | "04530566": 12 # watercraft 138 | } 139 | 140 | # TODO: COMMENT THESE! 141 | #'02992529': 4, tablet delete? 142 | #"03948459": 9, gun delete? 143 | 144 | NUM_CLASSES = len(LABELS_TO_IDS) 145 | -------------------------------------------------------------------------------- /nerf2vec/export_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | 7 | import json 8 | import h5py 9 | import torch 10 | import settings 11 | 12 | from pathlib import Path 13 | from typing import Tuple 14 | from torch import Tensor 15 | from models.encoder import Encoder 16 | from nerf2vec import config as nerf2vec_config 17 | from torch.utils.data import DataLoader, Dataset 18 | from nerf2vec.utils import get_class_label, get_mlp_params_as_matrix 19 | 20 | class InrDataset(Dataset): 21 | def __init__(self, split_json: str, device: str, nerf_weights_file_name: str) -> None: 22 | super().__init__() 23 | 24 | with open(split_json) as file: 25 | self.nerf_paths = json.load(file) 26 | 27 | # self.nerf_paths = self._get_nerf_paths('data\\data_TRAINED') 28 | assert isinstance(self.nerf_paths, list), 'The json file provided is not a list.' 
29 | 30 | self.device = device 31 | self.nerf_weights_file_name = nerf_weights_file_name 32 | 33 | def __len__(self) -> int: 34 | return len(self.nerf_paths) 35 | 36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor]: 37 | 38 | data_dir = self.nerf_paths[index] 39 | weights_file_path = os.path.join(data_dir, self.nerf_weights_file_name) 40 | 41 | class_label = get_class_label(weights_file_path) 42 | class_id = nerf2vec_config.LABELS_TO_IDS[get_class_label(weights_file_path)] if class_label != -1 else class_label 43 | 44 | matrix = torch.load(weights_file_path, map_location=torch.device(self.device)) 45 | matrix = get_mlp_params_as_matrix(matrix['mlp_base.params']) 46 | 47 | return matrix, class_id, data_dir 48 | 49 | def load_nerf2vec_checkpoint(): 50 | ckpts_path = Path(settings.NERF2VEC_CKPTS_PATH) 51 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name] 52 | error_msg = "Expected only one ckpt apart from best, found none or too many." 53 | assert len(ckpt_paths) == 1, error_msg 54 | ckpt_path = ckpt_paths[0] 55 | print(f'loading path: {ckpt_path}') 56 | ckpt = torch.load(ckpt_path) 57 | 58 | return ckpt 59 | 60 | 61 | def export_embeddings(device = 'cuda:0'): 62 | 63 | train_dset_json = os.path.abspath(os.path.join('data', 'train.json')) 64 | train_dset = InrDataset(train_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME) 65 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False) 66 | 67 | val_dset_json = os.path.abspath(os.path.join('data', 'validation.json')) 68 | val_dset = InrDataset(val_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME) 69 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False) 70 | 71 | test_dset_json = os.path.abspath(os.path.join('data', 'test.json')) 72 | test_dset = InrDataset(test_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME) 73 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False) 74 | 75 | encoder = Encoder( 76 | nerf2vec_config.MLP_UNITS, 77 | nerf2vec_config.ENCODER_HIDDEN_DIM, 78 | nerf2vec_config.ENCODER_EMBEDDING_DIM 79 | ) 80 | encoder = encoder.to(device) 81 | ckpt = load_nerf2vec_checkpoint() 82 | encoder.load_state_dict(ckpt["encoder"]) 83 | encoder.eval() 84 | 85 | loaders = [train_loader, val_loader, test_loader] 86 | splits = [nerf2vec_config.TRAIN_SPLIT, nerf2vec_config.VAL_SPLIT, nerf2vec_config.TEST_SPLIT] 87 | 88 | 89 | for loader, split in zip(loaders, splits): 90 | idx = 0 91 | 92 | for batch in loader: 93 | matrices, class_ids, data_dirs = batch 94 | matrices = matrices.cuda() 95 | 96 | with torch.no_grad(): 97 | embeddings = encoder(matrices) 98 | 99 | out_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR) 100 | h5_path = out_root / Path(f"{split}") / f"{idx}.h5" 101 | h5_path.parent.mkdir(parents=True, exist_ok=True) 102 | 103 | with h5py.File(h5_path, "w") as f: 104 | # print(f'dir: {data_dirs[0]}, class: {class_ids[0]}') 105 | f.create_dataset("data_dir", data=data_dirs[0]) 106 | f.create_dataset("embedding", data=embeddings[0].detach().cpu().numpy()) 107 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy()) 108 | 109 | idx += 1 110 | 111 | if idx % 5000 == 0: 112 | print(f'Created {idx} embeddings for {split} split') 113 | 114 | def main() -> None: 115 | export_embeddings(device=settings.DEVICE_NAME) 116 | 117 | if __name__ == "__main__": 118 | main() 
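A short, hedged usage sketch for the files written by the module above: each exported `.h5` holds the embedding, its class id and the source NeRF folder. The concrete path below is illustrative; the real files end up under `settings.NERF2VEC_EMBEDDINGS_DIR/<split>/`.

```python
import h5py
import numpy as np

# Illustrative path: files are written as <NERF2VEC_EMBEDDINGS_DIR>/<split>/<idx>.h5
with h5py.File("nerf2vec/embeddings/train/0.h5", "r") as f:
    embedding = np.array(f.get("embedding"))      # 1024-dim nerf2vec embedding with the default config
    class_id = np.array(f.get("class_id")).item()  # ShapeNet class index
    data_dir = f.get("data_dir")[()]               # folder of the NeRF this embedding encodes

print(embedding.shape, class_id, data_dir)
```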
-------------------------------------------------------------------------------- /nerf2vec/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import gzip 4 | import torch 5 | import shutil 6 | import numpy as np 7 | 8 | from collections import OrderedDict 9 | from typing import Any, Dict 10 | from torch import Tensor 11 | 12 | from nerf2vec import config as nerf2vec_config 13 | from nerf.utils import Rays 14 | 15 | import torch.nn.functional as F 16 | 17 | def next_multiple(val, divisor): 18 | """ 19 | Implementation ported directly from TinyCuda implementation 20 | See https://github.com/NVlabs/tiny-cuda-nn/blob/master/include/tiny-cuda-nn/common.h#L300 21 | """ 22 | return next_pot(div_round_up(val, divisor) * divisor) 23 | 24 | 25 | def div_round_up(val, divisor): 26 | return next_pot((val + divisor - 1) / divisor) 27 | 28 | 29 | def next_pot(v): 30 | v=int(v) 31 | v-=1 32 | v | v >> 1 33 | v | v >> 2 34 | v | v >> 4 35 | v | v >> 8 36 | v | v >> 16 37 | return v+1 38 | 39 | 40 | def next_multiple_2(val, divisor): 41 | """ 42 | Additional implementation added for testing purposes 43 | """ 44 | return ((val - 1) | (divisor -1)) + 1 45 | 46 | 47 | def get_mlp_params_as_matrix(flattened_params: Tensor, sd: Dict[str, Any] = None) -> Tensor: 48 | 49 | if sd is None: 50 | sd = get_mlp_sample_sd() 51 | 52 | params_shapes = [p.shape for p in sd.values()] 53 | feat_dim = params_shapes[0][0] 54 | 55 | padding_size = (feat_dim-params_shapes[-1][0]) * params_shapes[-1][1] 56 | padding_tensor = torch.zeros(padding_size) 57 | params = torch.cat((flattened_params, padding_tensor), dim=0) 58 | 59 | return params.reshape((-1, feat_dim)) 60 | 61 | 62 | def get_mlp_sample_sd(): 63 | sample_sd = OrderedDict() 64 | sample_sd['input'] = torch.zeros(nerf2vec_config.MLP_UNITS, next_multiple(nerf2vec_config.MLP_INPUT_SIZE_AFTER_ENCODING, nerf2vec_config.TINY_CUDA_MIN_SIZE)) 65 | for i in range(nerf2vec_config.MLP_HIDDEN_LAYERS): 66 | sample_sd[f'hid_{i}'] = torch.zeros(nerf2vec_config.MLP_UNITS, nerf2vec_config.MLP_UNITS) 67 | sample_sd['output'] = torch.zeros(next_multiple(nerf2vec_config.MLP_OUTPUT_SIZE, nerf2vec_config.TINY_CUDA_MIN_SIZE), nerf2vec_config.MLP_UNITS) 68 | 69 | return sample_sd 70 | 71 | 72 | def get_grid_file_name(file_path): 73 | # Split the path into individual directories 74 | directories = os.path.normpath(file_path).split(os.sep) 75 | # Get the last two directories 76 | last_two_dirs = directories[-2:] 77 | # Join the last two directories with an underscore 78 | file_name = '_'.join(last_two_dirs) + '.pth' 79 | return file_name 80 | 81 | 82 | def get_class_label(file_path): 83 | directories = os.path.normpath(file_path).split(os.sep) 84 | class_label = directories[-3] 85 | 86 | return class_label 87 | 88 | 89 | def get_class_label_from_nerf_root_path(file_path): 90 | directories = os.path.normpath(file_path).split(os.sep) 91 | class_label = directories[-2] 92 | 93 | return class_label 94 | 95 | 96 | def get_nerf_name_from_grid(file_path): 97 | grid_name = os.path.basename(file_path) 98 | nerf_name = os.path.splitext(grid_name)[0] 99 | return nerf_name 100 | 101 | 102 | def unzip_file(file_path, extract_dir, file_name): 103 | with gzip.open(os.path.join(file_path, 'grid.pth.gz'), 'rb') as f_in: 104 | output_path = os.path.join(extract_dir, file_name) 105 | with open(output_path, 'wb') as f_out: 106 | shutil.copyfileobj(f_in, f_out) 107 | 108 | 109 | # 
################################################################################ 110 | # CAMERA POSE MATRIX GENERATION METHODS 111 | # ################################################################################ 112 | def get_translation_t(t): 113 | """Get the translation matrix for movement in t.""" 114 | matrix = [ 115 | [1, 0, 0, 0], 116 | [0, 1, 0, 0], 117 | [0, 0, 1, t], 118 | [0, 0, 0, 1], 119 | ] 120 | 121 | return torch.tensor(matrix, dtype=torch.float32) 122 | 123 | 124 | def get_rotation_phi(phi): 125 | """Get the rotation matrix for movement in phi.""" 126 | matrix = [ 127 | [1, 0, 0, 0], 128 | [0, torch.cos(phi), -torch.sin(phi), 0], 129 | [0, torch.sin(phi), torch.cos(phi), 0], 130 | [0, 0, 0, 1], 131 | ] 132 | return torch.tensor(matrix, dtype=torch.float32) 133 | 134 | 135 | def get_rotation_theta(theta): 136 | """Get the rotation matrix for movement in theta.""" 137 | matrix = [ 138 | [torch.cos(theta), 0, -torch.sin(theta), 0], 139 | [0, 1, 0, 0], 140 | [torch.sin(theta), 0, torch.cos(theta), 0], 141 | [0, 0, 0, 1], 142 | ] 143 | return torch.tensor(matrix, dtype=torch.float32) 144 | 145 | 146 | def pose_spherical(theta, phi, t): 147 | """ 148 | Get the camera to world matrix for the corresponding theta, phi 149 | and t. 150 | """ 151 | c2w = get_translation_t(t) 152 | c2w = get_rotation_phi(phi / 180.0 * np.pi) @ c2w 153 | c2w = get_rotation_theta(theta / 180.0 * np.pi) @ c2w 154 | c2w = torch.from_numpy(np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [ 155 | 0, 0, 0, 1]], dtype=np.float32)) @ c2w 156 | return c2w 157 | 158 | # ################################################################################ 159 | # RAYS GENERATION 160 | # ################################################################################ 161 | def get_rays( 162 | device, 163 | camera_angle_x=0.8575560450553894, # Parameter taken from traned NeRFs 164 | width=224, 165 | height=224): 166 | 167 | # Get camera pose 168 | theta = torch.tensor(90.0, device=device) # The horizontal camera position (change the value between and 360 to make a full cycle around the object) 169 | phi = torch.tensor(-30.0, device=device) # The vertical camera position 170 | t = torch.tensor(1.5, device=device) # camera distance from object 171 | c2w = pose_spherical(theta, phi, t) 172 | c2w = c2w.to(device) 173 | 174 | # Compute the focal_length 175 | focal_length = 0.5 * width / np.tan(0.5 * camera_angle_x) 176 | 177 | rays = generate_rays(device, width, height, focal_length, c2w) 178 | 179 | return rays 180 | 181 | def generate_rays(device, width, height, focal, c2w, OPENGL_CAMERA=True): 182 | x, y = torch.meshgrid( 183 | torch.arange(width, device=device), 184 | torch.arange(height, device=device), 185 | indexing="xy", 186 | ) 187 | x = x.flatten() 188 | y = y.flatten() 189 | 190 | K = torch.tensor( 191 | [ 192 | [focal, 0, width / 2.0], 193 | [0, focal, height / 2.0], 194 | [0, 0, 1], 195 | ], 196 | dtype=torch.float32, 197 | device=device, 198 | ) # (3, 3) 199 | 200 | camera_dirs = F.pad( 201 | torch.stack( 202 | [ 203 | (x - K[0, 2] + 0.5) / K[0, 0], 204 | (y - K[1, 2] + 0.5) 205 | / K[1, 1] 206 | * (-1.0 if OPENGL_CAMERA else 1.0), 207 | ], 208 | dim=-1, 209 | ), 210 | (0, 1), 211 | value=(-1.0 if OPENGL_CAMERA else 1.0), 212 | ) # [num_rays, 3] 213 | camera_dirs.to(device) 214 | 215 | directions = (camera_dirs[:, None, :] * c2w[:3, :3]).sum(dim=-1) 216 | origins = torch.broadcast_to(c2w[:3, -1], directions.shape) 217 | viewdirs = directions / torch.linalg.norm( 218 | directions, dim=-1, keepdims=True 
219 | ) 220 | 221 | origins = torch.reshape(origins, (height, width, 3))#.unsqueeze(0) 222 | viewdirs = torch.reshape(viewdirs, (height, width, 3))#.unsqueeze(0) 223 | 224 | rays = Rays(origins=origins, viewdirs=viewdirs) 225 | 226 | return rays 227 | 228 | def get_latest_checkpoints_path(ckpts_path): 229 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name] 230 | error_msg = "Expected only one ckpt apart from best, found none or too many." 231 | assert len(ckpt_paths) == 1, error_msg 232 | 233 | ckpt_path = ckpt_paths[0] 234 | 235 | return ckpt_path -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["WANDB_SILENT"] = "true" 4 | os.environ["WANDB_MODE"] = "disabled" 5 | 6 | cuda_idx = 0 7 | DEVICE_NAME = 'cuda:%s' % cuda_idx # Keep compatibility with older code 8 | 9 | try: 10 | import torch 11 | torch.cuda.set_device(cuda_idx) 12 | print('set cuda device to %s' % cuda_idx) 13 | except ImportError: 14 | print('torch not installed, cannot set cuda device') 15 | pass 16 | 17 | """ 18 | # ################################################## 19 | # PATHS USED BY DIFFERENT MODULES 20 | # ################################################## 21 | """ 22 | 23 | # DATASET 24 | TRAIN_DSET_JSON = os.path.abspath(os.path.join('data', 'train.json')) 25 | VAL_DSET_JSON = os.path.abspath(os.path.join('data', 'validation.json')) 26 | TEST_DSET_JSON = os.path.abspath(os.path.join('data', 'test.json')) 27 | 28 | # NERF2VEC 29 | NERF2VEC_CKPTS_PATH = os.path.join('nerf2vec', 'train', 'ckpts') 30 | NERF2VEC_ALL_CKPTS_PATH = os.path.join('nerf2vec', 'train', 'all_ckpts') 31 | NERF2VEC_EMBEDDINGS_DIR = os.path.join('nerf2vec', 'embeddings') 32 | 33 | # CLASSIFICATION 34 | CLASSIFICATION_OUTPUT_DIR = os.path.join('task_classification', 'train') 35 | 36 | # GENERATION 37 | GENERATION_EMBEDDING_DIR = os.path.join('task_generation', 'latent_embeddings') 38 | GENERATION_OUT_DIR = os.path.join('task_generation', 'experiments', '{}') # The placeholder will contain the class index 39 | GENERATION_NERF2VEC_FULL_CKPT_PATH = os.path.join('task_classification', 'train', 'ckpts', '499.pt') 40 | GENERATION_LATENT_GAN_FULL_CKPT_PATH = os.path.join('task_generation', 'experiments', 'nerf2vec_{}', 'generated_embeddings', 'epoch_2000.npz') # The placeholder will contain the class index 41 | 42 | 43 | -------------------------------------------------------------------------------- /task_classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_classification/__init__.py -------------------------------------------------------------------------------- /task_classification/config.py: -------------------------------------------------------------------------------- 1 | TRAIN_BS = 256 2 | VAL_BS = 256 3 | LAYERS_DIM = [1024, 512, 256] 4 | 5 | LR = 1e-4 6 | WD = 1e-2 7 | NUM_EPOCHS = 150 8 | 9 | 10 | # Add any attributes that you want to be logged by wandb 11 | WANDB_CONFIG = { 12 | 13 | } 14 | -------------------------------------------------------------------------------- /task_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/__init__.py 
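Relating the classification settings above (`task_classification/config.py`) to the `FcClassifier` defined in `models/fc_classifier.py`, the hedged sketch below assembles the classifier head under those defaults. It is only an illustration; the actual training loop lives in `task_classification/train_classifier.py`, which is not shown here.

```python
import torch

from models.fc_classifier import FcClassifier
from nerf2vec import config as nerf2vec_config
from task_classification import config as classification_config

# LAYERS_DIM = [1024, 512, 256]: the first entry matches the 1024-dim nerf2vec embedding,
# and the final Linear layer maps 256 features to the 13 ShapeNet classes.
classifier = FcClassifier(classification_config.LAYERS_DIM, nerf2vec_config.NUM_CLASSES)

embeddings = torch.randn(8, nerf2vec_config.ENCODER_EMBEDDING_DIM)  # dummy batch of embeddings
logits = classifier(embeddings)  # shape: (8, 13)
print(logits.shape)
```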
-------------------------------------------------------------------------------- /task_generation/export_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | 7 | from nerf2vec import config as nerf2vec_config 8 | 9 | from pathlib import Path 10 | 11 | import h5py 12 | import numpy as np 13 | import settings 14 | 15 | 16 | def export_embeddings() -> None: 17 | 18 | split = 'train' 19 | nerf_embeddings_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR) / split 20 | out_root = Path(settings.GENERATION_EMBEDDING_DIR) 21 | out_root.mkdir(parents=True, exist_ok=True) 22 | 23 | num_classes = nerf2vec_config.NUM_CLASSES 24 | 25 | embeddings_paths = list(nerf_embeddings_root.glob("*.h5")) 26 | 27 | embeddings = {} 28 | for cls in range(num_classes): 29 | embeddings[cls] = [] 30 | 31 | print('Extracting embeddings...') 32 | for idx, path in enumerate(embeddings_paths): 33 | with h5py.File(path, "r") as f: 34 | embedding = np.array(f.get("embedding")) 35 | class_id = np.array(f.get("class_id")).item() 36 | embeddings[class_id].append(embedding) 37 | 38 | if idx % 5000 == 0: 39 | print(f'\t {idx}/{len(embeddings_paths)}') 40 | 41 | for class_id in range(num_classes): 42 | print(f'Processing class: {class_id}') 43 | if class_id == 2: 44 | print() 45 | path_out = out_root / f"embeddings_{class_id}.npz" 46 | stacked_embeddings = np.stack(embeddings[class_id]) 47 | np.savez_compressed(path_out, embeddings=stacked_embeddings) 48 | 49 | def main() -> None: 50 | export_embeddings() 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /task_generation/latent_3d_points/.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | .ipynb_checkpoints 3 | .DS_Store 4 | .pydevproject 5 | *.pyc 6 | *.nfs* 7 | data/* 8 | external/structural_losses/*.o 9 | external/structural_losses/*.so 10 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/.gitmodules -------------------------------------------------------------------------------- /task_generation/latent_3d_points/LICENSE.md: -------------------------------------------------------------------------------- 1 | Learning Representations And Generative Models For 3D Point Clouds 2 | 3 | Copyright (c) 2017, Geometric Computation Group of Stanford University 4 | 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2017 Panos Achlioptas 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/README.md: -------------------------------------------------------------------------------- 1 | # Learning Representations and Generative Models For 3D Point Clouds 2 | Created by Panos Achlioptas, Olga Diamanti, Ioannis Mitliagkas, Leonidas J. Guibas. 3 | 4 | ![representative](https://github.com/optas/latent_3d_points/blob/master/doc/images/teaser.jpg) 5 | 6 | 7 | ## Introduction 8 | This work is based on our [arXiv tech report](https://arxiv.org/abs/1707.02392). We proposed a novel deep net architecture for auto-encoding point clouds. The learned representations were amenable to semantic part editting, shape analogies, linear classification and shape interpolations. 9 | 10 | 11 | 12 | ## Citation 13 | If you find our work useful in your research, please consider citing: 14 | 15 | @article{achlioptas2017latent_pc, 16 | title={Learning Representations and Generative Models For 3D Point Clouds}, 17 | author={Achlioptas, Panos and Diamanti, Olga and Mitliagkas, Ioannis and Guibas, Leonidas J}, 18 | journal={arXiv preprint arXiv:1707.02392}, 19 | year={2017} 20 | } 21 | 22 | 23 | ## Dependencies 24 | Requirements: 25 | - Python 2.7+ with Numpy, Scipy and Matplotlib 26 | - [Tensorflow (version 1.0+)](https://www.tensorflow.org/get_started/os_setup) 27 | - [TFLearn](http://tflearn.org/installation) 28 | 29 | Our code has been tested with Python 2.7, TensorFlow 1.3.0, TFLearn 0.3.2, CUDA 8.0 and cuDNN 6.0 on Ubuntu 14.04. 30 | 31 | 32 | ## Installation 33 | Download the source code from the git repository: 34 | ``` 35 | git clone https://github.com/optas/latent_3d_points 36 | ``` 37 | 38 | To be able to train your own model you need first to _compile_ the EMD/Chamfer losses. In latent_3d_points/external/structural_losses we have included the cuda implementations of [Fan et. al](https://github.com/fanhqme/PointSetGeneration). 39 | ``` 40 | cd latent_3d_points/external 41 | 42 | with your editor modify the first three lines of the makefile to point to 43 | your nvcc, cudalib and tensorflow library. 44 | 45 | make 46 | ``` 47 | 48 | ### Data Set 49 | We provide ~57K point-clouds, each sampled from a mesh model of 50 | ShapeNetCore 51 | with (area) uniform sampling. To download them (1.4GB): 52 | ``` 53 | cd latent_3d_points/ 54 | ./download_data.sh 55 | ``` 56 | The point-clouds will be stored in latent_3d_points/data/shape_net_core_uniform_samples_2048 57 | 58 | Use the function snc_category_to_synth_id, defined in src/in_out/, to map a class name such as "chair" to its synthetic_id: "03001627". Point-clouds of models of the same class are stored under a commonly named folder. 
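For example, a minimal sketch of this lookup (it mirrors the first cell of notebooks/compute_evaluation_metrics.ipynb; the data directory is assumed to be the default location created by download_data.sh, relative to the latent_3d_points root):
```
import os.path as osp
from latent_3d_points.src.in_out import snc_category_to_synth_id, load_all_point_clouds_under_folder

top_in_dir = 'data/shape_net_core_uniform_samples_2048/'   # assumed default download location
syn_id = snc_category_to_synth_id()['chair']               # -> '03001627'
class_dir = osp.join(top_in_dir, syn_id)
all_pc_data = load_all_point_clouds_under_folder(class_dir, n_threads=8, file_ending='.ply', verbose=True)
```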
59 | 60 | 61 | ### Usage 62 | To train a point-cloud AE look at: 63 | 64 | latent_3d_points/notebooks/train_single_class_ae.ipynb 65 | 66 | To train a latent-GAN based on a pre-trained AE look at: 67 | 68 | latent_3d_points/notebooks/train_latent_gan.ipynb 69 | 70 | To train a raw-GAN: 71 | 72 | latent_3d_points/notebooks/train_raw_gan.ipynb 73 | 74 | To use the evaluation metrics (MMD, Coverage, JSD) between two point-cloud sets look at: 75 | 76 | latent_3d_points/notebooks/compute_evaluation_metrics.ipynb 77 | 78 | 79 | 80 | ## License 81 | This project is licensed under the terms of the MIT license (see LICENSE.md for details). 82 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/__init__.py -------------------------------------------------------------------------------- /task_generation/latent_3d_points/doc/images/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/doc/images/teaser.jpg -------------------------------------------------------------------------------- /task_generation/latent_3d_points/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For each Mesh model of Shape-Net-Core download 1 point-cloud with 2048 points 3 | # sampled uniformly at random (around 1.4GB). 4 | wget https://www.dropbox.com/s/vmsdrae6x5xws1v/shape_net_core_uniform_samples_2048.zip?dl=0 5 | mv shape_net_core_uniform_samples_2048.zip\?dl\=0 shape_net_core_uniform_samples_2048.zip 6 | unzip shape_net_core_uniform_samples_2048.zip 7 | rm shape_net_core_uniform_samples_2048.zip 8 | mkdir -p data 9 | mv shape_net_core_uniform_samples_2048 data 10 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/__init__.py -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/python_plyfile/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.swp 4 | *.egg-info 5 | plyfile-venv/ 6 | build/ 7 | dist/ 8 | .tox 9 | .cache 10 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/python_plyfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/python_plyfile/__init__.py -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from tf_nndistance import nn_distance 3 | from tf_approxmatch import approx_match, match_cost 4 | except: 5 | 
print('External Losses (Chamfer-EMD) were not loaded.') 6 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/approxmatch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | void approxmatch_cpu(int b,int n,int m,float * xyz1,float * xyz2,float * match){ 18 | for (int i=0;i saturatedl(n,double(factorl)),saturatedr(m,double(factorr)); 22 | vector weight(n*m); 23 | for (int j=0;j=-2;j--){ 26 | //printf("i=%d j=%d\n",i,j); 27 | double level=-powf(4.0,j); 28 | if (j==-2) 29 | level=0; 30 | for (int k=0;k ss(m,1e-9); 42 | for (int k=0;k ss2(m,0); 59 | for (int k=0;k1){ 154 | printf("bad i=%d j=%d k=%d u=%f\n",i,j,k,u); 155 | } 156 | s+=u; 157 | } 158 | if (s<0.999 || s>1.001){ 159 | printf("bad i=%d j=%d s=%f\n",i,j,s); 160 | } 161 | } 162 | for (int j=0;j4.001){ 168 | printf("bad i=%d j=%d s=%f\n",i,j,s); 169 | } 170 | } 171 | }*/ 172 | /*for (int j=0;j1e-3) 222 | if (fabs(double(match[i*n*m+k*n+j]-match_cpu[i*n*m+j*m+k]))>1e-2){ 223 | printf("i %d j %d k %d m %f %f\n",i,j,k,match[i*n*m+k*n+j],match_cpu[i*n*m+j*m+k]); 224 | flag=false; 225 | break; 226 | } 227 | //emax=max(emax,fabs(double(match[i*n*m+k*n+j]-match_cpu[i*n*m+j*m+k]))); 228 | emax+=fabs(double(match[i*n*m+k*n+j]-match_cpu[i*n*m+j*m+k])); 229 | } 230 | } 231 | printf("emax_match=%f\n",emax/2/n/m); 232 | emax=0; 233 | for (int i=0;i<2;i++) 234 | emax+=fabs(double(cost[i]-cost_cpu[i])); 235 | printf("emax_cost=%f\n",emax/2); 236 | emax=0; 237 | for (int i=0;i<2*m*3;i++) 238 | emax+=fabs(double(grad[i]-grad_cpu[i])); 239 | //for (int i=0;i<3*m;i++){ 240 | //if (grad[i]!=0) 241 | //printf("i %d %f %f\n",i,grad[i],grad_cpu[i]); 242 | //} 243 | printf("emax_grad=%f\n",emax/(2*m*3)); 244 | 245 | cudaFree(xyz1_g); 246 | cudaFree(xyz2_g); 247 | cudaFree(match_g); 248 | cudaFree(cost_g); 249 | cudaFree(grad_g); 250 | 251 | return 0; 252 | } 253 | 254 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/approxmatch.cu: -------------------------------------------------------------------------------- 1 | //n<=4096, m<=1024 2 | __global__ void approxmatch(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,float * __restrict__ match){ 3 | const int MaxN=4096,MaxM=1024; 4 | __shared__ float remainL[MaxN],remainR[MaxM],ratioR[MaxM],ratioL[MaxN]; 5 | __shared__ int listR[MaxM],lc; 6 | float multiL,multiR; 7 | if (n>=m){ 8 | multiL=1; 9 | multiR=n/m; 10 | }else{ 11 | multiL=m/n; 12 | multiR=1; 13 | } 14 | for (int i=blockIdx.x;i=-2;j--){ 23 | float level=-powf(4.0f,j); 24 | if (j==-2){ 25 | level=0; 26 | } 27 | if (threadIdx.x==0){ 28 | lc=0; 29 | for (int k=0;k0) 31 | listR[lc++]=k; 32 | } 33 | __syncthreads(); 34 | int _lc=lc; 35 | for (int k=threadIdx.x;k>>(b,n,m,xyz1,xyz2,match); 94 | } 95 | __global__ void matchcost(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ out){ 96 | __shared__ float allsum[512]; 97 | const int Block=256; 98 | __shared__ float buf[Block*3]; 99 | for (int 
i=blockIdx.x;i>>(b,n,m,xyz1,xyz2,match,out); 138 | } 139 | __global__ void matchcostgrad(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * grad2){ 140 | __shared__ float sum_grad[256*3]; 141 | for (int i=blockIdx.x;i>>(b,n,m,xyz1,xyz2,match,grad2); 182 | } 183 | 184 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/makefile: -------------------------------------------------------------------------------- 1 | nvcc = /usr/local/cuda-8.0/bin/nvcc 2 | cudalib = /usr/local/cuda-8.0/lib64 3 | tensorflow = /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include 4 | 5 | all: tf_approxmatch_so.so tf_approxmatch_g.cu.o tf_nndistance_so.so tf_nndistance_g.cu.o 6 | 7 | 8 | tf_approxmatch_so.so: tf_approxmatch_g.cu.o tf_approxmatch.cpp 9 | g++ -std=c++11 tf_approxmatch.cpp tf_approxmatch_g.cu.o -o tf_approxmatch_so.so -shared -fPIC -I $(tensorflow) -lcudart -L $(cudalib) -O2 -D_GLIBCXX_USE_CXX11_ABI=0 10 | 11 | 12 | tf_approxmatch_g.cu.o: tf_approxmatch_g.cu 13 | $(nvcc) -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -c -o tf_approxmatch_g.cu.o tf_approxmatch_g.cu -I $(tensorflow) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2 14 | 15 | 16 | tf_nndistance_so.so: tf_nndistance_g.cu.o tf_nndistance.cpp 17 | g++ -std=c++11 tf_nndistance.cpp tf_nndistance_g.cu.o -o tf_nndistance_so.so -shared -fPIC -I $(tensorflow) -lcudart -L $(cudalib) -O2 -D_GLIBCXX_USE_CXX11_ABI=0 18 | 19 | 20 | tf_nndistance_g.cu.o: tf_nndistance_g.cu 21 | $(nvcc) -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -c -o tf_nndistance_g.cu.o tf_nndistance_g.cu -I $(tensorflow) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2 22 | 23 | 24 | clean: 25 | rm tf_approxmatch_so.so 26 | rm tf_nndistance_so.so 27 | rm *.cu.o 28 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/tf_approxmatch.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import os.path as osp 4 | 5 | base_dir = osp.dirname(osp.abspath(__file__)) 6 | 7 | approxmatch_module = tf.load_op_library(osp.join(base_dir, 'tf_approxmatch_so.so')) 8 | 9 | 10 | def approx_match(xyz1,xyz2): 11 | ''' 12 | input: 13 | xyz1 : batch_size * #dataset_points * 3 14 | xyz2 : batch_size * #query_points * 3 15 | returns: 16 | match : batch_size * #query_points * #dataset_points 17 | ''' 18 | return approxmatch_module.approx_match(xyz1,xyz2) 19 | ops.NoGradient('ApproxMatch') 20 | #@tf.RegisterShape('ApproxMatch') 21 | @ops.RegisterShape('ApproxMatch') 22 | def _approx_match_shape(op): 23 | shape1=op.inputs[0].get_shape().with_rank(3) 24 | shape2=op.inputs[1].get_shape().with_rank(3) 25 | return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[1]])] 26 | 27 | def match_cost(xyz1,xyz2,match): 28 | ''' 29 | input: 30 | xyz1 : batch_size * #dataset_points * 3 31 | xyz2 : batch_size * #query_points * 3 32 | match : batch_size * #query_points * #dataset_points 33 | returns: 34 | cost : batch_size 35 | ''' 36 | return approxmatch_module.match_cost(xyz1,xyz2,match) 37 | #@tf.RegisterShape('MatchCost') 38 | @ops.RegisterShape('MatchCost') 39 | def _match_cost_shape(op): 40 | shape1=op.inputs[0].get_shape().with_rank(3) 41 | shape2=op.inputs[1].get_shape().with_rank(3) 42 | shape3=op.inputs[2].get_shape().with_rank(3) 43 | 
return [tf.TensorShape([shape1.dims[0]])] 44 | @tf.RegisterGradient('MatchCost') 45 | def _match_cost_grad(op,grad_cost): 46 | xyz1=op.inputs[0] 47 | xyz2=op.inputs[1] 48 | match=op.inputs[2] 49 | grad_1,grad_2=approxmatch_module.match_cost_grad(xyz1,xyz2,match) 50 | return [grad_1*tf.expand_dims(tf.expand_dims(grad_cost,1),2),grad_2*tf.expand_dims(tf.expand_dims(grad_cost,1),2),None] 51 | 52 | if __name__=='__main__': 53 | alpha=0.5 54 | beta=2.0 55 | import bestmatch 56 | import numpy as np 57 | import math 58 | import random 59 | import cv2 60 | 61 | import tf_nndistance 62 | 63 | npoint=100 64 | 65 | with tf.device('/gpu:2'): 66 | pt_in=tf.placeholder(tf.float32,shape=(1,npoint*4,3)) 67 | mypoints=tf.Variable(np.random.randn(1,npoint,3).astype('float32')) 68 | match=approx_match(pt_in,mypoints) 69 | loss=tf.reduce_sum(match_cost(pt_in,mypoints,match)) 70 | #match=approx_match(mypoints,pt_in) 71 | #loss=tf.reduce_sum(match_cost(mypoints,pt_in,match)) 72 | #distf,_,distb,_=tf_nndistance.nn_distance(pt_in,mypoints) 73 | #loss=tf.reduce_sum((distf+1e-9)**0.5)*0.5+tf.reduce_sum((distb+1e-9)**0.5)*0.5 74 | #loss=tf.reduce_max((distf+1e-9)**0.5)*0.5*npoint+tf.reduce_max((distb+1e-9)**0.5)*0.5*npoint 75 | 76 | optimizer=tf.train.GradientDescentOptimizer(1e-4).minimize(loss) 77 | with tf.Session('') as sess: 78 | sess.run(tf.initialize_all_variables()) 79 | while True: 80 | meanloss=0 81 | meantrueloss=0 82 | for i in xrange(1001): 83 | #phi=np.random.rand(4*npoint)*math.pi*2 84 | #tpoints=(np.hstack([np.cos(phi)[:,None],np.sin(phi)[:,None],(phi*0)[:,None]])*random.random())[None,:,:] 85 | #tpoints=((np.random.rand(400)-0.5)[:,None]*[0,2,0]+[(random.random()-0.5)*2,0,0]).astype('float32')[None,:,:] 86 | tpoints=np.hstack([np.linspace(-1,1,400)[:,None],(random.random()*2*np.linspace(1,0,400)**2)[:,None],np.zeros((400,1))])[None,:,:] 87 | trainloss,_=sess.run([loss,optimizer],feed_dict={pt_in:tpoints.astype('float32')}) 88 | trainloss,trainmatch=sess.run([loss,match],feed_dict={pt_in:tpoints.astype('float32')}) 89 | #trainmatch=trainmatch.transpose((0,2,1)) 90 | show=np.zeros((400,400,3),dtype='uint8')^255 91 | trainmypoints=sess.run(mypoints) 92 | for i in xrange(len(tpoints[0])): 93 | u=np.random.choice(range(len(trainmypoints[0])),p=trainmatch[0].T[i]) 94 | cv2.line(show, 95 | (int(tpoints[0][i,1]*100+200),int(tpoints[0][i,0]*100+200)), 96 | (int(trainmypoints[0][u,1]*100+200),int(trainmypoints[0][u,0]*100+200)), 97 | cv2.cv.CV_RGB(0,255,0)) 98 | for x,y,z in tpoints[0]: 99 | cv2.circle(show,(int(y*100+200),int(x*100+200)),2,cv2.cv.CV_RGB(255,0,0)) 100 | for x,y,z in trainmypoints[0]: 101 | cv2.circle(show,(int(y*100+200),int(x*100+200)),3,cv2.cv.CV_RGB(0,0,255)) 102 | cost=((tpoints[0][:,None,:]-np.repeat(trainmypoints[0][None,:,:],4,axis=1))**2).sum(axis=2)**0.5 103 | #trueloss=bestmatch.bestmatch(cost)[0] 104 | print trainloss#,trueloss 105 | cv2.imshow('show',show) 106 | cmd=cv2.waitKey(10)%256 107 | if cmd==ord('q'): 108 | break 109 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/tf_approxmatch_compile.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | if [ 'tf_approxmatch_g.cu.o' -ot 'tf_approxmatch_g.cu' ] ; then 3 | echo 'nvcc' 4 | /usr/local/cuda-8.0/bin/nvcc tf_approxmatch_g.cu -o tf_approxmatch_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 5 | fi 6 | if [ 'tf_approxmatch_so.so' -ot 'tf_approxmatch.cpp' ] || [ 'tf_approxmatch_so.so' 
-ot 'tf_approxmatch_g.cu.o' ] ; then 7 | echo 'g++' 8 | g++ -std=c++11 tf_approxmatch.cpp tf_approxmatch_g.cu.o -o tf_approxmatch_so.so -shared -fPIC -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -I /usr/local/cuda-8.0/include -L /usr/local/cuda-8.0/lib64/ -O2 9 | fi 10 | 11 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/tf_nndistance.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import os.path as osp 4 | 5 | base_dir = osp.dirname(osp.abspath(__file__)) 6 | 7 | nn_distance_module = tf.load_op_library(osp.join(base_dir, 'tf_nndistance_so.so')) 8 | 9 | 10 | def nn_distance(xyz1, xyz2): 11 | ''' 12 | Computes the distance of nearest neighbors for a pair of point clouds 13 | input: xyz1: (batch_size,#points_1,3) the first point cloud 14 | input: xyz2: (batch_size,#points_2,3) the second point cloud 15 | output: dist1: (batch_size,#point_1) distance from first to second 16 | output: idx1: (batch_size,#point_1) nearest neighbor from first to second 17 | output: dist2: (batch_size,#point_2) distance from second to first 18 | output: idx2: (batch_size,#point_2) nearest neighbor from second to first 19 | ''' 20 | 21 | return nn_distance_module.nn_distance(xyz1,xyz2) 22 | 23 | #@tf.RegisterShape('NnDistance') 24 | @ops.RegisterShape('NnDistance') 25 | def _nn_distance_shape(op): 26 | shape1=op.inputs[0].get_shape().with_rank(3) 27 | shape2=op.inputs[1].get_shape().with_rank(3) 28 | return [tf.TensorShape([shape1.dims[0],shape1.dims[1]]),tf.TensorShape([shape1.dims[0],shape1.dims[1]]), 29 | tf.TensorShape([shape2.dims[0],shape2.dims[1]]),tf.TensorShape([shape2.dims[0],shape2.dims[1]])] 30 | @ops.RegisterGradient('NnDistance') 31 | def _nn_distance_grad(op,grad_dist1,grad_idx1,grad_dist2,grad_idx2): 32 | xyz1=op.inputs[0] 33 | xyz2=op.inputs[1] 34 | idx1=op.outputs[1] 35 | idx2=op.outputs[3] 36 | return nn_distance_module.nn_distance_grad(xyz1,xyz2,grad_dist1,idx1,grad_dist2,idx2) 37 | 38 | 39 | if __name__=='__main__': 40 | import numpy as np 41 | import random 42 | import time 43 | from tensorflow.python.kernel_tests.gradient_checker import compute_gradient 44 | random.seed(100) 45 | np.random.seed(100) 46 | with tf.Session('') as sess: 47 | xyz1=np.random.randn(32,16384,3).astype('float32') 48 | xyz2=np.random.randn(32,1024,3).astype('float32') 49 | with tf.device('/gpu:0'): 50 | inp1=tf.Variable(xyz1) 51 | inp2=tf.constant(xyz2) 52 | reta,retb,retc,retd=nn_distance(inp1,inp2) 53 | loss=tf.reduce_sum(reta)+tf.reduce_sum(retc) 54 | train=tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss) 55 | sess.run(tf.initialize_all_variables()) 56 | t0=time.time() 57 | t1=t0 58 | best=1e100 59 | for i in xrange(100): 60 | trainloss,_=sess.run([loss,train]) 61 | newt=time.time() 62 | best=min(best,newt-t1) 63 | print i,trainloss,(newt-t0)/(i+1),best 64 | t1=newt 65 | #print sess.run([inp1,retb,inp2,retd]) 66 | #grads=compute_gradient([inp1,inp2],[(16,32,3),(16,32,3)],loss,(1,),[xyz1,xyz2]) 67 | #for i,j in grads: 68 | #print i.shape,j.shape,np.mean(np.abs(i-j)),np.mean(np.abs(i)),np.mean(np.abs(j)) 69 | #for i in xrange(10): 70 | #t0=time.time() 71 | #a,b,c,d=sess.run([reta,retb,retc,retd],feed_dict={inp1:xyz1,inp2:xyz2}) 72 | #print 'time',time.time()-t0 73 | #print a.shape,b.shape,c.shape,d.shape 74 | #print 
a.dtype,b.dtype,c.dtype,d.dtype 75 | #samples=np.array(random.sample(range(xyz2.shape[1]),100),dtype='int32') 76 | #dist1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).min(axis=-1) 77 | #idx1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1) 78 | #print np.abs(dist1-a[:,samples]).max() 79 | #print np.abs(idx1-b[:,samples]).max() 80 | #dist2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).min(axis=-1) 81 | #idx2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1) 82 | #print np.abs(dist2-c[:,samples]).max() 83 | #print np.abs(idx2-d[:,samples]).max() 84 | 85 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/tf_nndistance_compile.sh: -------------------------------------------------------------------------------- 1 | /usr/local/cuda-8.0/bin/nvcc -std=c++11 -c -o tf_nndistance_g.cu.o tf_nndistance_g.cu -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2 && g++ -std=c++11 tf_nndistance.cpp tf_nndistance_g.cu.o -o tf_nndistance_so.so -shared -fPIC -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -L /usr/local/cuda-8.0/lib64 -O2 2 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/external/structural_losses/tf_nndistance_g.cu: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | #define EIGEN_USE_GPU 3 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 4 | 5 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 6 | const int batch=512; 7 | __shared__ float buf[batch*3]; 8 | for (int i=blockIdx.x;ibest){ 120 | result[(i*n+j)]=best; 121 | result_i[(i*n+j)]=best_i; 122 | } 123 | } 124 | __syncthreads(); 125 | } 126 | } 127 | } 128 | void NmDistanceKernelLauncher(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i){ 129 | NmDistanceKernel<<>>(b,n,xyz,m,xyz2,result,result_i); 130 | NmDistanceKernel<<>>(b,m,xyz2,n,xyz,result2,result2_i); 131 | } 132 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 133 | for (int i=blockIdx.x;i>>(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_xyz1,grad_xyz2); 156 | NmDistanceGradKernel<<>>(b,m,xyz2,n,xyz1,grad_dist2,idx2,grad_xyz2,grad_xyz1); 157 | } 158 | 159 | #endif 160 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/notebooks/__init__.py -------------------------------------------------------------------------------- /task_generation/latent_3d_points/notebooks/compute_evaluation_metrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Assuming 2 sets of point-clouds, we will compute the MMD, Coverage and JSD as done in the paper.\n", 8 | "\n", 9 | "(To compute these 
metrics you __don't need__ to have tflearn installed, only the structural: EMD, Chamfer losses and sklearn for the JSD.)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import os.path as osp\n", 22 | "\n", 23 | "from latent_3d_points.src.evaluation_metrics import minimum_mathing_distance, \\\n", 24 | "jsd_between_point_cloud_sets, coverage\n", 25 | "\n", 26 | "from latent_3d_points.src.in_out import snc_category_to_synth_id,\\\n", 27 | " load_all_point_clouds_under_folder" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Load some point-clouds and make two sets (sample_pcs, ref_pcs) from them. The ref_pcs is considered as the __ground-truth__ data while the sample_pcs corresponds to a set that is matched against it, e.g. comes from a generative model." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "%load_ext autoreload\n", 46 | "%autoreload 2\n", 47 | "%matplotlib inline" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Give me the class name (e.g. \"chair\"): chair\n", 62 | "6778 pclouds were loaded. They belong in 1 shape-classes.\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "top_in_dir = '../data/shape_net_core_uniform_samples_2048/' # Top-dir of where point-clouds are stored.\n", 68 | "class_name = raw_input('Give me the class name (e.g. \"chair\"): ').lower()\n", 69 | "syn_id = snc_category_to_synth_id()[class_name]\n", 70 | "class_dir = osp.join(top_in_dir , syn_id)\n", 71 | "all_pc_data = load_all_point_clouds_under_folder(class_dir, n_threads=8, file_ending='.ply', verbose=True)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "n_ref = 100 # size of ref_pcs.\n", 83 | "n_sam = 150 # size of sample_pcs.\n", 84 | "all_ids = np.arange(all_pc_data.num_examples)\n", 85 | "ref_ids = np.random.choice(all_ids, n_ref, replace=False)\n", 86 | "sam_ids = np.random.choice(all_ids, n_sam, replace=False)\n", 87 | "ref_pcs = all_pc_data.point_clouds[ref_ids]\n", 88 | "sample_pcs = all_pc_data.point_clouds[sam_ids]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "Compute the three metrics." 
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 11, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "ae_loss = 'chamfer' # Which distance to use for the matchings.\n", 107 | "\n", 108 | "if ae_loss == 'emd':\n", 109 | " use_EMD = True\n", 110 | "else:\n", 111 | " use_EMD = False # Will use Chamfer instead.\n", 112 | " \n", 113 | "batch_size = 100 # Find appropriate number that fits in GPU.\n", 114 | "normalize = True # Matched distances are divided by the number of \n", 115 | " # points of thepoint-clouds.\n", 116 | "\n", 117 | "mmd, matched_dists = minimum_mathing_distance(sample_pcs, ref_pcs, batch_size, normalize=normalize, use_EMD=use_EMD)\n", 118 | "\n", 119 | "cov, matched_ids = coverage(sample_pcs, ref_pcs, batch_size, normalize=normalize, use_EMD=use_EMD)\n", 120 | "\n", 121 | "jsd = jsd_between_point_cloud_sets(sample_pcs, ref_pcs, resolution=28)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 12, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "0.0714721 0.73 0.0396569736382\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "print mmd, cov, jsd" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "For a detailed breakdown of the evaluation functions, inspect their docs." 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 24, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "Computes the Coverage between two sets of point-clouds.\n", 162 | "\n", 163 | " Args:\n", 164 | " sample_pcs (numpy array SxKx3): the S point-clouds, each of K points that will be matched\n", 165 | " and compared to a set of \"reference\" point-clouds.\n", 166 | " ref_pcs (numpy array RxKx3): the R point-clouds, each of K points that constitute the\n", 167 | " set of \"reference\" point-clouds.\n", 168 | " batch_size (int): specifies how large will the batches be that the compute will use to\n", 169 | " make the comparisons of the sample-vs-ref point-clouds.\n", 170 | " normalize (boolean): When the matching is based on Chamfer (default behavior), if True,\n", 171 | " the Chamfer is computed as the average of the matched point-wise squared euclidean\n", 172 | " distances. 
Alternatively, is their sum.\n", 173 | " use_sqrt (boolean): When the matching is based on Chamfer (default behavior), if True,\n", 174 | " the Chamfer is computed based on the (not-squared) euclidean distances of the matched\n", 175 | " point-wise euclidean distances.\n", 176 | " sess (tf.Session): If None, it will make a new Session for this.\n", 177 | " use_EMD (boolean): If true, the matchings are based on the EMD.\n", 178 | " ret_dist (boolean): If true, it will also return the distances between each sample_pcs and\n", 179 | " it's matched ground-truth.\n", 180 | " Returns: the coverage score (int),\n", 181 | " the indices of the ref_pcs that are matched with each sample_pc\n", 182 | " and optionally the matched distances of the samples_pcs.\n", 183 | " \n", 184 | "Computes the MMD between two sets of point-clouds.\n", 185 | "\n", 186 | " Args:\n", 187 | " sample_pcs (numpy array SxKx3): the S point-clouds, each of K points that will be matched and\n", 188 | " compared to a set of \"reference\" point-clouds.\n", 189 | " ref_pcs (numpy array RxKx3): the R point-clouds, each of K points that constitute the set of\n", 190 | " \"reference\" point-clouds.\n", 191 | " batch_size (int): specifies how large will the batches be that the compute will use to make\n", 192 | " the comparisons of the sample-vs-ref point-clouds.\n", 193 | " normalize (boolean): When the matching is based on Chamfer (default behavior), if True, the\n", 194 | " Chamfer is computed as the average of the matched point-wise squared euclidean distances.\n", 195 | " Alternatively, is their sum.\n", 196 | " use_sqrt: (boolean): When the matching is based on Chamfer (default behavior), if True, the\n", 197 | " Chamfer is computed based on the (not-squared) euclidean distances of the matched point-wise\n", 198 | " euclidean distances.\n", 199 | " sess (tf.Session, default None): if None, it will make a new Session for this.\n", 200 | " use_EMD (boolean: If true, the matchings are based on the EMD.\n", 201 | "\n", 202 | " Returns:\n", 203 | " A tuple containing the MMD and all the matched distances of which the MMD is their mean.\n", 204 | " \n", 205 | " JSD between two sets of point-clouds, as introduced in the paper ```Learning Representations And Generative Models For 3D Point Clouds```. \n", 206 | " Args:\n", 207 | " sample_pcs: (np.ndarray S1xR2x3) S1 point-clouds, each of R1 points.\n", 208 | " ref_pcs: (np.ndarray S2xR2x3) S2 point-clouds, each of R2 points.\n", 209 | " resolution: (int) grid-resolution. 
Affects granularity of measurements.\n", 210 | " \n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "print coverage.__doc__\n", 216 | "print minimum_mathing_distance.__doc__\n", 217 | "print jsd_between_point_cloud_sets.__doc__" 218 | ] 219 | } 220 | ], 221 | "metadata": { 222 | "kernelspec": { 223 | "display_name": "TensorFlow1", 224 | "language": "python", 225 | "name": "tf1" 226 | }, 227 | "language_info": { 228 | "codemirror_mode": { 229 | "name": "ipython", 230 | "version": 2 231 | }, 232 | "file_extension": ".py", 233 | "mimetype": "text/x-python", 234 | "name": "python", 235 | "nbconvert_exporter": "python", 236 | "pygments_lexer": "ipython2", 237 | "version": "2.7.6" 238 | } 239 | }, 240 | "nbformat": 4, 241 | "nbformat_minor": 2 242 | } 243 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/src/__init__.py -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/ae_templates.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on September 2, 2017 3 | 4 | @author: optas 5 | ''' 6 | import numpy as np 7 | 8 | from . encoders_decoders import encoder_with_convs_and_symmetry, decoder_with_fc_only 9 | 10 | 11 | def mlp_architecture_ala_iclr_18(n_pc_points, bneck_size, bneck_post_mlp=False): 12 | ''' Single class experiments. 13 | ''' 14 | if n_pc_points != 2048: 15 | raise ValueError() 16 | 17 | encoder = encoder_with_convs_and_symmetry 18 | decoder = decoder_with_fc_only 19 | 20 | n_input = [n_pc_points, 3] 21 | 22 | encoder_args = {'n_filters': [64, 128, 128, 256, bneck_size], 23 | 'filter_sizes': [1], 24 | 'strides': [1], 25 | 'b_norm': True, 26 | 'verbose': True 27 | } 28 | 29 | decoder_args = {'layer_sizes': [256, 256, np.prod(n_input)], 30 | 'b_norm': False, 31 | 'b_norm_finish': False, 32 | 'verbose': True 33 | } 34 | 35 | if bneck_post_mlp: 36 | encoder_args['n_filters'].pop() 37 | decoder_args['layer_sizes'][0] = bneck_size 38 | 39 | return encoder, decoder, encoder_args, decoder_args 40 | 41 | 42 | def default_train_params(single_class=True): 43 | params = {'batch_size': 50, 44 | 'training_epochs': 500, 45 | 'denoising': False, 46 | 'learning_rate': 0.0005, 47 | 'z_rotate': False, 48 | 'saver_step': 10, 49 | 'loss_display_step': 1 50 | } 51 | 52 | if not single_class: 53 | params['z_rotate'] = True 54 | params['training_epochs'] = 1000 55 | 56 | return params 57 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/encoders_decoders.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on February 4, 2017 3 | 4 | @author: optas 5 | 6 | ''' 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | import warnings 11 | 12 | from tflearn.layers.core import fully_connected, dropout 13 | from tflearn.layers.conv import conv_1d, avg_pool_1d 14 | from tflearn.layers.normalization import batch_normalization 15 | from tflearn.layers.core import fully_connected, dropout 16 | 17 | from . 
tf_utils import expand_scope_by_name, replicate_parameter_for_all_layers 18 | 19 | def encoder_with_convs_and_symmetry(in_signal, n_filters=[64, 128, 256, 1024], filter_sizes=[1], strides=[1], 20 | b_norm=True, non_linearity=tf.nn.relu, regularizer=None, weight_decay=0.001, 21 | symmetry=tf.reduce_max, dropout_prob=None, pool=avg_pool_1d, pool_sizes=None, scope=None, 22 | reuse=False, padding='same', verbose=False, closing=None, conv_op=conv_1d): 23 | '''An Encoder (recognition network), which maps inputs onto a latent space. 24 | ''' 25 | 26 | if verbose: 27 | print 'Building Encoder' 28 | 29 | n_layers = len(n_filters) 30 | filter_sizes = replicate_parameter_for_all_layers(filter_sizes, n_layers) 31 | strides = replicate_parameter_for_all_layers(strides, n_layers) 32 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers) 33 | 34 | if n_layers < 2: 35 | raise ValueError('More than 1 layers are expected.') 36 | 37 | for i in xrange(n_layers): 38 | if i == 0: 39 | layer = in_signal 40 | 41 | name = 'encoder_conv_layer_' + str(i) 42 | scope_i = expand_scope_by_name(scope, name) 43 | layer = conv_op(layer, nb_filter=n_filters[i], filter_size=filter_sizes[i], strides=strides[i], regularizer=regularizer, 44 | weight_decay=weight_decay, name=name, reuse=reuse, scope=scope_i, padding=padding) 45 | 46 | if verbose: 47 | print name, 'conv params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()), 48 | 49 | if b_norm: 50 | name += '_bnorm' 51 | scope_i = expand_scope_by_name(scope, name) 52 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i) 53 | if verbose: 54 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list()) 55 | 56 | if non_linearity is not None: 57 | layer = non_linearity(layer) 58 | 59 | if pool is not None and pool_sizes is not None: 60 | if pool_sizes[i] is not None: 61 | layer = pool(layer, kernel_size=pool_sizes[i]) 62 | 63 | if dropout_prob is not None and dropout_prob[i] > 0: 64 | layer = dropout(layer, 1.0 - dropout_prob[i]) 65 | 66 | if verbose: 67 | print layer 68 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n' 69 | 70 | if symmetry is not None: 71 | layer = symmetry(layer, axis=1) 72 | if verbose: 73 | print layer 74 | 75 | if closing is not None: 76 | layer = closing(layer) 77 | print layer 78 | 79 | return layer 80 | 81 | 82 | def decoder_with_fc_only(latent_signal, layer_sizes=[], b_norm=True, non_linearity=tf.nn.relu, 83 | regularizer=None, weight_decay=0.001, reuse=False, scope=None, dropout_prob=None, 84 | b_norm_finish=False, verbose=False): 85 | '''A decoding network which maps points from the latent space back onto the data space. 
86 | ''' 87 | if verbose: 88 | print 'Building Decoder' 89 | 90 | n_layers = len(layer_sizes) 91 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers) 92 | 93 | if n_layers < 2: 94 | raise ValueError('For an FC decoder with single a layer use simpler code.') 95 | 96 | for i in xrange(0, n_layers - 1): 97 | name = 'decoder_fc_' + str(i) 98 | scope_i = expand_scope_by_name(scope, name) 99 | 100 | if i == 0: 101 | layer = latent_signal 102 | 103 | layer = fully_connected(layer, layer_sizes[i], activation='linear', weights_init='xavier', name=name, regularizer=regularizer, weight_decay=weight_decay, reuse=reuse, scope=scope_i) 104 | 105 | if verbose: 106 | print name, 'FC params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()), 107 | 108 | if b_norm: 109 | name += '_bnorm' 110 | scope_i = expand_scope_by_name(scope, name) 111 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i) 112 | if verbose: 113 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list()) 114 | 115 | if non_linearity is not None: 116 | layer = non_linearity(layer) 117 | 118 | if dropout_prob is not None and dropout_prob[i] > 0: 119 | layer = dropout(layer, 1.0 - dropout_prob[i]) 120 | 121 | if verbose: 122 | print layer 123 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n' 124 | 125 | # Last decoding layer never has a non-linearity. 126 | name = 'decoder_fc_' + str(n_layers - 1) 127 | scope_i = expand_scope_by_name(scope, name) 128 | layer = fully_connected(layer, layer_sizes[n_layers - 1], activation='linear', weights_init='xavier', name=name, regularizer=regularizer, weight_decay=weight_decay, reuse=reuse, scope=scope_i) 129 | if verbose: 130 | print name, 'FC params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()), 131 | 132 | if b_norm_finish: 133 | name += '_bnorm' 134 | scope_i = expand_scope_by_name(scope, name) 135 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i) 136 | if verbose: 137 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list()) 138 | 139 | if verbose: 140 | print layer 141 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n' 142 | 143 | return layer 144 | 145 | 146 | def decoder_with_convs_only(in_signal, n_filters, filter_sizes, strides, padding='same', b_norm=True, non_linearity=tf.nn.relu, 147 | conv_op=conv_1d, regularizer=None, weight_decay=0.001, dropout_prob=None, upsample_sizes=None, 148 | b_norm_finish=False, scope=None, reuse=False, verbose=False): 149 | 150 | if verbose: 151 | print 'Building Decoder' 152 | 153 | n_layers = len(n_filters) 154 | filter_sizes = replicate_parameter_for_all_layers(filter_sizes, n_layers) 155 | strides = replicate_parameter_for_all_layers(strides, n_layers) 156 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers) 157 | 158 | for i in xrange(n_layers): 159 | if i == 0: 160 | layer = in_signal 161 | 162 | name = 'decoder_conv_layer_' + str(i) 163 | scope_i = expand_scope_by_name(scope, name) 164 | 165 | layer = conv_op(layer, nb_filter=n_filters[i], filter_size=filter_sizes[i], 166 | strides=strides[i], padding=padding, regularizer=regularizer, weight_decay=weight_decay, 167 | name=name, reuse=reuse, scope=scope_i) 168 | 169 | if verbose: 170 | print name, 'conv params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()), 
171 | 172 | if (b_norm and i < n_layers - 1) or (i == n_layers - 1 and b_norm_finish): 173 | name += '_bnorm' 174 | scope_i = expand_scope_by_name(scope, name) 175 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i) 176 | if verbose: 177 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list()) 178 | 179 | if non_linearity is not None and i < n_layers - 1: # Last layer doesn't have a non-linearity. 180 | layer = non_linearity(layer) 181 | 182 | if dropout_prob is not None and dropout_prob[i] > 0: 183 | layer = dropout(layer, 1.0 - dropout_prob[i]) 184 | 185 | if upsample_sizes is not None and upsample_sizes[i] is not None: 186 | layer = tf.tile(layer, multiples=[1, upsample_sizes[i], 1]) 187 | 188 | if verbose: 189 | print layer 190 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n' 191 | 192 | return layer 193 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/gan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 3, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import os.path as osp 8 | import warnings 9 | import tensorflow as tf 10 | 11 | from . neural_net import Neural_Net 12 | from . tf_utils import safe_log 13 | 14 | class GAN(Neural_Net): 15 | 16 | def __init__(self, name, graph): 17 | Neural_Net.__init__(self, name, graph) 18 | 19 | def save_model(self, tick): 20 | self.saver.save(self.sess, self.MODEL_SAVER_ID, global_step=tick) 21 | 22 | def restore_model(self, model_path, epoch, verbose=False): 23 | '''Restore all the variables of a saved model. 24 | ''' 25 | self.saver.restore(self.sess, osp.join(model_path, self.MODEL_SAVER_ID + '-' + str(int(epoch)))) 26 | 27 | if self.epoch.eval(session=self.sess) != epoch: 28 | warnings.warn('Loaded model\'s epoch doesn\'t match the requested one.') 29 | else: 30 | if verbose: 31 | print('Model restored in epoch {0}.'.format(epoch)) 32 | 33 | def optimizer(self, learning_rate, beta, loss, var_list): 34 | initial_learning_rate = learning_rate 35 | optimizer = tf.train.AdamOptimizer(initial_learning_rate, beta1=beta).minimize(loss, var_list=var_list) 36 | return optimizer 37 | 38 | def generate(self, n_samples, noise_params): 39 | noise = self.generator_noise_distribution(n_samples, self.noise_dim, **noise_params) 40 | feed_dict = {self.noise: noise} 41 | return self.sess.run([self.generator_out], feed_dict=feed_dict)[0] 42 | 43 | def vanilla_gan_objective(self, real_prob, synthetic_prob, use_safe_log=True): 44 | if use_safe_log: 45 | log = safe_log 46 | else: 47 | log = tf.log 48 | 49 | loss_d = tf.reduce_mean(-log(real_prob) - log(1 - synthetic_prob)) 50 | loss_g = tf.reduce_mean(-log(synthetic_prob)) 51 | return loss_d, loss_g 52 | 53 | def w_gan_objective(self, real_logit, synthetic_logit): 54 | loss_d = tf.reduce_mean(synthetic_logit) - tf.reduce_mean(real_logit) 55 | loss_g = -tf.reduce_mean(synthetic_logit) 56 | return loss_d, loss_g -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/general_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on November 26, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import numpy as np 8 | from numpy.linalg import norm 9 | import matplotlib.pylab as plt 10 | from mpl_toolkits.mplot3d import Axes3D 11 | 12 | 13 | def rand_rotation_matrix(deflection=1.0, 
seed=None): 14 | '''Creates a random rotation matrix. 15 | 16 | deflection: the magnitude of the rotation. For 0, no rotation; for 1, completely random 17 | rotation. Small deflection => small perturbation. 18 | 19 | DOI: http://www.realtimerendering.com/resources/GraphicsGems/gemsiii/rand_rotation.c 20 | http://blog.lostinmyterminal.com/python/2015/05/12/random-rotation-matrix.html 21 | ''' 22 | if seed is not None: 23 | np.random.seed(seed) 24 | 25 | randnums = np.random.uniform(size=(3,)) 26 | 27 | theta, phi, z = randnums 28 | 29 | theta = theta * 2.0 * deflection * np.pi # Rotation about the pole (Z). 30 | phi = phi * 2.0 * np.pi # For direction of pole deflection. 31 | z = z * 2.0 * deflection # For magnitude of pole deflection. 32 | 33 | # Compute a vector V used for distributing points over the sphere 34 | # via the reflection I - V Transpose(V). This formulation of V 35 | # will guarantee that if x[1] and x[2] are uniformly distributed, 36 | # the reflected points will be uniform on the sphere. Note that V 37 | # has length sqrt(2) to eliminate the 2 in the Householder matrix. 38 | 39 | r = np.sqrt(z) 40 | V = ( 41 | np.sin(phi) * r, 42 | np.cos(phi) * r, 43 | np.sqrt(2.0 - z)) 44 | 45 | st = np.sin(theta) 46 | ct = np.cos(theta) 47 | 48 | R = np.array(((ct, st, 0), (-st, ct, 0), (0, 0, 1))) 49 | 50 | # Construct the rotation matrix ( V Transpose(V) - I ) R. 51 | M = (np.outer(V, V) - np.eye(3)).dot(R) 52 | return M 53 | 54 | 55 | def iterate_in_chunks(l, n): 56 | '''Yield successive 'n'-sized chunks from iterable 'l'. 57 | Note: last chunk will be smaller than l if n doesn't divide l perfectly. 58 | ''' 59 | for i in xrange(0, len(l), n): 60 | yield l[i:i + n] 61 | 62 | 63 | def add_gaussian_noise_to_pcloud(pcloud, mu=0, sigma=1): 64 | gnoise = np.random.normal(mu, sigma, pcloud.shape[0]) 65 | gnoise = np.tile(gnoise, (3, 1)).T 66 | pcloud += gnoise 67 | return pcloud 68 | 69 | 70 | def apply_augmentations(batch, conf): 71 | if conf.gauss_augment is not None or conf.z_rotate: 72 | batch = batch.copy() 73 | 74 | if conf.gauss_augment is not None: 75 | mu = conf.gauss_augment['mu'] 76 | sigma = conf.gauss_augment['sigma'] 77 | batch += np.random.normal(mu, sigma, batch.shape) 78 | 79 | if conf.z_rotate: 80 | r_rotation = rand_rotation_matrix() 81 | r_rotation[0, 2] = 0 82 | r_rotation[2, 0] = 0 83 | r_rotation[1, 2] = 0 84 | r_rotation[2, 1] = 0 85 | r_rotation[2, 2] = 1 86 | batch = batch.dot(r_rotation) 87 | return batch 88 | 89 | 90 | def unit_cube_grid_point_cloud(resolution, clip_sphere=False): 91 | '''Returns the center coordinates of each cell of a 3D grid with resolution^3 cells, 92 | that is placed in the unit-cube. 93 | If clip_sphere it True it drops the "corner" cells that lie outside the unit-sphere. 
94 | ''' 95 | grid = np.ndarray((resolution, resolution, resolution, 3), np.float32) 96 | spacing = 1.0 / float(resolution - 1) 97 | for i in xrange(resolution): 98 | for j in xrange(resolution): 99 | for k in xrange(resolution): 100 | grid[i, j, k, 0] = i * spacing - 0.5 101 | grid[i, j, k, 1] = j * spacing - 0.5 102 | grid[i, j, k, 2] = k * spacing - 0.5 103 | 104 | if clip_sphere: 105 | grid = grid.reshape(-1, 3) 106 | grid = grid[norm(grid, axis=1) <= 0.5] 107 | 108 | return grid, spacing 109 | 110 | def plot_3d_point_cloud(x, y, z, show=True, show_axis=True, in_u_sphere=False, marker='.', s=8, alpha=.8, figsize=(5, 5), elev=10, azim=240, axis=None, title=None, *args, **kwargs): 111 | 112 | if axis is None: 113 | fig = plt.figure(figsize=figsize) 114 | ax = fig.add_subplot(111, projection='3d') 115 | else: 116 | ax = axis 117 | fig = axis 118 | 119 | if title is not None: 120 | plt.title(title) 121 | 122 | sc = ax.scatter(x, y, z, marker=marker, s=s, alpha=alpha, *args, **kwargs) 123 | ax.view_init(elev=elev, azim=azim) 124 | 125 | if in_u_sphere: 126 | ax.set_xlim3d(-0.5, 0.5) 127 | ax.set_ylim3d(-0.5, 0.5) 128 | ax.set_zlim3d(-0.5, 0.5) 129 | else: 130 | miv = 0.7 * np.min([np.min(x), np.min(y), np.min(z)]) # Multiply with 0.7 to squeeze free-space. 131 | mav = 0.7 * np.max([np.max(x), np.max(y), np.max(z)]) 132 | ax.set_xlim(miv, mav) 133 | ax.set_ylim(miv, mav) 134 | ax.set_zlim(miv, mav) 135 | plt.tight_layout() 136 | 137 | if not show_axis: 138 | plt.axis('off') 139 | 140 | if 'c' in kwargs: 141 | plt.colorbar(sc) 142 | 143 | if show: 144 | plt.show() 145 | 146 | return fig -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/generators_discriminators.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 11, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | from tflearn.layers.normalization import batch_normalization 10 | from tflearn.layers.core import fully_connected, dropout 11 | 12 | from . encoders_decoders import encoder_with_convs_and_symmetry, decoder_with_fc_only 13 | from . tf_utils import leaky_relu 14 | from . tf_utils import expand_scope_by_name 15 | 16 | 17 | def mlp_discriminator(in_signal, non_linearity=tf.nn.relu, reuse=False, scope=None, b_norm=True, dropout_prob=None): 18 | ''' used in nips submission. 19 | ''' 20 | encoder_args = {'n_filters': [64, 128, 256, 256, 512], 'filter_sizes': [1, 1, 1, 1, 1], 'strides': [1, 1, 1, 1, 1]} 21 | encoder_args['reuse'] = reuse 22 | encoder_args['scope'] = scope 23 | encoder_args['non_linearity'] = non_linearity 24 | encoder_args['dropout_prob'] = dropout_prob 25 | encoder_args['b_norm'] = b_norm 26 | layer = encoder_with_convs_and_symmetry(in_signal, **encoder_args) 27 | 28 | name = 'decoding_logits' 29 | scope_e = expand_scope_by_name(scope, name) 30 | d_logit = decoder_with_fc_only(layer, layer_sizes=[128, 64, 1], b_norm=b_norm, reuse=reuse, scope=scope_e) 31 | d_prob = tf.nn.sigmoid(d_logit) 32 | return d_prob, d_logit 33 | 34 | 35 | def point_cloud_generator(z, pc_dims, layer_sizes=[64, 128, 512, 1024], non_linearity=tf.nn.relu, b_norm=False, b_norm_last=False, dropout_prob=None): 36 | ''' used in nips submission. 
37 | ''' 38 | 39 | n_points, dummy = pc_dims 40 | if (dummy != 3): 41 | raise ValueError() 42 | 43 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm) 44 | out_signal = non_linearity(out_signal) 45 | 46 | if dropout_prob is not None: 47 | out_signal = dropout(out_signal, dropout_prob) 48 | 49 | if b_norm_last: 50 | out_signal = batch_normalization(out_signal) 51 | 52 | out_signal = fully_connected(out_signal, np.prod([n_points, 3]), activation='linear', weights_init='xavier') 53 | out_signal = tf.reshape(out_signal, [-1, n_points, 3]) 54 | return out_signal 55 | 56 | 57 | def convolutional_discriminator(in_signal, non_linearity=tf.nn.relu, 58 | encoder_args={'n_filters': [128, 128, 256, 512], 'filter_sizes': [40, 20, 10, 10], 'strides': [1, 2, 2, 1]}, 59 | decoder_layer_sizes=[128, 64, 1], 60 | reuse=False, scope=None): 61 | 62 | encoder_args['reuse'] = reuse 63 | encoder_args['scope'] = scope 64 | encoder_args['non_linearity'] = non_linearity 65 | layer = encoder_with_convs_and_symmetry(in_signal, **encoder_args) 66 | 67 | name = 'decoding_logits' 68 | scope_e = expand_scope_by_name(scope, name) 69 | d_logit = decoder_with_fc_only(layer, layer_sizes=decoder_layer_sizes, non_linearity=non_linearity, reuse=reuse, scope=scope_e) 70 | d_prob = tf.nn.sigmoid(d_logit) 71 | return d_prob, d_logit 72 | 73 | 74 | def latent_code_generator(z, out_dim, layer_sizes=[64, 128], b_norm=False): 75 | layer_sizes = layer_sizes + out_dim 76 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, b_norm=b_norm) 77 | out_signal = tf.nn.relu(out_signal) 78 | return out_signal 79 | 80 | 81 | def latent_code_discriminator(in_singnal, layer_sizes=[64, 128, 256, 256, 512], b_norm=False, non_linearity=tf.nn.relu, reuse=False, scope=None): 82 | layer_sizes = layer_sizes + [1] 83 | d_logit = decoder_with_fc_only(in_singnal, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm, reuse=reuse, scope=scope) 84 | d_prob = tf.nn.sigmoid(d_logit) 85 | return d_prob, d_logit 86 | 87 | 88 | def latent_code_discriminator_two_layers(in_signal, layer_sizes=[256, 512], b_norm=False, non_linearity=tf.nn.relu, reuse=False, scope=None): 89 | ''' Used in ICML submission. 90 | ''' 91 | layer_sizes = layer_sizes + [1] 92 | d_logit = decoder_with_fc_only(in_signal, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm, reuse=reuse, scope=scope) 93 | d_prob = tf.nn.sigmoid(d_logit) 94 | return d_prob, d_logit 95 | 96 | 97 | def latent_code_generator_two_layers(z, out_dim, layer_sizes=[128], b_norm=False): 98 | ''' Used in ICML submission. 99 | ''' 100 | layer_sizes = layer_sizes + out_dim 101 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, b_norm=b_norm) 102 | out_signal = tf.nn.relu(out_signal) 103 | return out_signal 104 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/in_out.py: -------------------------------------------------------------------------------- 1 | import six 2 | import warnings 3 | import numpy as np 4 | import os 5 | import os.path as osp 6 | import re 7 | from six.moves import cPickle 8 | from multiprocessing import Pool 9 | 10 | from . general_utils import rand_rotation_matrix 11 | from .. 
external.python_plyfile.plyfile import PlyElement, PlyData 12 | 13 | snc_synth_id_to_category = { 14 | '02691156': 'airplane', '02773838': 'bag', '02801938': 'basket', 15 | '02808440': 'bathtub', '02818832': 'bed', '02828884': 'bench', 16 | '02834778': 'bicycle', '02843684': 'birdhouse', '02871439': 'bookshelf', 17 | '02876657': 'bottle', '02880940': 'bowl', '02924116': 'bus', 18 | '02933112': 'cabinet', '02747177': 'can', '02942699': 'camera', 19 | '02954340': 'cap', '02958343': 'car', '03001627': 'chair', 20 | '03046257': 'clock', '03207941': 'dishwasher', '03211117': 'monitor', 21 | '04379243': 'table', '04401088': 'telephone', '02946921': 'tin_can', 22 | '04460130': 'tower', '04468005': 'train', '03085013': 'keyboard', 23 | '03261776': 'earphone', '03325088': 'faucet', '03337140': 'file', 24 | '03467517': 'guitar', '03513137': 'helmet', '03593526': 'jar', 25 | '03624134': 'knife', '03636649': 'lamp', '03642806': 'laptop', 26 | '03691459': 'speaker', '03710193': 'mailbox', '03759954': 'microphone', 27 | '03761084': 'microwave', '03790512': 'motorcycle', '03797390': 'mug', 28 | '03928116': 'piano', '03938244': 'pillow', '03948459': 'pistol', 29 | '03991062': 'pot', '04004475': 'printer', '04074963': 'remote_control', 30 | '04090263': 'rifle', '04099429': 'rocket', '04225987': 'skateboard', 31 | '04256520': 'sofa', '04330267': 'stove', '04530566': 'vessel', 32 | '04554684': 'washer', '02858304': 'boat', '02992529': 'cellphone' 33 | } 34 | 35 | 36 | def snc_category_to_synth_id(): 37 | d = snc_synth_id_to_category 38 | inv_map = {v: k for k, v in six.iteritems(d)} 39 | return inv_map 40 | 41 | 42 | def create_dir(dir_path): 43 | ''' Creates a directory (or nested directories) if they don't exist. 44 | ''' 45 | print('creating dir: ', dir_path) 46 | if not osp.exists(dir_path): 47 | os.makedirs(dir_path) 48 | 49 | return dir_path 50 | 51 | 52 | def pickle_data(file_name, *args): 53 | '''Using (c)Pickle to save multiple python objects in a single file. 54 | ''' 55 | myFile = open(file_name, 'wb') 56 | cPickle.dump(len(args), myFile, protocol=2) 57 | for item in args: 58 | cPickle.dump(item, myFile, protocol=2) 59 | myFile.close() 60 | 61 | 62 | def unpickle_data(file_name): 63 | '''Restore data previously saved with pickle_data(). 64 | ''' 65 | inFile = open(file_name, 'rb') 66 | size = cPickle.load(inFile) 67 | for _ in xrange(size): 68 | yield cPickle.load(inFile) 69 | inFile.close() 70 | 71 | 72 | def files_in_subdirs(top_dir, search_pattern): 73 | regex = re.compile(search_pattern) 74 | for path, _, files in os.walk(top_dir): 75 | for name in files: 76 | full_name = osp.join(path, name) 77 | if regex.search(full_name): 78 | yield full_name 79 | 80 | 81 | def load_ply(file_name, with_faces=False, with_color=False): 82 | ply_data = PlyData.read(file_name) 83 | points = ply_data['vertex'] 84 | points = np.vstack([points['x'], points['y'], points['z']]).T 85 | ret_val = [points] 86 | 87 | if with_faces: 88 | faces = np.vstack(ply_data['face']['vertex_indices']) 89 | ret_val.append(faces) 90 | 91 | if with_color: 92 | r = np.vstack(ply_data['vertex']['red']) 93 | g = np.vstack(ply_data['vertex']['green']) 94 | b = np.vstack(ply_data['vertex']['blue']) 95 | color = np.hstack((r, g, b)) 96 | ret_val.append(color) 97 | 98 | if len(ret_val) == 1: # Unwrap the list 99 | ret_val = ret_val[0] 100 | 101 | return ret_val 102 | 103 | 104 | def pc_loader(f_name): 105 | ''' loads a point-cloud saved under ShapeNet's "standar" folder scheme: 106 | i.e. 
/syn_id/model_name.ply 107 | ''' 108 | tokens = f_name.split('/') 109 | model_id = tokens[-1].split('.')[0] 110 | synet_id = tokens[-2] 111 | return load_ply(f_name), model_id, synet_id 112 | 113 | 114 | def load_all_point_clouds_under_folder(top_dir, n_threads=20, file_ending='.ply', verbose=False): 115 | file_names = [f for f in files_in_subdirs(top_dir, file_ending)] 116 | pclouds, model_ids, syn_ids = load_point_clouds_from_filenames(file_names, n_threads, loader=pc_loader, verbose=verbose) 117 | return PointCloudDataSet(pclouds, labels=syn_ids + '_' + model_ids, init_shuffle=False) 118 | 119 | 120 | def load_point_clouds_from_filenames(file_names, n_threads, loader, verbose=False): 121 | pc = loader(file_names[0])[0] 122 | pclouds = np.empty([len(file_names), pc.shape[0], pc.shape[1]], dtype=np.float32) 123 | model_names = np.empty([len(file_names)], dtype=object) 124 | class_ids = np.empty([len(file_names)], dtype=object) 125 | pool = Pool(n_threads) 126 | 127 | for i, data in enumerate(pool.imap(loader, file_names)): 128 | pclouds[i, :, :], model_names[i], class_ids[i] = data 129 | 130 | pool.close() 131 | pool.join() 132 | 133 | if len(np.unique(model_names)) != len(pclouds): 134 | warnings.warn('Point clouds with the same model name were loaded.') 135 | 136 | if verbose: 137 | print('{0} pclouds were loaded. They belong in {1} shape-classes.'.format(len(pclouds), len(np.unique(class_ids)))) 138 | 139 | return pclouds, model_names, class_ids 140 | 141 | 142 | class PointCloudDataSet(object): 143 | ''' 144 | See https://github.com/tensorflow/tensorflow/blob/a5d8217c4ed90041bea2616c14a8ddcf11ec8c03/tensorflow/examples/tutorials/mnist/input_data.py 145 | ''' 146 | 147 | def __init__(self, point_clouds, noise=None, labels=None, copy=True, init_shuffle=True): 148 | '''Construct a DataSet. 149 | Args: 150 | init_shuffle, shuffle data before first epoch has been reached. 151 | Output: 152 | original_pclouds, labels, (None or Feed) # TODO Rename 153 | ''' 154 | 155 | self.num_examples = point_clouds.shape[0] 156 | self.n_points = point_clouds.shape[1] 157 | 158 | if labels is not None: 159 | assert point_clouds.shape[0] == labels.shape[0], ('points.shape: %s labels.shape: %s' % (point_clouds.shape, labels.shape)) 160 | if copy: 161 | self.labels = labels.copy() 162 | else: 163 | self.labels = labels 164 | 165 | else: 166 | self.labels = np.ones(self.num_examples, dtype=np.int8) 167 | 168 | if noise is not None: 169 | assert (type(noise) is np.ndarray) 170 | if copy: 171 | self.noisy_point_clouds = noise.copy() 172 | else: 173 | self.noisy_point_clouds = noise 174 | else: 175 | self.noisy_point_clouds = None 176 | 177 | if copy: 178 | self.point_clouds = point_clouds.copy() 179 | else: 180 | self.point_clouds = point_clouds 181 | 182 | self.epochs_completed = 0 183 | self._index_in_epoch = 0 184 | if init_shuffle: 185 | self.shuffle_data() 186 | 187 | def shuffle_data(self, seed=None): 188 | if seed is not None: 189 | np.random.seed(seed) 190 | perm = np.arange(self.num_examples) 191 | np.random.shuffle(perm) 192 | self.point_clouds = self.point_clouds[perm] 193 | self.labels = self.labels[perm] 194 | if self.noisy_point_clouds is not None: 195 | self.noisy_point_clouds = self.noisy_point_clouds[perm] 196 | return self 197 | 198 | def next_batch(self, batch_size, seed=None): 199 | '''Return the next batch_size examples from this data set. 
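Once the running index moves past num_examples, the epoch counter is incremented, the data is reshuffled (optionally with the provided seed) and the batch restarts from the beginning of the permuted set.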
200 | ''' 201 | start = self._index_in_epoch 202 | self._index_in_epoch += batch_size 203 | if self._index_in_epoch > self.num_examples: 204 | self.epochs_completed += 1 # Finished epoch. 205 | self.shuffle_data(seed) 206 | # Start next epoch 207 | start = 0 208 | self._index_in_epoch = batch_size 209 | end = self._index_in_epoch 210 | 211 | if self.noisy_point_clouds is None: 212 | return self.point_clouds[start:end], self.labels[start:end], None 213 | else: 214 | return self.point_clouds[start:end], self.labels[start:end], self.noisy_point_clouds[start:end] 215 | 216 | def full_epoch_data(self, shuffle=True, seed=None): 217 | '''Returns a copy of the examples of the entire data set (i.e. an epoch's data), shuffled. 218 | ''' 219 | if shuffle and seed is not None: 220 | np.random.seed(seed) 221 | perm = np.arange(self.num_examples) # Shuffle the data. 222 | if shuffle: 223 | np.random.shuffle(perm) 224 | pc = self.point_clouds[perm] 225 | lb = self.labels[perm] 226 | ns = None 227 | if self.noisy_point_clouds is not None: 228 | ns = self.noisy_point_clouds[perm] 229 | return pc, lb, ns 230 | 231 | def merge(self, other_data_set): 232 | self._index_in_epoch = 0 233 | self.epochs_completed = 0 234 | self.point_clouds = np.vstack((self.point_clouds, other_data_set.point_clouds)) 235 | 236 | labels_1 = self.labels.reshape([self.num_examples, 1]) # TODO = move to init. 237 | labels_2 = other_data_set.labels.reshape([other_data_set.num_examples, 1]) 238 | self.labels = np.vstack((labels_1, labels_2)) 239 | self.labels = np.squeeze(self.labels) 240 | 241 | if self.noisy_point_clouds is not None: 242 | self.noisy_point_clouds = np.vstack((self.noisy_point_clouds, other_data_set.noisy_point_clouds)) 243 | 244 | self.num_examples = self.point_clouds.shape[0] 245 | 246 | return self 247 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/latent_gan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on April 27, 2017 3 | 4 | @author: optas 5 | ''' 6 | import numpy as np 7 | import time 8 | import tensorflow as tf 9 | 10 | from . gan import GAN 11 | 12 | from .. fundamentals.layers import safe_log 13 | from tflearn import is_training 14 | 15 | 16 | class LatentGAN(GAN): 17 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None): 18 | 19 | self.noise_dim = noise_dim 20 | self.n_output = n_output 21 | self.discriminator = discriminator 22 | self.generator = generator 23 | 24 | GAN.__init__(self, name, graph) 25 | 26 | with tf.variable_scope(name): 27 | 28 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector. 29 | self.gt_data = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth. 
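# Note: in this latent GAN the ground-truth samples are fixed-length latent codes
# (e.g. embeddings produced by an auto-encoder), so n_output is simply their dimensionality.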
30 | 31 | with tf.variable_scope('generator'): 32 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs) 33 | 34 | with tf.variable_scope('discriminator') as scope: 35 | self.real_prob, self.real_logit = self.discriminator(self.gt_data, scope=scope, **disc_kwargs) 36 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs) 37 | 38 | self.loss_d = tf.reduce_mean(-tf.log(self.real_prob) - tf.log(1 - self.synthetic_prob)) 39 | self.loss_g = tf.reduce_mean(-tf.log(self.synthetic_prob)) 40 | 41 | #Post ICLR TRY: safe_log 42 | 43 | train_vars = tf.trainable_variables() 44 | 45 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')] 46 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')] 47 | 48 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params) 49 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params) 50 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None) 51 | self.init = tf.global_variables_initializer() 52 | 53 | # Launch the session 54 | config = tf.ConfigProto() 55 | config.gpu_options.allow_growth = True 56 | self.sess = tf.Session(config=config) 57 | self.sess.run(self.init) 58 | 59 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma): 60 | return np.random.normal(mu, sigma, (n_samples, ndims)) 61 | 62 | def _single_epoch_train(self, train_data, batch_size, noise_params): 63 | ''' 64 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/ 65 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/ 66 | ''' 67 | n_examples = train_data.num_examples 68 | epoch_loss_d = 0. 69 | epoch_loss_g = 0. 70 | batch_size = batch_size 71 | n_batches = int(n_examples / batch_size) 72 | start_time = time.time() 73 | 74 | is_training(True, session=self.sess) 75 | try: 76 | # Loop over all batches 77 | for _ in xrange(n_batches): 78 | feed, _, _ = train_data.next_batch(batch_size) 79 | 80 | # Update discriminator. 
81 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params) 82 | feed_dict = {self.gt_data: feed, self.noise: z} 83 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict) 84 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict) 85 | 86 | # Compute average loss 87 | epoch_loss_d += loss_d 88 | epoch_loss_g += loss_g 89 | 90 | is_training(False, session=self.sess) 91 | except Exception: 92 | raise 93 | finally: 94 | is_training(False, session=self.sess) 95 | 96 | epoch_loss_d /= n_batches 97 | epoch_loss_g /= n_batches 98 | duration = time.time() - start_time 99 | return (epoch_loss_d, epoch_loss_g), duration 100 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/neural_net.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on August 28, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import os.path as osp 8 | import tensorflow as tf 9 | 10 | MODEL_SAVER_ID = 'models.ckpt' 11 | 12 | 13 | class Neural_Net(object): 14 | 15 | def __init__(self, name, graph): 16 | if graph is None: 17 | graph = tf.get_default_graph() 18 | 19 | self.graph = graph 20 | self.name = name 21 | 22 | with tf.variable_scope(name): 23 | with tf.device('/cpu:0'): 24 | self.epoch = tf.get_variable('epoch', [], initializer=tf.constant_initializer(0), trainable=False) 25 | self.increment_epoch = self.epoch.assign_add(tf.constant(1.0)) 26 | 27 | self.no_op = tf.no_op() 28 | 29 | def is_training(self): 30 | is_training_op = self.graph.get_collection('is_training') 31 | return self.sess.run(is_training_op)[0] 32 | 33 | def restore_model(self, model_path, epoch, verbose=False): 34 | '''Restore all the variables of a saved model. 35 | ''' 36 | self.saver.restore(self.sess, osp.join(model_path, MODEL_SAVER_ID + '-' + str(int(epoch)))) 37 | 38 | if self.epoch.eval(session=self.sess) != epoch: 39 | warnings.warn('Loaded model\'s epoch doesn\'t match the requested one.') 40 | else: 41 | if verbose: 42 | print('Model restored in epoch {0}.'.format(epoch)) 43 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/point_net_ae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on January 26, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import time 8 | import tensorflow as tf 9 | import os.path as osp 10 | 11 | from tflearn.layers.conv import conv_1d 12 | from tflearn.layers.core import fully_connected 13 | 14 | from . in_out import create_dir 15 | from . autoencoder import AutoEncoder 16 | from . general_utils import apply_augmentations 17 | 18 | try: 19 | from .. external.structural_losses.tf_nndistance import nn_distance 20 | from .. external.structural_losses.tf_approxmatch import approx_match, match_cost 21 | except: 22 | print('External Losses (Chamfer-EMD) cannot be loaded. Please install them first.') 23 | 24 | 25 | class PointNetAutoEncoder(AutoEncoder): 26 | ''' 27 | An Auto-Encoder for point-clouds. 
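The configured encoder maps the input cloud to a bottleneck code z, and the configured decoder maps z back to a point set of shape n_output; reconstruction is trained with either a Chamfer or an EMD loss (see _create_loss).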
28 | ''' 29 | 30 | def __init__(self, name, configuration, graph=None): 31 | c = configuration 32 | self.configuration = c 33 | 34 | AutoEncoder.__init__(self, name, graph, configuration) 35 | 36 | with tf.variable_scope(name): 37 | self.z = c.encoder(self.x, **c.encoder_args) 38 | self.bottleneck_size = int(self.z.get_shape()[1]) 39 | layer = c.decoder(self.z, **c.decoder_args) 40 | 41 | if c.exists_and_is_not_none('close_with_tanh'): 42 | layer = tf.nn.tanh(layer) 43 | 44 | self.x_reconstr = tf.reshape(layer, [-1, self.n_output[0], self.n_output[1]]) 45 | 46 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=c.saver_max_to_keep) 47 | 48 | self._create_loss() 49 | self._setup_optimizer() 50 | 51 | # GPU configuration 52 | if hasattr(c, 'allow_gpu_growth'): 53 | growth = c.allow_gpu_growth 54 | else: 55 | growth = True 56 | 57 | config = tf.ConfigProto() 58 | config.gpu_options.allow_growth = growth 59 | 60 | # Summaries 61 | self.merged_summaries = tf.summary.merge_all() 62 | self.train_writer = tf.summary.FileWriter(osp.join(configuration.train_dir, 'summaries'), self.graph) 63 | 64 | # Initializing the tensor flow variables 65 | self.init = tf.global_variables_initializer() 66 | 67 | # Launch the session 68 | self.sess = tf.Session(config=config) 69 | self.sess.run(self.init) 70 | 71 | def _create_loss(self): 72 | c = self.configuration 73 | 74 | if c.loss == 'chamfer': 75 | cost_p1_p2, _, cost_p2_p1, _ = nn_distance(self.x_reconstr, self.gt) 76 | self.loss = tf.reduce_mean(cost_p1_p2) + tf.reduce_mean(cost_p2_p1) 77 | elif c.loss == 'emd': 78 | match = approx_match(self.x_reconstr, self.gt) 79 | self.loss = tf.reduce_mean(match_cost(self.x_reconstr, self.gt, match)) 80 | 81 | reg_losses = self.graph.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 82 | if c.exists_and_is_not_none('w_reg_alpha'): 83 | w_reg_alpha = c.w_reg_alpha 84 | else: 85 | w_reg_alpha = 1.0 86 | 87 | for rl in reg_losses: 88 | self.loss += (w_reg_alpha * rl) 89 | 90 | def _setup_optimizer(self): 91 | c = self.configuration 92 | self.lr = c.learning_rate 93 | if hasattr(c, 'exponential_decay'): 94 | self.lr = tf.train.exponential_decay(c.learning_rate, self.epoch, c.decay_steps, decay_rate=0.5, staircase=True, name="learning_rate_decay") 95 | self.lr = tf.maximum(self.lr, 1e-5) 96 | tf.summary.scalar('learning_rate', self.lr) 97 | 98 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) 99 | self.train_step = self.optimizer.minimize(self.loss) 100 | 101 | def _single_epoch_train(self, train_data, configuration, only_fw=False): 102 | n_examples = train_data.num_examples 103 | epoch_loss = 0. 104 | batch_size = configuration.batch_size 105 | n_batches = int(n_examples / batch_size) 106 | start_time = time.time() 107 | 108 | if only_fw: 109 | fit = self.reconstruct 110 | else: 111 | fit = self.partial_fit 112 | 113 | # Loop over all batches 114 | for _ in xrange(n_batches): 115 | 116 | if self.is_denoising: 117 | original_data, _, batch_i = train_data.next_batch(batch_size) 118 | if batch_i is None: # In this case the denoising concern only the augmentation. 119 | batch_i = original_data 120 | else: 121 | batch_i, _, _ = train_data.next_batch(batch_size) 122 | 123 | batch_i = apply_augmentations(batch_i, configuration) # This is a new copy of the batch. 
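# In denoising mode the augmented (possibly noisy) batch is the input while the clean
# original_data serves as the reconstruction target; otherwise the auto-encoder
# reconstructs its own augmented input.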
124 | 125 | if self.is_denoising: 126 | _, loss = fit(batch_i, original_data) 127 | else: 128 | _, loss = fit(batch_i) 129 | 130 | # Compute average loss 131 | epoch_loss += loss 132 | epoch_loss /= n_batches 133 | duration = time.time() - start_time 134 | 135 | if configuration.loss == 'emd': 136 | epoch_loss /= len(train_data.point_clouds[0]) 137 | 138 | return epoch_loss, duration 139 | 140 | def gradient_of_input_wrt_loss(self, in_points, gt_points=None): 141 | if gt_points is None: 142 | gt_points = in_points 143 | return self.sess.run(tf.gradients(self.loss, self.x), feed_dict={self.x: in_points, self.gt: gt_points}) -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/raw_gan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Apr 27, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import numpy as np 8 | import time 9 | import tensorflow as tf 10 | from tflearn import is_training 11 | 12 | from . gan import GAN 13 | from .. fundamentals.layers import safe_log 14 | 15 | 16 | class RawGAN(GAN): 17 | 18 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None): 19 | 20 | self.noise_dim = noise_dim 21 | self.n_output = n_output 22 | out_shape = [None] + self.n_output 23 | self.discriminator = discriminator 24 | self.generator = generator 25 | 26 | GAN.__init__(self, name, graph) 27 | 28 | with tf.variable_scope(name): 29 | 30 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector. 31 | self.real_pc = tf.placeholder(tf.float32, shape=out_shape) # Ground-truth. 32 | 33 | with tf.variable_scope('generator'): 34 | self.generator_out = self.generator(self.noise, self.n_output[0], **gen_kwargs) 35 | 36 | with tf.variable_scope('discriminator') as scope: 37 | self.real_prob, self.real_logit = self.discriminator(self.real_pc, scope=scope, **disc_kwargs) 38 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs) 39 | 40 | self.loss_d = tf.reduce_mean(-safe_log(self.real_prob) - safe_log(1 - self.synthetic_prob)) 41 | self.loss_g = tf.reduce_mean(-safe_log(self.synthetic_prob)) 42 | 43 | train_vars = tf.trainable_variables() 44 | 45 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')] 46 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')] 47 | 48 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params) 49 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params) 50 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None) 51 | self.init = tf.global_variables_initializer() 52 | 53 | # Launch the session 54 | config = tf.ConfigProto() 55 | config.gpu_options.allow_growth = True 56 | self.sess = tf.Session(config=config) 57 | self.sess.run(self.init) 58 | 59 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma): 60 | return np.random.normal(mu, sigma, (n_samples, ndims)) 61 | 62 | def _single_epoch_train(self, train_data, batch_size, noise_params={}, adaptive=None): 63 | ''' 64 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/ 65 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/ 66 | ''' 67 | n_examples = train_data.num_examples 68 | epoch_loss_d = 0. 69 | epoch_loss_g = 0. 
70 | batch_size = batch_size 71 | n_batches = int(n_examples / batch_size) 72 | start_time = time.time() 73 | updated_d = 0 74 | # Loop over all batches 75 | _real_s = [] 76 | _fake_s = [] 77 | is_training(True, session=self.sess) 78 | try: 79 | for _ in xrange(n_batches): 80 | feed, _, _ = train_data.next_batch(batch_size) 81 | # Update discriminator. 82 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params) 83 | feed_dict = {self.real_pc: feed, self.noise: z} 84 | if adaptive is not None: 85 | s1 = tf.reduce_mean(self.real_prob) 86 | s2 = tf.reduce_mean(1 - self.synthetic_prob) 87 | sr, sf = self.sess.run([s1, s2], feed_dict=feed_dict) 88 | _real_s.append(sr) 89 | _fake_s.append(sf) 90 | if np.mean([sr, sf]) < adaptive: 91 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict) 92 | updated_d += 1 93 | epoch_loss_d += loss_d 94 | else: 95 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict) 96 | updated_d += 1 97 | epoch_loss_d += loss_d 98 | # Update generator. 99 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict) 100 | # Compute average loss 101 | # epoch_loss_d += loss_d 102 | epoch_loss_g += loss_g 103 | is_training(False, session=self.sess) 104 | except Exception: 105 | raise 106 | finally: 107 | is_training(False, session=self.sess) 108 | 109 | # epoch_loss_d /= n_batches 110 | if updated_d > 1: 111 | epoch_loss_d /= updated_d 112 | else: 113 | print 'Discriminator was not updated in this epoch.' 114 | 115 | if adaptive is not None: 116 | print np.mean(_real_s), np.mean(_fake_s) 117 | 118 | epoch_loss_g /= n_batches 119 | duration = time.time() - start_time 120 | return (epoch_loss_d, epoch_loss_g), duration 121 | -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/tf_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on November 26, 2017 3 | 4 | @author: optas 5 | ''' 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | 11 | def expand_scope_by_name(scope, name): 12 | """ expand tf scope by given name. 13 | """ 14 | 15 | if isinstance(scope, basestring): 16 | scope += '/' + name 17 | return scope 18 | 19 | if scope is not None: 20 | return scope.name + '/' + name 21 | else: 22 | return scope 23 | 24 | 25 | def replicate_parameter_for_all_layers(parameter, n_layers): 26 | if parameter is not None and len(parameter) != n_layers: 27 | if len(parameter) != 1: 28 | raise ValueError() 29 | parameter = np.array(parameter) 30 | parameter = parameter.repeat(n_layers).tolist() 31 | return parameter 32 | 33 | 34 | def reset_tf_graph(): 35 | ''' Reset's all variables of default-tf graph. Useful for jupyter. 36 | ''' 37 | if 'sess' in globals() and sess: 38 | sess.close() 39 | tf.reset_default_graph() 40 | 41 | 42 | def leaky_relu(alpha): 43 | if not (alpha < 1 and alpha > 0): 44 | raise ValueError() 45 | 46 | return lambda x: tf.maximum(alpha * x, x) 47 | 48 | 49 | def safe_log(x, eps=1e-12): 50 | return tf.log(tf.maximum(x, eps)) -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/vanilla_gan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 2018 3 | 4 | Author: Achlioptas Panos (Github ID: optas) 5 | ''' 6 | 7 | import numpy as np 8 | import time 9 | import tensorflow as tf 10 | 11 | from tflearn import is_training 12 | from . 
gan import GAN 13 | 14 | 15 | class Vanilla_GAN(GAN): 16 | 17 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None): 18 | 19 | GAN.__init__(self, name, graph) 20 | 21 | self.noise_dim = noise_dim 22 | self.n_output = n_output 23 | self.discriminator = discriminator 24 | self.generator = generator 25 | 26 | with tf.variable_scope(name): 27 | 28 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector. 29 | self.gt = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth. 30 | 31 | with tf.variable_scope('generator'): 32 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs) 33 | 34 | with tf.variable_scope('discriminator') as scope: 35 | self.real_prob, self.real_logit = self.discriminator(self.gt, scope=scope, **disc_kwargs) 36 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs) 37 | 38 | self.loss_d, self.loss_g = self.vanilla_gan_objective(self.real_prob, self.synthetic_prob, use_safe_log=True) 39 | 40 | train_vars = tf.trainable_variables() 41 | 42 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')] 43 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')] 44 | 45 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params) 46 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params) 47 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None) 48 | self.init = tf.global_variables_initializer() 49 | 50 | # Launch the session 51 | config = tf.ConfigProto() 52 | config.gpu_options.allow_growth = True 53 | self.sess = tf.Session(config=config) 54 | self.sess.run(self.init) 55 | 56 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma): 57 | return np.random.normal(mu, sigma, (n_samples, ndims)) 58 | 59 | def _single_epoch_train(self, train_data, batch_size, noise_params): 60 | n_examples = train_data.num_examples 61 | epoch_loss_d = 0. 62 | epoch_loss_g = 0. 63 | batch_size = batch_size 64 | n_batches = int(n_examples / batch_size) 65 | start_time = time.time() 66 | 67 | is_training(True, session=self.sess) 68 | try: 69 | # Loop over all batches 70 | for _ in xrange(n_batches): 71 | feed, _, _ = train_data.next_batch(batch_size) 72 | 73 | # Update discriminator. 74 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params) 75 | feed_dict = {self.gt: feed, self.noise: z} 76 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict) 77 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict) 78 | 79 | # Compute average loss 80 | epoch_loss_d += loss_d 81 | epoch_loss_g += loss_g 82 | 83 | is_training(False, session=self.sess) 84 | except Exception: 85 | raise 86 | finally: 87 | is_training(False, session=self.sess) 88 | 89 | epoch_loss_d /= n_batches 90 | epoch_loss_g /= n_batches 91 | duration = time.time() - start_time 92 | return (epoch_loss_d, epoch_loss_g), duration -------------------------------------------------------------------------------- /task_generation/latent_3d_points/src/w_gan_gp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 22, 2018 3 | 4 | Author: Achlioptas Panos (Github ID: optas) 5 | ''' 6 | 7 | import numpy as np 8 | import time 9 | import tensorflow as tf 10 | 11 | from tflearn import is_training 12 | from . 
gan import GAN 13 | 14 | 15 | class W_GAN_GP(GAN): 16 | '''Gradient Penalty. 17 | https://arxiv.org/abs/1704.00028 18 | ''' 19 | 20 | def __init__(self, name, learning_rate, lam, n_output, noise_dim, discriminator, generator, beta=0.5, gen_kwargs={}, disc_kwargs={}, graph=None): 21 | 22 | GAN.__init__(self, name, graph) 23 | 24 | self.noise_dim = noise_dim 25 | self.n_output = n_output 26 | self.discriminator = discriminator 27 | self.generator = generator 28 | 29 | with tf.variable_scope(name): 30 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector. 31 | self.real_pc = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth. 32 | 33 | with tf.variable_scope('generator'): 34 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs) 35 | 36 | with tf.variable_scope('discriminator') as scope: 37 | self.real_prob, self.real_logit = self.discriminator(self.real_pc, scope=scope, **disc_kwargs) 38 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs) 39 | 40 | 41 | # Compute WGAN losses 42 | self.loss_d = tf.reduce_mean(self.synthetic_logit) - tf.reduce_mean(self.real_logit) 43 | self.loss_g = -tf.reduce_mean(self.synthetic_logit) 44 | 45 | # Compute gradient penalty at interpolated points 46 | ndims = self.real_pc.get_shape().ndims 47 | batch_size = tf.shape(self.real_pc)[0] 48 | alpha = tf.random_uniform(shape=[batch_size] + [1] * (ndims - 1), minval=0., maxval=1.) 49 | differences = self.generator_out - self.real_pc 50 | interpolates = self.real_pc + (alpha * differences) 51 | 52 | with tf.variable_scope('discriminator') as scope: 53 | gradients = tf.gradients(self.discriminator(interpolates, reuse=True, scope=scope, **disc_kwargs)[1], [interpolates])[0] 54 | 55 | # Reduce over all but the first dimension 56 | slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=range(1, ndims))) 57 | gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2) 58 | self.loss_d += lam * gradient_penalty 59 | 60 | train_vars = tf.trainable_variables() 61 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')] 62 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')] 63 | 64 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params) 65 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params) 66 | 67 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None) 68 | self.init = tf.global_variables_initializer() 69 | 70 | # Launch the session 71 | config = tf.ConfigProto() 72 | config.gpu_options.allow_growth = True 73 | self.sess = tf.Session(config=config) 74 | self.sess.run(self.init) 75 | 76 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma): 77 | return np.random.normal(mu, sigma, (n_samples, ndims)) 78 | 79 | def _single_epoch_train(self, train_data, batch_size, noise_params, discriminator_boost=5): 80 | ''' 81 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/ 82 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/ 83 | ''' 84 | n_examples = train_data.num_examples 85 | epoch_loss_d = 0. 86 | epoch_loss_g = 0. 
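# WGAN-GP performs several critic updates per generator update: each of the
# n_batches / discriminator_boost iterations below runs discriminator_boost
# discriminator steps followed by a single generator step.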
87 | batch_size = batch_size 88 | n_batches = int(n_examples / batch_size) 89 | start_time = time.time() 90 | 91 | iterations_for_epoch = n_batches / discriminator_boost 92 | 93 | is_training(True, session=self.sess) 94 | try: 95 | # Loop over all batches 96 | for _ in xrange(iterations_for_epoch): 97 | for _ in range(discriminator_boost): 98 | feed, _, _ = train_data.next_batch(batch_size) 99 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params) 100 | feed_dict = {self.real_pc: feed, self.noise: z} 101 | _, loss_d = self.sess.run([self.opt_d, self.loss_d], feed_dict=feed_dict) 102 | epoch_loss_d += loss_d 103 | 104 | # Update generator. 105 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params) 106 | feed_dict = {self.noise: z} 107 | _, loss_g = self.sess.run([self.opt_g, self.loss_g], feed_dict=feed_dict) 108 | epoch_loss_g += loss_g 109 | 110 | is_training(False, session=self.sess) 111 | except Exception: 112 | raise 113 | finally: 114 | is_training(False, session=self.sess) 115 | epoch_loss_d /= (iterations_for_epoch * discriminator_boost) 116 | epoch_loss_g /= iterations_for_epoch 117 | duration = time.time() - start_time 118 | return (epoch_loss_d, epoch_loss_g), duration 119 | -------------------------------------------------------------------------------- /task_generation/train_latent_gan.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is used to train a Generative Adversarial Network (GAN) on the latent 3 | codes of a given class. The GAN consists of a generator and a discriminator, both 4 | with two layers, which are imported from the `latent_3d_points.src.generators_discriminators` module. 5 | 6 | The training data is derived from the embeddings obtained by nerf2vec. 7 | 8 | The output of the training process is stored in a directory specified by `paths.GENERATION_OUT_DIR`, 9 | with the directory name formatted to include the class index. 
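For each class, a Wasserstein GAN with gradient penalty (W_GAN_GP, lam=10) whose generator and discriminator are the two-layer MLPs mentioned above is trained for 2000 epochs with batch size 50 and Adam (lr=1e-4, beta=0.5) on the 1024-dimensional embeddings of that class; checkpoints and batches of 1000 synthetic embeddings are periodically written to disk according to saver_step.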
10 | 11 | The code in this module is based on the code from the repository: https://github.com/optas/latent_3d_points 12 | 13 | """ 14 | import os 15 | import sys 16 | script_dir = os.path.dirname(os.path.abspath(__file__)) 17 | parent_dir = os.path.dirname(script_dir) 18 | sys.path.append(parent_dir) 19 | import settings 20 | 21 | import os.path as osp 22 | 23 | import numpy as np 24 | from task_generation.latent_3d_points.src.generators_discriminators import ( 25 | latent_code_discriminator_two_layers, 26 | latent_code_generator_two_layers, 27 | ) 28 | from task_generation.latent_3d_points.src.in_out import PointCloudDataSet, create_dir 29 | from task_generation.latent_3d_points.src.tf_utils import reset_tf_graph 30 | from task_generation.latent_3d_points.src.w_gan_gp import W_GAN_GP 31 | 32 | from nerf2vec import config as nerf2vec_config 33 | 34 | 35 | def train(class_idx=0): 36 | 37 | experiment_name = 'nerf2vec_{}'.format(class_idx) 38 | top_out_dir = settings.GENERATION_OUT_DIR.format(experiment_name) 39 | embedding_size = 1024 40 | n_epochs = 2000 41 | n_syn_samples = 1000 # how many synthetic samples to produce at each save step 42 | saver_step = np.hstack([np.array([1, 5, 10]), np.arange(50, n_epochs + 1, 50)]) 43 | 44 | latent_codes_path = os.path.join(settings.GENERATION_EMBEDDING_DIR, "embeddings_{}.npz".format(class_idx)) 45 | latent_codes = np.load(latent_codes_path)["embeddings"] 46 | latent_data = PointCloudDataSet(latent_codes) 47 | print(latent_data.num_examples) 48 | 49 | # optimization parameters 50 | init_lr = 0.0001 51 | batch_size = 50 52 | noise_params = {"mu": 0, "sigma": 0.2} 53 | beta = 0.5 # ADAM's momentum 54 | 55 | train_dir = osp.join(top_out_dir, "latent_gan_ckpts") 56 | create_dir(train_dir) 57 | synthetic_data_out_dir = osp.join(top_out_dir, "generated_embeddings") 58 | create_dir(synthetic_data_out_dir) 59 | 60 | reset_tf_graph() 61 | 62 | gan = W_GAN_GP( 63 | experiment_name, 64 | init_lr, 65 | 10, 66 | [embedding_size], 67 | embedding_size, 68 | latent_code_discriminator_two_layers, 69 | latent_code_generator_two_layers, 70 | beta=beta, 71 | ) 72 | 73 | print("Start") 74 | 75 | for _ in range(n_epochs): 76 | loss, duration = gan._single_epoch_train(latent_data, batch_size, noise_params) 77 | epoch = int(gan.sess.run(gan.increment_epoch)) 78 | print("epoch:", epoch, "loss:", loss) 79 | 80 | if epoch in saver_step: 81 | checkpoint_path = osp.join(train_dir, "epoch_" + str(epoch) + ".ckpt") 82 | gan.saver.save(gan.sess, checkpoint_path, global_step=gan.epoch) 83 | 84 | syn_latent_data = gan.generate(n_syn_samples, noise_params) 85 | np.savez( 86 | osp.join(synthetic_data_out_dir, "epoch_" + str(epoch) + ".npz"), 87 | embeddings=syn_latent_data, 88 | ) 89 | 90 | def main(): 91 | # Train a GAN for each class 92 | for class_idx in range(nerf2vec_config.NUM_CLASSES): 93 | train(class_idx) 94 | 95 | if __name__ == "__main__": 96 | main() -------------------------------------------------------------------------------- /task_generation/viz_nerf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | import settings 7 | 8 | import uuid 9 | import math 10 | import torch 11 | import numpy as np 12 | import imageio.v2 as imageio 13 | 14 | from random import randint 15 | from nerf2vec.utils import get_rays 16 | 17 | from torch.cuda.amp import autocast 18 | from 
models.idecoder import ImplicitDecoder 19 | from nerf.utils import Rays, render_image 20 | from nerf2vec import config as nerf2vec_config 21 | 22 | 23 | @torch.no_grad() 24 | def draw_images(decoder, embeddings, device='cuda:0', class_idx=0): 25 | 26 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 27 | render_step_size = ( 28 | (scene_aabb[3:] - scene_aabb[:3]).max() 29 | * math.sqrt(3) 30 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES 31 | ).item() 32 | rays = get_rays(device) 33 | 34 | # WHITE BACKGROUND 35 | color_bkgd = torch.ones((1,3), device=device) 36 | 37 | img_name = str(uuid.uuid4()) 38 | plots_path = os.path.join('task_generation', f'GAN_plots_{class_idx}') 39 | os.makedirs(plots_path, exist_ok=True) 40 | 41 | for idx, emb in enumerate(embeddings): 42 | emb = torch.tensor(emb, device=device, dtype=torch.float32) 43 | emb = emb.unsqueeze(dim=0) 44 | with autocast(): 45 | rgb_A, alpha, b, c, _, _ = render_image( 46 | radiance_field=decoder, 47 | embeddings=emb, 48 | occupancy_grid=None, 49 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)), 50 | scene_aabb=scene_aabb, 51 | render_step_size=render_step_size, 52 | render_bkgd=color_bkgd, 53 | grid_weights=None, 54 | device=device 55 | ) 56 | 57 | imageio.imwrite( 58 | os.path.join(plots_path, f'{img_name}_{idx}.png'), 59 | (rgb_A.squeeze(dim=0).cpu().detach().numpy() * 255).astype(np.uint8) 60 | ) 61 | 62 | 63 | @torch.no_grad() 64 | def create_renderings_from_GAN_embeddings(device='cuda:0', class_idx=0, n_images=10): 65 | 66 | # Init nerf2vec 67 | decoder = ImplicitDecoder( 68 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM, 69 | in_dim=nerf2vec_config.DECODER_INPUT_DIM, 70 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM, 71 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP, 72 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP, 73 | out_dim=nerf2vec_config.DECODER_OUT_DIM, 74 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF, 75 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 76 | ) 77 | decoder.eval() 78 | decoder = decoder.to(device) 79 | 80 | ckpt_path = settings.GENERATION_NERF2VEC_FULL_CKPT_PATH 81 | print(f'loading weights: {ckpt_path}') 82 | ckpt = torch.load(ckpt_path) 83 | decoder.load_state_dict(ckpt["decoder"]) 84 | 85 | latent_gan_embeddings_path = settings.GENERATION_LATENT_GAN_FULL_CKPT_PATH.format(class_idx) 86 | embeddings = np.load(latent_gan_embeddings_path)["embeddings"] 87 | embeddings = torch.from_numpy(embeddings) 88 | 89 | for _ in range(0, n_images): 90 | idx = randint(0, embeddings.shape[0]-1) 91 | emb = embeddings[idx].unsqueeze(0).cuda() 92 | draw_images(decoder, emb, device, class_idx) 93 | 94 | 95 | def main() -> None: 96 | # Create renderings for each class 97 | for class_idx in range(0, nerf2vec_config.NUM_CLASSES): 98 | create_renderings_from_GAN_embeddings(device=settings.DEVICE_NAME, class_idx=class_idx, n_images=10) 99 | 100 | if __name__ == "__main__": 101 | main() -------------------------------------------------------------------------------- /task_interp_and_retrieval/interp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | import settings 7 | 8 | import math 9 | import uuid 10 | import torch 11 | 
import numpy as np 12 | import imageio.v2 as imageio 13 | 14 | from pathlib import Path 15 | from random import randint 16 | from nerf2vec import config as nerf2vec_config 17 | from nerf2vec.train_nerf2vec import NeRFDataset 18 | from nerf2vec.utils import get_class_label_from_nerf_root_path 19 | from models.encoder import Encoder 20 | from models.idecoder import ImplicitDecoder 21 | from nerf.utils import Rays, render_image 22 | from torch.cuda.amp import autocast 23 | 24 | 25 | def draw_images( 26 | rays, 27 | color_bkgds, 28 | embeddings, 29 | decoder, 30 | scene_aabb, 31 | render_step_size, 32 | curr_folder_path, 33 | device): 34 | 35 | for idx in range(len(embeddings)): 36 | with autocast(): 37 | rgb, _, _, _, _, _ = render_image( 38 | radiance_field=decoder, 39 | embeddings=embeddings[idx].unsqueeze(dim=0), 40 | occupancy_grid=None, 41 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)), 42 | scene_aabb=scene_aabb, 43 | render_step_size=render_step_size, 44 | render_bkgd=color_bkgds.unsqueeze(dim=0), 45 | grid_weights=None, 46 | device=device 47 | ) 48 | 49 | img_name = f'{idx}.png' 50 | full_path = os.path.join(curr_folder_path, img_name) 51 | 52 | imageio.imwrite( 53 | full_path, 54 | (rgb.cpu().detach().numpy()[0] * 255).astype(np.uint8) 55 | ) 56 | 57 | 58 | @torch.no_grad() 59 | def do_interpolation(device = 'cuda:0', split = nerf2vec_config.TRAIN_SPLIT): 60 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 61 | render_step_size = ( 62 | (scene_aabb[3:] - scene_aabb[:3]).max() 63 | * math.sqrt(3) 64 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES 65 | ).item() 66 | 67 | 68 | ckpts_path = Path(settings.NERF2VEC_CKPTS_PATH) 69 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name] 70 | ckpt_path = ckpt_paths[0] 71 | ckpt = torch.load(ckpt_path) 72 | 73 | print(f'loaded weights: {ckpt_path}') 74 | 75 | encoder = Encoder( 76 | nerf2vec_config.MLP_UNITS, 77 | nerf2vec_config.ENCODER_HIDDEN_DIM, 78 | nerf2vec_config.ENCODER_EMBEDDING_DIM 79 | ) 80 | encoder.load_state_dict(ckpt["encoder"]) 81 | encoder = encoder.cuda() 82 | encoder.eval() 83 | 84 | decoder = ImplicitDecoder( 85 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM, 86 | in_dim=nerf2vec_config.DECODER_INPUT_DIM, 87 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM, 88 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP, 89 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP, 90 | out_dim=nerf2vec_config.DECODER_OUT_DIM, 91 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF, 92 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 93 | ) 94 | decoder.load_state_dict(ckpt["decoder"]) 95 | decoder = decoder.cuda() 96 | decoder.eval() 97 | 98 | dset_json_path = get_dset_json_path(split) 99 | dset = NeRFDataset(dset_json_path, device='cpu') 100 | 101 | n_images = 0 102 | max_images = 100 103 | 104 | while n_images < max_images: 105 | idx_A = randint(0, len(dset) - 1) 106 | _, test_nerf_A, matrices_unflattened_A, matrices_flattened_A, _, data_dir_A, _, _ = dset[idx_A] 107 | class_id_A = get_class_label_from_nerf_root_path(data_dir_A) 108 | 109 | # Ignore augmented samples 110 | if is_nerf_augmented(data_dir_A): 111 | continue 112 | matrices_unflattened_A = matrices_unflattened_A['mlp_base.params'] 113 | 114 | class_id_B = -1 115 | while class_id_B != class_id_A: 116 | idx_B = randint(0, len(dset) - 1) 117 | _, _, matrices_unflattened_B, 
matrices_flattened_B, _, data_dir_B, _, _ = dset[idx_B] 118 | class_id_B = get_class_label_from_nerf_root_path(data_dir_B) 119 | 120 | if is_nerf_augmented(data_dir_B): 121 | continue 122 | matrices_unflattened_B = matrices_unflattened_B['mlp_base.params'] 123 | 124 | print(f'Progress: {n_images}/{max_images}') 125 | 126 | matrices_flattened_A = matrices_flattened_A.cuda().unsqueeze(0) 127 | matrices_flattened_B = matrices_flattened_B.cuda().unsqueeze(0) 128 | 129 | with autocast(): 130 | embedding_A = encoder(matrices_flattened_A).squeeze(0) 131 | embedding_B = encoder(matrices_flattened_B).squeeze(0) 132 | 133 | 134 | embeddings = [embedding_A] 135 | for gamma in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]: 136 | emb_interp = (1 - gamma) * embedding_A + gamma * embedding_B 137 | embeddings.append(emb_interp) 138 | embeddings.append(embedding_B) 139 | 140 | curr_folder_path = os.path.join('task_interp_and_retrieval', f'interp_plots_{split}', str(uuid.uuid4())) 141 | os.makedirs(curr_folder_path, exist_ok=True) 142 | 143 | rays = test_nerf_A['rays'] 144 | rays = rays._replace(origins=rays.origins.cuda(), viewdirs=rays.viewdirs.cuda()) 145 | 146 | # WHITE BACKGROUND 147 | color_bkgds = torch.ones(test_nerf_A['color_bkgd'].shape) 148 | color_bkgds = color_bkgds.cuda() 149 | 150 | # Interpolation 151 | draw_images( 152 | rays, 153 | color_bkgds, 154 | embeddings, 155 | decoder, 156 | scene_aabb, 157 | render_step_size, 158 | curr_folder_path, 159 | device 160 | ) 161 | 162 | n_images += 1 163 | 164 | def get_dset_json_path(split): 165 | dset_json_path = settings.TRAIN_DSET_JSON 166 | 167 | if split == nerf2vec_config.VAL_SPLIT: 168 | dset_json_path = settings.VAL_DSET_JSON 169 | else: 170 | dset_json_path = settings.TEST_DSET_JSON 171 | 172 | 173 | return dset_json_path 174 | 175 | def is_nerf_augmented(data_dir): 176 | return "_A1" in data_dir or "_A2" in data_dir 177 | 178 | def main() -> None: 179 | do_interpolation(device=settings.DEVICE_NAME, split=nerf2vec_config.TRAIN_SPLIT) 180 | 181 | if __name__ == "__main__": 182 | main() -------------------------------------------------------------------------------- /task_interp_and_retrieval/retrieval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | import settings 7 | 8 | 9 | import math 10 | import uuid 11 | 12 | import h5py 13 | import torch 14 | import numpy as np 15 | import imageio.v2 as imageio 16 | 17 | from pathlib import Path 18 | from typing import Dict, List, Tuple 19 | from collections import defaultdict 20 | 21 | from torch import Tensor 22 | from torch.cuda.amp import autocast 23 | from torch.utils.data import Dataset 24 | 25 | from nerf.utils import Rays, render_image 26 | from nerf2vec import config as nerf2vec_config 27 | from nerf2vec.utils import get_latest_checkpoints_path, get_rays 28 | 29 | from sklearn.neighbors import KDTree 30 | from models.idecoder import ImplicitDecoder 31 | 32 | 33 | class InrEmbeddingDataset(Dataset): 34 | def __init__(self, root: Path, split: str) -> None: 35 | super().__init__() 36 | 37 | self.root = root / split 38 | self.item_paths = sorted(self.root.glob("*.h5"), key=lambda x: int(x.stem)) 39 | 40 | def __len__(self) -> int: 41 | return len(self.item_paths) 42 | 43 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 44 | with h5py.File(self.item_paths[index], "r") as f: 45 | 
embedding = np.array(f.get("embedding")) 46 | embedding = torch.from_numpy(embedding) 47 | class_id = np.array(f.get("class_id")) 48 | class_id = torch.from_numpy(class_id).long() 49 | 50 | return embedding, class_id 51 | 52 | 53 | @torch.no_grad() 54 | def draw_images(decoder, embeddings, plots_path, device): 55 | 56 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 57 | render_step_size = ( 58 | (scene_aabb[3:] - scene_aabb[:3]).max() 59 | * math.sqrt(3) 60 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES 61 | ).item() 62 | 63 | rays = get_rays(device) 64 | 65 | # WHITE BACKGROUND 66 | color_bkgd = torch.ones((1,3), device=device) 67 | 68 | img_name = str(uuid.uuid4()) 69 | 70 | for idx, emb in enumerate(embeddings): 71 | emb = torch.tensor(emb, device=device, dtype=torch.float32) 72 | emb = emb.unsqueeze(dim=0) 73 | with autocast(): 74 | rgb_A, alpha, b, c, _, _ = render_image( 75 | radiance_field=decoder, 76 | embeddings=emb, 77 | occupancy_grid=None, 78 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)), 79 | scene_aabb=scene_aabb, 80 | render_step_size=render_step_size, 81 | render_bkgd=color_bkgd, 82 | grid_weights=None, 83 | device=device 84 | ) 85 | 86 | imageio.imwrite( 87 | os.path.join(plots_path, f'{img_name}_{idx}.png'), 88 | (rgb_A.squeeze(dim=0).cpu().detach().numpy() * 255).astype(np.uint8) 89 | ) 90 | 91 | print(f' {img_name}_{idx}.png saved') 92 | 93 | 94 | @torch.no_grad() 95 | def get_recalls(gallery: Tensor, 96 | labels_gallery: Tensor, 97 | kk: List[int], decoder, 98 | plots_path: str, 99 | device:str) -> Dict[int, float]: 100 | max_nn = max(kk) 101 | recalls = {idx: 0.0 for idx in kk} 102 | targets = labels_gallery.cpu().numpy() 103 | gallery = gallery.cpu().numpy() 104 | tree = KDTree(gallery) 105 | 106 | dic_renderings = defaultdict(int) 107 | 108 | for query, label_query in zip(gallery, targets): 109 | with torch.no_grad(): 110 | query = np.expand_dims(query, 0) 111 | _, indices_matched = tree.query(query, k=max_nn + 1) 112 | indices_matched = indices_matched[0] 113 | 114 | # Draw the query and the first N neighbours 115 | if dic_renderings[label_query] < 10: 116 | print(f'Generating images for class {label_query}...') 117 | draw_images(decoder, gallery[indices_matched], plots_path, device) 118 | dic_renderings[label_query] += 1 119 | 120 | for k in kk: 121 | indices_matched_temp = indices_matched[1 : k + 1] 122 | classes_matched = targets[indices_matched_temp] 123 | recalls[k] += np.count_nonzero(classes_matched == label_query) > 0 124 | 125 | for key, value in recalls.items(): 126 | recalls[key] = value / (1.0 * len(gallery)) 127 | 128 | return recalls 129 | 130 | @torch.no_grad() 131 | def do_retrieval(device='cuda:0', split=nerf2vec_config.TEST_SPLIT): 132 | 133 | # Init nerf2vec 134 | decoder = ImplicitDecoder( 135 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM, 136 | in_dim=nerf2vec_config.DECODER_INPUT_DIM, 137 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM, 138 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP, 139 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP, 140 | out_dim=nerf2vec_config.DECODER_OUT_DIM, 141 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF, 142 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device) 143 | ) 144 | decoder.eval() 145 | decoder = decoder.to(device) 146 | 147 | ckpt_path = get_latest_checkpoints_path(Path(settings.NERF2VEC_CKPTS_PATH)) 
148 | print(f'loading weights: {ckpt_path}') 149 | ckpt = torch.load(ckpt_path) 150 | decoder.load_state_dict(ckpt["decoder"]) 151 | 152 | dset_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR) 153 | dset = InrEmbeddingDataset(dset_root, split) 154 | 155 | embeddings = [] 156 | labels = [] 157 | 158 | for i in range(len(dset)): 159 | embedding, label = dset[i] 160 | embeddings.append(embedding) 161 | labels.append(label) 162 | 163 | embeddings = torch.stack(embeddings) 164 | labels = torch.stack(labels) 165 | 166 | plots_path = os.path.join('task_interp_and_retrieval', f'retrieval_plots_{split}') 167 | os.makedirs(plots_path, exist_ok=True) 168 | 169 | recalls = get_recalls(embeddings, labels, [1, 5, 10], decoder, plots_path, device) 170 | for key, value in recalls.items(): 171 | print(f"Recall@{key} : {100. * value:.2f}%") 172 | 173 | 174 | def main() -> None: 175 | do_retrieval(device=settings.DEVICE_NAME, split=nerf2vec_config.TEST_SPLIT) 176 | 177 | if __name__ == "__main__": 178 | main() -------------------------------------------------------------------------------- /task_mapping_network/README.md: -------------------------------------------------------------------------------- 1 | # Task Mapping Network 2 | The mapping network task requires the training of the *inr2vec* framework. Please, refer to [THIS](https://github.com/CVLAB-Unibo/inr2vec?tab=readme-ov-file#setup) page to properly configure your environment. 3 | 4 | In order to complete this task, it is necessary to execute some operations following a specific order. 5 | 6 | ## 1) Create point clouds 7 | This step is necessary to create the dataset on which *inr2vec* will be trained. It is important to update the variable *shapenet_root* found in *task_mapping_network/cfg/pcd_dataset.yaml*. This variable should point to the root of the *ShapeNet* folder. 8 | 9 | Then, execute the following command: 10 | ```bash 11 | python task_mapping_network/inr2vec/create_point_clouds_dataset.py 12 | ``` 13 | 14 | ## 2) Create INRs dataset 15 | Create the INRs dataset by executing the following command: 16 | ```bash 17 | python task_mapping_network/inr2vec/create_inrs_dataset.py 18 | ``` 19 | The file *task_mapping_network/cfg/inrs_dataset.yaml* contains all the configurations used for this step. 20 | 21 | ## 3) Train *inr2vec* 22 | Train *inr2vec* with the following command: 23 | ```bash 24 | python task_mapping_network/inr2vec/train_inr2vec.py 25 | ``` 26 | The file *task_mapping_network/cfg/inr2vec.yaml* contains all the configurations used for this step. 27 | 28 | ## 4) Export *inr2vec* and *nerf2vec* embeddings 29 | Create embeddings that will be properly organized to train the mapping network: 30 | ```bash 31 | python task_mapping_network/export_inrs_embeddings.py 32 | python task_mapping_network/export_nerfs_embeddings.py 33 | ``` 34 | 35 | The file *task_mapping_network/cfg/export_embeddings.yaml* contains all the configurations used for this step. 36 | 37 | ## 5) Train the mapping network 38 | Train the mapping network: 39 | ```bash 40 | python task_mapping_network/train_completion.py 41 | ``` 42 | The file *task_mapping_network/cfg/completion.yaml* contains all the configurations used for this step. 43 | 44 | 45 | ## 6) Export results 46 | Visualize the results by executing: 47 | ```bash 48 | python task_mapping_network/viz.py 49 | ``` 50 | The file *task_mapping_network/cfg/completion.yaml* contains all the configurations used for this step. 51 | 52 | The results will be saved in the *task_mapping_network/completion_plots* folder. 
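As a recap, once the paths in the *task_mapping_network/cfg* YAML files point to your data, the six steps above can be chained from a single shell session (a minimal sketch; each script reads its configuration from the corresponding YAML file):
```bash
python task_mapping_network/inr2vec/create_point_clouds_dataset.py
python task_mapping_network/inr2vec/create_inrs_dataset.py
python task_mapping_network/inr2vec/train_inr2vec.py
python task_mapping_network/export_inrs_embeddings.py
python task_mapping_network/export_nerfs_embeddings.py
python task_mapping_network/train_completion.py
python task_mapping_network/viz.py
```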
53 | 54 | -------------------------------------------------------------------------------- /task_mapping_network/cfg/completion.yaml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths 2 | 3 | out_root: "task_mapping_network/train" 4 | 5 | inrs_dset_root: "task_mapping_network/inr_embeddings" 6 | nerfs_dset_root: "task_mapping_network/nerf_embeddings" 7 | pcd_root: "task_mapping_network/point_clouds" 8 | 9 | train_split: "train" 10 | train_bs: 256 11 | 12 | val_split: "val" 13 | val_bs: 16 14 | 15 | test_split: "test" 16 | 17 | embedding_dim: 1024 18 | num_layers_transfer: 8 19 | 20 | nerf2vec_decoder_ckpt_path: "nerf2vec/train/ckpts/499.pt" 21 | inr2vec_decoder_ckpt_path: "task_mapping_network/inr2vec/train/ckpts/299.pt" 22 | completion_ckpt_path: "task_mapping_network/train/ckpts/299.pt" 23 | 24 | inr_decoder: 25 | input_dim: 3 26 | hidden_dim: 512 27 | num_hidden_layers_before_skip: 2 28 | num_hidden_layers_after_skip: 2 29 | out_dim: 1 30 | 31 | nerf_decoder: 32 | input_dim: 3 33 | hidden_dim: 1024 34 | num_hidden_layers_before_skip: 2 35 | num_hidden_layers_after_skip: 2 36 | out_dim: 4 37 | 38 | lr: 1e-4 39 | wd: 1e-4 40 | num_epochs: 300 -------------------------------------------------------------------------------- /task_mapping_network/cfg/export_embeddings.yaml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths 2 | 3 | inrs_root: "task_mapping_network/inrs" 4 | ckpt_path: "task_mapping_network/inr2vec/train/299.pt" 5 | out_root: "task_mapping_network/inr_embeddings" 6 | 7 | nerf2vec_ckpt_path: "nerf2vec/train/ckpts/499.pt" 8 | nerf2vec_train_json_path: "data/train.json" 9 | nerf2vec_val_json_path: "data/val.json" 10 | nerf2vec_test_json_path: "data/test.json" 11 | nerf_out_root: "task_mapping_network/nerf_embeddings" 12 | 13 | encoder: 14 | hidden_dims: [512, 512, 1024, 1024] 15 | embedding_dim: 1024 16 | 17 | mlp: 18 | hidden_dim: 512 19 | num_hidden_layers: 4 20 | 21 | train_split: "train" 22 | val_split: "val" 23 | test_split: "test" 24 | -------------------------------------------------------------------------------- /task_mapping_network/cfg/inr2vec.yaml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths 2 | 3 | inrs_root: task_mapping_network/inrs 4 | out_root: "task_mapping_network/inr2vec/train" 5 | 6 | num_queries_on_surface: 3_500 7 | stds: [0.003, 0.01, 0.1] 8 | num_points_per_std: [3_500,2_000,500,500] 9 | 10 | encoder: 11 | hidden_dims: [512, 512, 1024, 1024] 12 | embedding_dim: 1024 13 | 14 | decoder: 15 | input_dim: 3 16 | hidden_dim: 512 17 | num_hidden_layers_before_skip: 2 18 | num_hidden_layers_after_skip: 2 19 | out_dim: 1 20 | 21 | mlp: 22 | hidden_dim: 512 23 | num_hidden_layers: 4 24 | 25 | train_split: "train" 26 | val_split: "val" 27 | 28 | train_bs: 16 29 | val_bs: 16 30 | 31 | lr: 1e-4 32 | wd: 1e-2 33 | num_epochs: 300 34 | -------------------------------------------------------------------------------- /task_mapping_network/cfg/inrs_dataset.yaml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths 2 | 3 | pcd_root: "task_mapping_network/point_clouds" 4 | split_json_root_path: "data" 5 | 6 | splits: ["train", 
"validation", "test"] 7 | num_points_pcd: 2048 8 | 9 | num_required_train_shapes: 100_000 10 | 11 | num_queries_on_surface: 100_000 12 | stds: [0.003, 0.01, 0.1] 13 | num_points_per_std: [250_000, 200_000, 25_000, 25_000] 14 | 15 | num_points_fitting: 10_000 16 | num_parallel_mlps: 16 17 | 18 | mlp: 19 | hidden_dim: 512 20 | num_hidden_layers: 4 21 | init_path: "task_mapping_network/inits/in3_out1_h512_l4.pt" 22 | 23 | num_steps: 500 24 | lr: 1e-4 25 | 26 | out_root: "task_mapping_network/inrs" 27 | -------------------------------------------------------------------------------- /task_mapping_network/cfg/pcd_dataset.yaml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths 2 | 3 | splits: ["train", "validation", "test"] 4 | 5 | split_json_root_path: "data" 6 | out_point_clouds_path: "mapping_network/point_clouds" 7 | shapenet_root: "/media/data7/dsirocchi/ShapeNetCore.v1" 8 | -------------------------------------------------------------------------------- /task_mapping_network/export_inrs_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | import settings 7 | 8 | from pathlib import Path 9 | from typing import Any, Dict, Tuple 10 | 11 | import h5py 12 | import numpy as np 13 | import torch 14 | from hesiod import hcfg, hmain 15 | from pycarus.learning.models.siren import SIREN 16 | from pycarus.utils import progress_bar 17 | from torch import Tensor 18 | from torch.utils.data import DataLoader, Dataset 19 | 20 | from task_mapping_network.inr2vec.models.encoder import Encoder 21 | from task_mapping_network.inr2vec.utils import get_mlp_params_as_matrix 22 | 23 | 24 | class InrDataset(Dataset): 25 | def __init__(self, inrs_root: Path, split: str, sample_sd: Dict[str, Any]) -> None: 26 | super().__init__() 27 | 28 | self.inrs_root = inrs_root / split 29 | self.mlps_paths = sorted(self.inrs_root.glob("*.h5"), key=lambda x: int(x.stem)) 30 | self.sample_sd = sample_sd 31 | 32 | def __len__(self) -> int: 33 | return len(self.mlps_paths) 34 | 35 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor]: 36 | with h5py.File(self.mlps_paths[index], "r") as f: 37 | pcd = torch.from_numpy(np.array(f.get("pcd"))) 38 | params = np.array(f.get("params")) 39 | params = torch.from_numpy(params).float() 40 | matrix = get_mlp_params_as_matrix(params, self.sample_sd) 41 | class_id = torch.from_numpy(np.array(f.get("class_id"))).long() 42 | uuid = f.get("uuid")[()].decode() 43 | 44 | return pcd, matrix, class_id, uuid 45 | 46 | 47 | @hmain( 48 | base_cfg_dir="cfg/bases", 49 | template_cfg_file="task_mapping_network/cfg/export_embeddings.yaml", 50 | create_out_dir=False, 51 | out_dir_root="task_mapping_network/logs" 52 | ) 53 | def main() -> None: 54 | 55 | inrs_root = Path(hcfg("inrs_root", str)) 56 | 57 | mlp_hdim = hcfg("mlp.hidden_dim", int) 58 | num_hidden_layers = hcfg("mlp.num_hidden_layers", int) 59 | mlp = SIREN(3, mlp_hdim, num_hidden_layers, 1) 60 | sample_sd = mlp.state_dict() 61 | 62 | train_split = hcfg("train_split", str) 63 | train_dset = InrDataset(inrs_root, train_split, sample_sd) 64 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False) 65 | 66 | val_split = hcfg("val_split", str) 67 | val_dset = InrDataset(inrs_root, val_split, 
sample_sd) 68 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False) 69 | 70 | test_split = hcfg("test_split", str) 71 | test_dset = InrDataset(inrs_root, test_split, sample_sd) 72 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False) 73 | 74 | encoder_cfg = hcfg("encoder", Dict[str, Any]) 75 | encoder = Encoder( 76 | mlp_hdim, 77 | encoder_cfg["hidden_dims"], 78 | encoder_cfg["embedding_dim"], 79 | ) 80 | ckpt = torch.load(hcfg("ckpt_path", str), map_location="cpu") 81 | encoder.load_state_dict(ckpt["encoder"]) 82 | encoder = encoder.cuda() 83 | encoder.eval() 84 | 85 | loaders = [train_loader, val_loader, test_loader] 86 | splits = [train_split, val_split, test_split] 87 | 88 | for loader, split in zip(loaders, splits): 89 | idx = 0 90 | 91 | for batch in progress_bar(loader, f"{split}"): 92 | 93 | # Limit the number of samples in the train set to 32414, which corresponds to the number non-augmented samples 94 | if split == 'train' and idx == 32414: 95 | break 96 | 97 | pcds, matrices, class_ids, uuids = batch 98 | matrices = matrices.cuda() 99 | 100 | with torch.no_grad(): 101 | embedding = encoder(matrices) 102 | 103 | h5_path = Path(hcfg("out_root", str)) / Path(f"{split}") / f"{idx}.h5" 104 | h5_path.parent.mkdir(parents=True, exist_ok=True) 105 | 106 | with h5py.File(h5_path, "w") as f: 107 | f.create_dataset("pcd", data=pcds[0].detach().cpu().numpy()) 108 | f.create_dataset("embedding", data=embedding[0].detach().cpu().numpy()) 109 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy()) 110 | f.create_dataset("uuid", data=uuids[0]) 111 | 112 | idx += 1 113 | 114 | if __name__ == "__main__": 115 | main() -------------------------------------------------------------------------------- /task_mapping_network/export_nerfs_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | sys.path.append(parent_dir) 6 | import settings 7 | 8 | import os 9 | import json 10 | import h5py 11 | import torch 12 | 13 | from hesiod import hmain 14 | from pathlib import Path 15 | from typing import Tuple 16 | from torch import Tensor 17 | from torch.utils.data import DataLoader, Dataset 18 | from hesiod import hcfg, hmain 19 | 20 | from models.encoder import Encoder 21 | from nerf2vec import config as nerf2vec_config 22 | from nerf2vec.utils import get_class_label, get_mlp_params_as_matrix 23 | 24 | 25 | class InrDataset(Dataset): 26 | def __init__(self, split_json: str, device: str, nerf_weights_file_name: str) -> None: 27 | super().__init__() 28 | 29 | with open(split_json) as file: 30 | self.nerf_paths = json.load(file) 31 | self.nerf_paths = sorted(self.nerf_paths) 32 | 33 | assert isinstance(self.nerf_paths, list), 'The json file provided is not a list.' 
34 | 35 | self.device = device 36 | self.nerf_weights_file_name = nerf_weights_file_name 37 | 38 | def __len__(self) -> int: 39 | return len(self.nerf_paths) 40 | 41 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor]: 42 | 43 | data_dir = self.nerf_paths[index] 44 | weights_file_path = os.path.join(data_dir, self.nerf_weights_file_name) 45 | 46 | class_id = nerf2vec_config.LABELS_TO_IDS[get_class_label(weights_file_path)] 47 | 48 | matrix = torch.load(weights_file_path, map_location=torch.device(self.device)) 49 | matrix = get_mlp_params_as_matrix(matrix['mlp_base.params']) 50 | 51 | return matrix, class_id, data_dir 52 | 53 | @hmain( 54 | base_cfg_dir="cfg/bases", 55 | template_cfg_file="task_mapping_network/cfg/export_embeddings.yaml", 56 | create_out_dir=False, 57 | out_dir_root="task_mapping_network/logs" 58 | ) 59 | def export_embeddings(): 60 | 61 | device = settings.DEVICE_NAME 62 | 63 | train_dset_json = hcfg("nerf2vec_train_json_path", str) 64 | train_dset = InrDataset(train_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME) 65 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False) 66 | 67 | """ 68 | val_dset_json = settings.VAL_DSET_JSON 69 | val_dset = InrDataset(val_dset_json, device='cpu', nerf_weights_file_name=config.NERF_WEIGHTS_FILE_NAME) 70 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False) 71 | 72 | test_dset_json = settings.TEST_DSET_JSON 73 | test_dset = InrDataset(test_dset_json, device='cpu', nerf_weights_file_name=config.NERF_WEIGHTS_FILE_NAME) 74 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False) 75 | """ 76 | 77 | encoder = Encoder( 78 | nerf2vec_config.MLP_UNITS, 79 | nerf2vec_config.ENCODER_HIDDEN_DIM, 80 | nerf2vec_config.ENCODER_EMBEDDING_DIM 81 | ) 82 | encoder = encoder.to(device) 83 | ckpt = torch.load(hcfg("nerf2vec_ckpt_path", str)) 84 | encoder.load_state_dict(ckpt["encoder"]) 85 | encoder.eval() 86 | 87 | loaders = [train_loader] # , val_loader, test_loader] 88 | splits = [nerf2vec_config.TRAIN_SPLIT] #, config.VAL_SPLIT, config.TEST_SPLIT] 89 | 90 | 91 | for loader, split in zip(loaders, splits): 92 | idx = 0 93 | 94 | for batch in loader: 95 | matrices, class_ids, data_dirs = batch 96 | matrices = matrices.cuda() 97 | 98 | with torch.no_grad(): 99 | embeddings = encoder(matrices) 100 | 101 | out_root = Path(hcfg("nerf_out_root", str)) 102 | h5_path = out_root / Path(f"{split}") / f"{idx}.h5" 103 | h5_path.parent.mkdir(parents=True, exist_ok=True) 104 | 105 | with h5py.File(h5_path, "w") as f: 106 | 107 | p = Path(data_dirs[0]) 108 | uuid = p.parts[-1].replace('.ply','') 109 | 110 | f.create_dataset("data_dir", data=data_dirs[0]) 111 | f.create_dataset("embedding", data=embeddings[0].detach().cpu().numpy()) 112 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy()) 113 | f.create_dataset("uuid", data=uuid) 114 | 115 | idx += 1 116 | 117 | if idx % 5000 == 0: 118 | print(f'Created {idx} embeddings for {split} split') 119 | 120 | if __name__ == "__main__": 121 | export_embeddings() -------------------------------------------------------------------------------- /task_mapping_network/inits/in3_out1_h512_l4.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_mapping_network/inits/in3_out1_h512_l4.pt 
-------------------------------------------------------------------------------- /task_mapping_network/inr2vec/create_inrs_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | script_dir = os.path.dirname(os.path.abspath(__file__)) 4 | parent_dir = os.path.dirname(script_dir) 5 | project_root_dir = os.path.dirname(parent_dir) 6 | sys.path.append(parent_dir) 7 | sys.path.append(project_root_dir) 8 | 9 | import settings 10 | 11 | from math import ceil 12 | from pathlib import Path 13 | from typing import Callable, List 14 | 15 | import h5py 16 | import torch 17 | import torch.nn.functional as F 18 | from hesiod import hcfg, hmain 19 | from pycarus.datasets.ply import PlyDataset 20 | from pycarus.geometry.pcd import compute_udf_from_pcd, farthest_point_sampling 21 | from pycarus.geometry.pcd import random_point_sampling, shuffle_pcd 22 | from pycarus.learning.models.siren import SIREN 23 | from pycarus.transforms.pcd import JitterPcd, NormalizePcdIntoUnitSphere, RandomScalePcd 24 | from pycarus.utils import progress_bar 25 | from torch.optim import Adam 26 | from torch.utils.data import DataLoader, Dataset 27 | 28 | from task_mapping_network.inr2vec.utils import get_mlps_batched_params, mlp_batched_forward 29 | 30 | 31 | class InrsDatasetCreator: 32 | def __init__(self) -> None: 33 | 34 | self.split_json_root_path = hcfg('split_json_root_path') 35 | self.pcd_root = Path(hcfg("pcd_root", str)) 36 | 37 | self.splits = hcfg("splits", List[str]) 38 | self.num_points_pcd = hcfg("num_points_pcd", int) 39 | 40 | self.num_queries_on_surface = hcfg("num_queries_on_surface", int) 41 | self.stds = hcfg("stds", List[float]) 42 | self.num_points_per_std = hcfg("num_points_per_std", List[int]) 43 | 44 | self.num_required_train_shapes = hcfg("num_required_train_shapes", int) 45 | 46 | dset = self.get_dataset("train") 47 | num_train_shapes = len(dset) 48 | self.num_augmentations = ceil(self.num_required_train_shapes / num_train_shapes) - 1 49 | 50 | self.num_points_fitting = hcfg("num_points_fitting", int) 51 | self.num_parallel_mlps = hcfg("num_parallel_mlps", int) 52 | self.hdim = hcfg("mlp.hidden_dim", int) 53 | self.num_hidden_layers = hcfg("mlp.num_hidden_layers", int) 54 | self.mlp_init_path = Path(hcfg("mlp.init_path", str)) 55 | 56 | self.num_steps = hcfg("num_steps", int) 57 | self.lr = hcfg("lr", float) 58 | 59 | self.out_root = Path(hcfg("out_root", str)) 60 | self.out_root.mkdir(parents=True) 61 | 62 | 63 | def build_mlp(self) -> SIREN: 64 | mlp = SIREN( 65 | input_dim=3, 66 | hidden_dim=self.hdim, 67 | num_hidden_layers=self.num_hidden_layers, 68 | out_dim=1, 69 | ) 70 | 71 | mlp.load_state_dict(torch.load(self.mlp_init_path)) 72 | 73 | return mlp 74 | 75 | 76 | def get_dataset(self, split: str, transforms: List[Callable] = []) -> Dataset: 77 | dset = PlyDataset(self.pcd_root, split, transforms) 78 | return dset 79 | 80 | def create_dataset(self) -> None: 81 | 82 | for split in self.splits: 83 | global_idx = 0 84 | 85 | augs = [False] 86 | if "train" in split: 87 | augs += [True] * self.num_augmentations 88 | 89 | for aug_idx, aug in enumerate(augs): 90 | if aug: 91 | transforms = [ 92 | RandomScalePcd(2 / 3, 3 / 2), 93 | JitterPcd(sigma=0.01, clip=0.05), 94 | NormalizePcdIntoUnitSphere(), 95 | ] 96 | else: 97 | transforms = [NormalizePcdIntoUnitSphere()] 98 | 99 | dset = self.get_dataset(split, transforms) 100 | 101 | loader = DataLoader( 102 | dset, 103 | batch_size=self.num_parallel_mlps, 104 | shuffle=False, 105 | 
num_workers=8, 106 | ) 107 | 108 | desc = f"Fitting {split} set ({aug_idx + 1}/{len(augs)})" 109 | for batch in progress_bar(loader, desc, 80): 110 | pcds, class_ids, uuids = batch 111 | 112 | bs = pcds.shape[0] 113 | pcds = pcds.cuda() 114 | 115 | if pcds.shape[1] != self.num_points_pcd: 116 | pcds = farthest_point_sampling(pcds, self.num_points_pcd) 117 | 118 | coords = [] 119 | labels = [] 120 | for idx in range(bs): 121 | pcd_coords, pcd_labels = compute_udf_from_pcd( 122 | pcds[idx], 123 | self.num_queries_on_surface, 124 | self.stds, 125 | self.num_points_per_std, 126 | coords_range=(-1, 1), 127 | convert_to_bce_labels=True, 128 | ) 129 | coords.append(pcd_coords) 130 | labels.append(pcd_labels) 131 | 132 | coords = torch.stack(coords, dim=0) 133 | labels = torch.stack(labels, dim=0) 134 | 135 | coords_and_labels = torch.cat((coords, labels.unsqueeze(-1)), dim=-1).cuda() 136 | coords_and_labels = shuffle_pcd(coords_and_labels) 137 | 138 | mlps = [self.build_mlp().cuda() for _ in range(bs)] 139 | batched_params = get_mlps_batched_params(mlps) 140 | 141 | optimizer = Adam(batched_params, lr=self.lr) 142 | 143 | for _ in progress_bar(range(self.num_steps)): 144 | selected_c_and_l = random_point_sampling( 145 | coords_and_labels, 146 | self.num_points_fitting, 147 | ) 148 | 149 | selected_coords = selected_c_and_l[:, :, :3] 150 | selected_labels = selected_c_and_l[:, :, 3] 151 | 152 | pred = mlp_batched_forward(batched_params, selected_coords) 153 | loss = F.binary_cross_entropy_with_logits(pred, selected_labels) 154 | 155 | optimizer.zero_grad() 156 | loss.backward() 157 | optimizer.step() 158 | 159 | for idx in range(bs): 160 | pcd = pcds[idx] 161 | class_id = class_ids[idx] 162 | uuid = uuids[idx] 163 | 164 | flattened_params = [p[idx].view(-1) for p in batched_params] 165 | flattened_params = torch.cat(flattened_params, dim=0) 166 | 167 | h5_path = self.out_root / split / f"{global_idx}.h5" 168 | h5_path.parent.mkdir(parents=True, exist_ok=True) 169 | 170 | with h5py.File(h5_path, "w") as f: 171 | f.create_dataset("pcd", data=pcd.detach().cpu().numpy()) 172 | f.create_dataset("params", data=flattened_params.detach().cpu().numpy()) 173 | f.create_dataset("class_id", data=class_id.detach().cpu().numpy()) 174 | f.create_dataset("uuid", data=uuid) 175 | 176 | global_idx += 1 177 | 178 | @hmain(base_cfg_dir="cfg/bases", template_cfg_file="task_mapping_network/cfg/inrs_dataset.yaml", create_out_dir=False) 179 | def create() -> None: 180 | dset_creator = InrsDatasetCreator() 181 | dset_creator.create_dataset() 182 | 183 | if __name__ == "__main__": 184 | create() -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/create_point_clouds_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from anyio import Path 4 | from hesiod import hcfg, hmain 5 | import open3d as o3d 6 | 7 | from typing import List 8 | 9 | 10 | def get_dataset_json(root:str, split: str): 11 | json_path = os.path.join(root, f'{split}.json') 12 | 13 | folders = [] 14 | 15 | with open(json_path) as file: 16 | dset = json.load(file) 17 | 18 | for nerf_path in dset: 19 | # Skip augmented data 20 | if nerf_path.endswith('_A1') or nerf_path.endswith('_A2'): 21 | continue 22 | 23 | full_path = Path(nerf_path) 24 | relative_path = os.path.join(full_path.parts[-2], full_path.parts[-1]) 25 | folders.append(relative_path) 26 | 27 | return folders 28 | 29 | @hmain( 30 | 
base_cfg_dir="task_mapping_network/cfg/bases", 31 | template_cfg_file="task_mapping_network/cfg/pcd_dataset.yaml", 32 | run_cfg_file=None, 33 | parse_cmd_line=False, 34 | out_dir_root="task_mapping_network/logs" 35 | ) 36 | def create_dataset(): 37 | 38 | split_json_root_path = hcfg("split_json_root_path", str) 39 | out_point_clouds_path = hcfg("out_point_clouds_path", str) 40 | mesh_root = hcfg("shapenet_root", str) 41 | 42 | 43 | splits = hcfg("splits", List[str]) 44 | 45 | 46 | for split in splits: 47 | shapes = get_dataset_json(split_json_root_path, split) 48 | for shape in shapes: 49 | 50 | mesh_class = shape.split('/')[0] 51 | mesh_id = shape.split('/')[1] 52 | 53 | mesh_path = os.path.join(mesh_root, shape, 'model.obj') 54 | 55 | 56 | mesh = o3d.io.read_triangle_mesh(mesh_path) 57 | 58 | num_points = 10000 # Adjust the number of points as needed 59 | pcd = mesh.sample_points_uniformly(number_of_points=num_points) 60 | pcd_folder = os.path.join(out_point_clouds_path, mesh_class, split) 61 | os.makedirs(pcd_folder, exist_ok=True) 62 | 63 | pcd_full_path = os.path.join(pcd_folder, f'{mesh_id}.ply') 64 | 65 | o3d.io.write_point_cloud(pcd_full_path, pcd) 66 | 67 | 68 | if __name__ == "__main__": 69 | create_dataset() 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_mapping_network/inr2vec/models/__init__.py -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/models/encoder.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class Encoder(nn.Module): 8 | def __init__(self, input_dim: int, hidden_dims: List[int], embed_dim: int) -> None: 9 | super().__init__() 10 | 11 | layers = [] 12 | for idx in range(len(hidden_dims)): 13 | in_ch = input_dim if idx == 0 else hidden_dims[idx - 1] 14 | out_ch = hidden_dims[idx] 15 | layers.append(nn.Conv1d(in_ch, out_ch, 1)) 16 | layers.append(nn.BatchNorm1d(out_ch)) 17 | layers.append(nn.ReLU()) 18 | 19 | layers.append(nn.Conv1d(hidden_dims[-1], embed_dim, 1)) 20 | 21 | self.layers = nn.Sequential(*layers) 22 | self.embed_dim = embed_dim 23 | 24 | def forward(self, x: torch.Tensor) -> torch.Tensor: 25 | x_channels_first = torch.transpose(x, 2, 1) 26 | x = self.layers(x_channels_first) 27 | x, _ = torch.max(x, 2) 28 | 29 | return x -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/models/idecoder.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple 2 | 3 | import torch 4 | from einops import repeat 5 | from torch import Tensor, nn 6 | 7 | 8 | class CoordsEncoder: 9 | def __init__( 10 | self, 11 | input_dims: int = 3, 12 | include_input: bool = True, 13 | max_freq_log2: int = 9, 14 | num_freqs: int = 10, 15 | log_sampling: bool = True, 16 | periodic_fns: Tuple[Callable, Callable] = (torch.sin, torch.cos), 17 | ) -> None: 18 | self.input_dims = input_dims 19 | self.include_input = include_input 20 | self.max_freq_log2 = max_freq_log2 21 | self.num_freqs = num_freqs 22 | self.log_sampling = log_sampling 23 | self.periodic_fns = periodic_fns 24 | self.create_embedding_fn() 
25 | 26 | def create_embedding_fn(self) -> None: 27 | embed_fns = [] 28 | d = self.input_dims 29 | out_dim = 0 30 | if self.include_input: 31 | embed_fns.append(lambda x: x) 32 | out_dim += d 33 | 34 | if self.log_sampling: 35 | freq_bands = 2.0 ** torch.linspace(0.0, self.max_freq_log2, steps=self.num_freqs) 36 | else: 37 | freq_bands = torch.linspace(2.0**0.0, 2.0**self.max_freq_log2, steps=self.num_freqs) 38 | 39 | for freq in freq_bands: 40 | for p_fn in self.periodic_fns: 41 | embed_fns.append(lambda x, p_fn=p_fn, freq=freq: p_fn(x * freq)) 42 | out_dim += d 43 | 44 | self.embed_fns = embed_fns 45 | self.out_dim = out_dim 46 | 47 | def embed(self, inputs: Tensor) -> Tensor: 48 | return torch.cat([fn(inputs) for fn in self.embed_fns], -1) 49 | 50 | 51 | class ImplicitDecoder(nn.Module): 52 | def __init__( 53 | self, 54 | embed_dim: int, 55 | in_dim: int, 56 | hidden_dim: int, 57 | num_hidden_layes_before_skip: int, 58 | num_hidden_layes_after_skip: int, 59 | out_dim: int, 60 | ) -> None: 61 | super().__init__() 62 | 63 | self.coords_enc = CoordsEncoder(in_dim) 64 | coords_dim = self.coords_enc.out_dim 65 | 66 | self.in_layer = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU()) 67 | 68 | self.skip_proj = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU()) 69 | 70 | before_skip = [] 71 | for _ in range(num_hidden_layes_before_skip): 72 | before_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())) 73 | self.before_skip = nn.Sequential(*before_skip) 74 | 75 | after_skip = [] 76 | for _ in range(num_hidden_layes_after_skip): 77 | after_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())) 78 | after_skip.append(nn.Linear(hidden_dim, out_dim)) 79 | self.after_skip = nn.Sequential(*after_skip) 80 | 81 | def forward(self, embeddings: Tensor, coords: Tensor) -> Tensor: 82 | # embeddings (B, D1) 83 | # coords (B, N, D2) 84 | coords = self.coords_enc.embed(coords) 85 | 86 | repeated_embeddings = repeat(embeddings, "b d -> b n d", n=coords.shape[1]) 87 | 88 | emb_and_coords = torch.cat([repeated_embeddings, coords], dim=-1) 89 | 90 | x = self.in_layer(emb_and_coords) 91 | x = self.before_skip(x) 92 | 93 | inp_proj = self.skip_proj(emb_and_coords) 94 | x = x + inp_proj 95 | 96 | x = self.after_skip(x) 97 | 98 | return x.squeeze(-1) -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/models/transfer.py: -------------------------------------------------------------------------------- 1 | from torch import Tensor, nn 2 | 3 | 4 | class Transfer(nn.Module): 5 | def __init__(self, emb_dim: int, num_layers: int) -> None: 6 | super().__init__() 7 | 8 | layers = [] 9 | for i in range(num_layers): 10 | layers.append(nn.Linear(emb_dim, emb_dim)) 11 | 12 | if i != num_layers - 1: 13 | layers.append(nn.BatchNorm1d(emb_dim)) 14 | layers.append(nn.ReLU()) 15 | 16 | self.net = nn.Sequential(*layers) 17 | 18 | def forward(self, x: Tensor) -> Tensor: 19 | return self.net(x) -------------------------------------------------------------------------------- /task_mapping_network/inr2vec/utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Any, Dict, List 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from pycarus.learning.models.siren import SIREN 7 | from torch import Tensor 8 | 9 | 10 | def get_mlps_batched_params(mlps: List[SIREN]) -> List[Tensor]: 11 | params = [] 12 | for i in 
range(len(mlps)): 13 | params.append(list(mlps[i].parameters())) 14 | 15 | batched_params = [] 16 | for i in range(len(params[0])): 17 | p = torch.stack([p[i] for p in params], dim=0) 18 | p = torch.clone(p.detach()) 19 | p.requires_grad = True 20 | batched_params.append(p) 21 | 22 | return batched_params 23 | 24 | 25 | def flatten_mlp_params(sd: Dict[str, Any]) -> Tensor: 26 | all_params = [] 27 | for k in sd: 28 | all_params.append(sd[k].view(-1)) 29 | all_params = torch.cat(all_params, dim=-1) 30 | return all_params 31 | 32 | 33 | def unflatten_mlp_params( 34 | params: Tensor, 35 | sample_sd: Dict[str, Any], 36 | ) -> Dict[str, Any]: 37 | sd = collections.OrderedDict() 38 | 39 | start = 0 40 | for k in sample_sd: 41 | end = start + sample_sd[k].numel() 42 | layer_params = params[start:end].view(sample_sd[k].shape) 43 | sd[k] = layer_params 44 | start = end 45 | 46 | return sd 47 | 48 | 49 | def get_mlp_params_as_matrix(flattened_params: Tensor, sd: Dict[str, Any]) -> Tensor: 50 | params_shapes = [p.shape for p in sd.values()] 51 | feat_dim = params_shapes[0][0] 52 | start = params_shapes[0].numel() + params_shapes[1].numel() 53 | end = params_shapes[-1].numel() + params_shapes[-2].numel() 54 | params = flattened_params[start:-end] 55 | return params.reshape((-1, feat_dim)) 56 | 57 | 58 | def mlp_batched_forward(batched_params: List[Tensor], coords: Tensor) -> Tensor: 59 | num_layers = len(batched_params) // 2 60 | 61 | f = coords 62 | 63 | for i in range(num_layers): 64 | weights = batched_params[i * 2] 65 | biases = batched_params[i * 2 + 1] 66 | 67 | f = torch.bmm(f, weights.permute(0, 2, 1)) + biases.unsqueeze(1) 68 | 69 | if i < num_layers - 1: 70 | f = torch.sin(30 * f) 71 | 72 | return f.squeeze(-1) 73 | 74 | 75 | def focal_loss(pred: Tensor, gt: Tensor, alpha: float = 0.1, gamma: float = 3) -> Tensor: 76 | alpha_w = torch.tensor([alpha, 1 - alpha]).cuda() 77 | 78 | bce_loss = F.binary_cross_entropy_with_logits(pred, gt.float(), reduction="none") 79 | bce_loss = bce_loss.view(-1) 80 | 81 | gt = gt.type(torch.long) 82 | at = alpha_w.gather(0, gt.view(-1)) 83 | pt = torch.exp(-bce_loss) 84 | f_loss = at * ((1 - pt) ** gamma) * bce_loss 85 | 86 | return f_loss.mean() 87 | 88 | 89 | def get_class_to_parts(dset_name: str) -> Dict[str, List[int]]: 90 | shapenet_partseg = { 91 | "02691156": [0, 1, 2, 3], 92 | "02773838": [4, 5], 93 | "02954340": [6, 7], 94 | "02958343": [8, 9, 10, 11], 95 | "03001627": [12, 13, 14, 15], 96 | "03261776": [16, 17, 18], 97 | "03467517": [19, 20, 21], 98 | "03624134": [22, 23], 99 | "03636649": [24, 25, 26, 27], 100 | "03642806": [28, 29], 101 | "03790512": [30, 31, 32, 33, 34, 35], 102 | "03797390": [36, 37], 103 | "03948459": [38, 39, 40], 104 | "04099429": [41, 42, 43], 105 | "04225987": [44, 45, 46], 106 | "04379243": [47, 48, 49], 107 | } 108 | 109 | gallery = {"shapenet-partseg": shapenet_partseg} 110 | 111 | return gallery[dset_name] --------------------------------------------------------------------------------
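As a usage illustration of the helpers above (a minimal sketch, not code from the repository): *flatten_mlp_params*, *get_mlp_params_as_matrix*, and *unflatten_mlp_params* round-trip SIREN weights between a state dict, a flat vector, and the matrix fed to the *inr2vec* encoder. The architecture below (3 inputs, hidden dim 512, 4 hidden layers, 1 output) matches the one built in *export_inrs_embeddings.py*.

```python
from pycarus.learning.models.siren import SIREN

from task_mapping_network.inr2vec.utils import (
    flatten_mlp_params,
    get_mlp_params_as_matrix,
    unflatten_mlp_params,
)

mlp = SIREN(3, 512, 4, 1)                     # same SIREN configuration used for the INRs dataset
sd = mlp.state_dict()

flat = flatten_mlp_params(sd)                 # all weights and biases as a single 1D tensor
matrix = get_mlp_params_as_matrix(flat, sd)   # hidden-layer params reshaped to (-1, 512) for the encoder
restored = unflatten_mlp_params(flat, sd)     # back to an OrderedDict with the original shapes

print(flat.shape, matrix.shape, list(restored.keys())[:2])
```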