├── README.md
├── data
│   ├── test.json
│   ├── train.json
│   └── validation.json
├── models
│   ├── __init__.py
│   ├── encoder.py
│   ├── fc_classifier.py
│   └── idecoder.py
├── nerf
│   ├── __init__.py
│   ├── intant_ngp.py
│   ├── loader.py
│   ├── loader_gt.py
│   └── utils.py
├── nerf2vec
│   ├── __init__.py
│   ├── config.py
│   ├── export_embeddings.py
│   ├── train_nerf2vec.py
│   └── utils.py
├── settings.py
├── task_classification
│   ├── __init__.py
│   ├── config.py
│   └── train_classifier.py
├── task_generation
│   ├── __init__.py
│   ├── export_embeddings.py
│   ├── latent_3d_points
│   │   ├── .gitignore
│   │   ├── .gitmodules
│   │   ├── LICENSE.md
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── doc
│   │   │   └── images
│   │   │       └── teaser.jpg
│   │   ├── download_data.sh
│   │   ├── external
│   │   │   ├── __init__.py
│   │   │   ├── python_plyfile
│   │   │   │   ├── .gitignore
│   │   │   │   ├── __init__.py
│   │   │   │   └── plyfile.py
│   │   │   └── structural_losses
│   │   │       ├── __init__.py
│   │   │       ├── approxmatch.cpp
│   │   │       ├── approxmatch.cu
│   │   │       ├── makefile
│   │   │       ├── tf_approxmatch.cpp
│   │   │       ├── tf_approxmatch.py
│   │   │       ├── tf_approxmatch_compile.sh
│   │   │       ├── tf_approxmatch_g.cu
│   │   │       ├── tf_nndistance.cpp
│   │   │       ├── tf_nndistance.py
│   │   │       ├── tf_nndistance_compile.sh
│   │   │       └── tf_nndistance_g.cu
│   │   ├── notebooks
│   │   │   ├── __init__.py
│   │   │   ├── compute_evaluation_metrics.ipynb
│   │   │   ├── train_latent_gan.ipynb
│   │   │   ├── train_raw_gan.ipynb
│   │   │   └── train_single_class_ae.ipynb
│   │   └── src
│   │       ├── __init__.py
│   │       ├── ae_templates.py
│   │       ├── autoencoder.py
│   │       ├── encoders_decoders.py
│   │       ├── evaluation_metrics.py
│   │       ├── gan.py
│   │       ├── general_utils.py
│   │       ├── generators_discriminators.py
│   │       ├── in_out.py
│   │       ├── latent_gan.py
│   │       ├── neural_net.py
│   │       ├── point_net_ae.py
│   │       ├── raw_gan.py
│   │       ├── tf_utils.py
│   │       ├── vanilla_gan.py
│   │       └── w_gan_gp.py
│   ├── train_latent_gan.py
│   └── viz_nerf.py
├── task_interp_and_retrieval
│   ├── interp.py
│   └── retrieval.py
└── task_mapping_network
    ├── README.md
    ├── cfg
    │   ├── completion.yaml
    │   ├── export_embeddings.yaml
    │   ├── inr2vec.yaml
    │   ├── inrs_dataset.yaml
    │   └── pcd_dataset.yaml
    ├── export_inrs_embeddings.py
    ├── export_nerfs_embeddings.py
    ├── inits
    │   └── in3_out1_h512_l4.pt
    ├── inr2vec
    │   ├── create_inrs_dataset.py
    │   ├── create_point_clouds_dataset.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── encoder.py
    │   │   ├── idecoder.py
    │   │   └── transfer.py
    │   ├── train_inr2vec.py
    │   └── utils.py
    ├── train_completion.py
    └── viz.py
/README.md:
--------------------------------------------------------------------------------
1 | # nf2vec
2 |
3 | This repository contains the code of the **nf2vec** framework, which is detailed in the paper [Deep Learning on Object-centric 3D Neural Fields](https://arxiv.org/abs/2312.13277). In particular, here you can find the code for processing NeRFs. If you want to use the previous version of this framework, which processes shapes, refer to [inr2vec](https://github.com/CVLAB-Unibo/inr2vec).
4 |
5 |
6 | ## MACHINE CONFIGURATION
7 |
8 | Before running the code, ensure that your machine is properly configured.
9 | This project was developed with the following main dependencies:
10 | * python==3.8.18
11 | * torch==1.12.0+cu113
12 | * torchvision==0.13.0+cu113
13 | * nerfacc==0.3.5 (with the proper CUDA version set)
14 | * wandb==0.16.0
15 |
16 | ### nf2vec
17 |
18 | What follows are commands that you can execute to replicate the environment in which *nf2vec* was originally trained:
19 |
20 | 1. Install Python 3.8.18:
21 | ```bash
22 | conda install python=3.8.18
23 | ```
24 |
25 | 2. Install pip:
26 | ```bash
27 | conda install -c anaconda pip
28 | ```
29 |
30 | 3. Install PyTorch and torchvision:
31 | ```bash
32 | pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
33 | ```
34 |
35 | 4. Install CUDA Toolkit:
36 | ```bash
37 | conda install -c "nvidia/label/cuda-11.7.1" cuda-toolkit
38 | ```
39 |
40 | 5. Install Ninja and Tiny CUDA NN:
41 | ```bash
42 | pip install ninja git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
43 | ```
44 |
45 | 6. Install NerfAcc:
46 | ```bash
47 | pip install nerfacc==0.3.5 -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.12.0_cu113.html
48 | ```
49 |
50 | 7. Install Einops:
51 | ```bash
52 | conda install -c conda-forge einops
53 | ```
54 |
55 | 8. Install ImageIO:
56 | ```bash
57 | conda install -c conda-forge imageio
58 | ```
59 |
60 | 9. Install WanDB:
61 | ```bash
62 | pip install wandb==0.16.0
63 | ```
64 | 10. Install h5py:
65 | ```bash
66 | conda install -c anaconda h5py
67 | ```
68 | 11. Install TorchMetrics:
69 | ```bash
70 | pip install torchmetrics
71 | ```
72 |
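A quick way to verify the resulting environment (a minimal sketch, assuming all of the packages above were installed into the active conda environment):
```python
# Minimal environment check: versions and CUDA availability.
import torch
import torchvision
import nerfacc

print(torch.__version__)          # expected: 1.12.0+cu113
print(torchvision.__version__)    # expected: 0.13.0+cu113
print(nerfacc.__version__)        # expected: 0.3.5
print(torch.cuda.is_available())  # must be True: tiny-cuda-nn and nerfacc require a CUDA GPU

import tinycudann  # raises ImportError if the Tiny CUDA NN bindings are missing
```
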
73 | ### Generation
74 | The generation task is based on the *Latent GAN* model detailed at [THIS](https://github.com/optas/latent_3d_points) link. Please follow the instructions provided there to properly configure your environment.
75 |
76 | ### Mapping Network
77 | The mapping network task requires training the *inr2vec* framework. Please refer to [THIS](https://github.com/CVLAB-Unibo/inr2vec?tab=readme-ov-file#setup) page to properly configure your environment.
78 |
79 | ## TRAINING AND EXPERIMENTS
80 | This section contains the details required to run the code.
81 |
82 | **IMPORTANT NOTES**:
83 | 1. Each module cited below *must* be executed from the root of the project, not from within its package, so that all the relative paths resolve correctly.
84 |
85 | 2. The file *settings.py* contains all the paths (e.g., dataset location, model weights, etc.) and the general configurations used by each of the modules explained below.
86 |
87 | 3. Some training runs and experiments, such as the training of the *nf2vec* framework and the classification task, use the *wandb* library. If you want to use it, you need to change the two variables `os.environ["WANDB_SILENT"]` and `os.environ["WANDB_MODE"]`, which are located at the beginning of the *settings.py* module, as shown below.
88 |
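For example, to turn logging on you could change those defaults at the top of *settings.py* (a sketch; `"online"` is the standard *wandb* mode for live logging):
```python
# settings.py (top of the file): enable wandb instead of the shipped defaults
os.environ["WANDB_SILENT"] = "false"
os.environ["WANDB_MODE"] = "online"
```
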
89 | ## Train *nf2vec*
90 |
91 | To train *nf2vec* you need a dataset of trained NeRFs. The code expects the following files to exist:
92 | * data/train.json
93 | * data/validation.json
94 | * data/test.json
95 |
96 | Each of these JSONs holds a list of paths, one per trained NeRF assigned to the corresponding split. Each path points to a folder, and each folder contains the following relevant files:
97 | * the trained NeRF's weights
98 | * the NeRF's occupancy grid
99 | * JSON files with transform matrices and other parameters necessary to train NeRFs.
100 |
101 | The names of the files contained in these folders should not be changed. The repository ships the JSON files that were originally used to train the framework; a sketch of the expected layout is shown below.
102 |
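For reference (the paths below are placeholders, not real dataset entries):
```python
# data/train.json is a flat JSON list of folder paths, e.g.:
#   ["<path/to/nerf_folder_1>", "<path/to/nerf_folder_2>", ...]
import json

with open("data/train.json") as f:
    nerf_paths = json.load(f)

# Every entry must point to a folder holding the trained NeRF weights
# (nerf_weights.pth), its occupancy grid and the transform JSON files.
print(len(nerf_paths), nerf_paths[0])
```
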
103 | Execute the following command to train *nf2vec*:
104 | ```bash
105 | python nerf2vec/train_nerf2vec.py
106 | ```
107 | If you have enabled *wandb*, you should update its settings in the *config_wandb* method, which is located in the *train_nerf2vec.py* module.
108 |
109 | ## Export *nerf2vec* embeddings
110 | Execute the following command to export the *nerf2vec* embeddings:
111 | ```bash
112 | python nerf2vec/export_embeddings.py
113 | ```
114 | Note that these embeddings are **necessary** for other tasks, such as classification, retrieval and generation.
115 |
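Each embedding is written to its own HDF5 file, one folder per split; a minimal sketch of how an exported embedding can be read back (the index `0` is just an example):
```python
import h5py

# Path layout taken from nerf2vec/export_embeddings.py: nerf2vec/embeddings/<split>/<index>.h5
with h5py.File("nerf2vec/embeddings/train/0.h5", "r") as f:
    embedding = f["embedding"][()]  # 1024-dimensional nf2vec embedding
    class_id = f["class_id"][()]    # integer label (see LABELS_TO_IDS in nerf2vec/config.py)
    data_dir = f["data_dir"][()]    # folder of the source NeRF
```
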
116 | ## Retrieval task
117 | Execute the following command to perform the retrieval task:
118 | ```bash
119 | python task_interp_and_retrieval/retrieval.py
120 | ```
121 | The results will be shown in the *task_interp_and_retrieval/retrieval_plots_X* folder, where X depends on the chosen split (i.e., train, validation or test). The split can be set in the *main* method of the *retrieval.py* module.
122 |
123 | All the files created during a given retrieval iteration share the same prefix, which is a randomly generated UUID.
124 |
125 |
126 | ## Interpolation task
127 | Execute the following command to perform the interpolation task:
128 | ```bash
129 | python task_interp_and_retrieval/interp.py
130 | ```
131 | The results will be shown in the *task_interp_and_retrieval/interp_plots_X* folder, where X depends on the chosen split (i.e., train, validation or test). The split can be set in the *main* method of the *interp.py* module.
132 |
133 | ## Classification task
134 | Execute the following command to perform the classification task:
135 | ```bash
136 | python task_classification/train_classifier.py
137 | ```
138 | If you have enabled *wandb*, you should update its settings in the *config_wandb* method, which is located in the *train_classifier.py* module.
139 |
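For reference, a minimal sketch of how the exported embeddings and the classification head fit together (the actual training loop lives in *train_classifier.py*; the random tensor below only stands in for a batch of embeddings):
```python
import torch

from models.fc_classifier import FcClassifier
from nerf2vec import config as nerf2vec_config
from task_classification import config as classification_config

# A small MLP maps each 1024-d nf2vec embedding to one of the 13 ShapeNet classes.
classifier = FcClassifier(classification_config.LAYERS_DIM, nerf2vec_config.NUM_CLASSES)
fake_batch = torch.rand(classification_config.TRAIN_BS, nerf2vec_config.ENCODER_EMBEDDING_DIM)
print(classifier(fake_batch).shape)  # torch.Size([256, 13])
```
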
140 | ## Generation task
141 | To generate and visualize new embeddings, the following operations must be executed in order.
142 |
143 | ### 1) Export embeddings
144 | The following command creates the folder *task_generation/latent_embeddings*, which will contain the *nerf2vec* embeddings grouped by class, as required by this task.
145 | ```bash
146 | python task_generation/export_embeddings.py
147 | ```
148 |
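Each class ends up in its own `embeddings_<class_id>.npz` file inside that folder (naming taken from *task_generation/export_embeddings.py*); a quick sketch of how to inspect one:
```python
import numpy as np

# Class 0 corresponds to "airplane" in LABELS_TO_IDS (nerf2vec/config.py).
embeddings = np.load("task_generation/latent_embeddings/embeddings_0.npz")["embeddings"]
print(embeddings.shape)  # (number of train NeRFs of class 0, 1024)
```
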
149 | ### 2) Train GANs
150 | The following command creates the folder *task_generation/experiments*, which will contain both the weights of the trained models and the generated embeddings:
151 | ```bash
152 | python task_generation/train_latent_gan.py
153 | ```
154 | All the hyperparameters used to train the *Latent GANs* can be found inside the *train_latent_gan.py* module.
155 |
156 | ### 3) Create renderings
157 | The following command creates renderings from the embeddings generated during the previous step:
158 | ```bash
159 | python task_generation/viz_nerf.py
160 | ```
161 | The renderings will be created in the *GAN_plots_X* folder, where X is the ID of a specific class.
162 |
163 | ## Mapping network task
164 | Please refer to [THIS](task_mapping_network/README.md) README for this task.
165 |
166 | # Datasets and model weights
167 | Please contact us if you need access to the datasets, exported embeddings, and weights of the trained models used in all experiments.
168 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/models/__init__.py
--------------------------------------------------------------------------------
/models/encoder.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import torch
4 | from torch import nn
5 |
6 |
7 | class Encoder(nn.Module):
8 | def __init__(self, input_dim: int, hidden_dims: List[int], embed_dim: int) -> None:
9 | super().__init__()
10 |
11 | layers = []
12 | for idx in range(len(hidden_dims)):
13 | in_ch = input_dim if idx == 0 else hidden_dims[idx - 1]
14 | out_ch = hidden_dims[idx]
15 | layers.append(nn.Conv1d(in_ch, out_ch, 1))
16 | layers.append(nn.BatchNorm1d(out_ch))
17 | layers.append(nn.ReLU())
18 |
19 | layers.append(nn.Conv1d(hidden_dims[-1], embed_dim, 1))
20 |
21 | self.layers = nn.Sequential(*layers)
22 | self.embed_dim = embed_dim
23 |
24 | def forward(self, x: torch.Tensor) -> torch.Tensor:
25 | x_channels_first = torch.transpose(x, 2, 1)
26 | x = self.layers(x_channels_first)
27 | x, _ = torch.max(x, 2)
28 |
29 | return x
30 |
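# Usage sketch (not part of the original file): in nerf2vec the encoder is fed NeRF MLP
# weights reshaped to [batch, n_rows, input_dim] (see get_mlp_params_as_matrix in
# nerf2vec/utils.py) and max-pools over the rows to produce a [batch, embed_dim] embedding:
#   encoder = Encoder(input_dim=64, hidden_dims=[512, 512, 1024, 1024], embed_dim=1024)
#   emb = encoder(torch.rand(16, 400, 64))  # -> torch.Size([16, 1024])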
--------------------------------------------------------------------------------
/models/fc_classifier.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from torch import Tensor, nn
4 |
5 |
6 | class FcClassifier(nn.Module):
7 | def __init__(self, layers_dim: List[int], num_classes: int) -> None:
8 | super().__init__()
9 |
10 | layers = []
11 | if len(layers_dim) > 1:
12 | for i in range(len(layers_dim) - 1):
13 | layers.append(nn.Linear(layers_dim[i], layers_dim[i + 1]))
14 | layers.append(nn.BatchNorm1d(layers_dim[i + 1]))
15 | layers.append(nn.ReLU())
16 | layers.append(nn.Dropout())
17 | layers.append(nn.Linear(layers_dim[-1], num_classes))
18 |
19 | self.net = nn.Sequential(*layers)
20 |
21 | def forward(self, x: Tensor) -> Tensor:
22 | return self.net(x)
23 |
--------------------------------------------------------------------------------
/models/idecoder.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Tuple, List, Union
2 |
3 | import torch
4 | from einops import repeat
5 | from torch import Tensor, nn
6 | import tinycudann as tcnn
7 |
8 | from nerf.intant_ngp import _TruncExp
9 |
10 | class CoordsEncoder:
11 | def __init__(
12 | self,
13 | encoding_conf: dict,
14 | input_dims: int = 3
15 | ) -> None:
16 | self.input_dims = input_dims
17 |
18 | self.coords_enc = tcnn.Encoding(input_dims, encoding_conf, seed=999)
19 | self.out_dim = self.coords_enc.n_output_dims
20 |
21 | def apply_encoding(self, x):
22 | return self.coords_enc(x)
23 |
24 | def embed(self, inputs: Tensor) -> Tensor:
25 | # return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
26 | result_encoding = self.apply_encoding(inputs.view(-1, 3))
27 | result_encoding = result_encoding.view(inputs.size()[0],inputs.size()[1],-1)
28 | return result_encoding
29 |
30 | class ImplicitDecoder(nn.Module):
31 | def __init__(
32 | self,
33 | embed_dim: int,
34 | in_dim: int,
35 | hidden_dim: int,
36 | num_hidden_layers_before_skip: int,
37 | num_hidden_layers_after_skip: int,
38 | out_dim: int,
39 | encoding_conf: dict, # Added for NerfAcc
40 | aabb: Union[torch.Tensor, List[float]] # Added for NerfAcc
41 | ) -> None:
42 | super().__init__()
43 |
44 | self.coords_enc = CoordsEncoder(encoding_conf=encoding_conf, input_dims=in_dim)
45 | coords_dim = self.coords_enc.out_dim
46 |
47 | # ################################################################################
48 | # Added for NerfAcc
49 | # ################################################################################
50 | trunc_exp = _TruncExp.apply
51 | self.density_activation = lambda x: trunc_exp(x - 1)
52 | self.aabb = aabb
53 | self.in_dim = in_dim
54 | # ################################################################################
55 |
56 | self.in_layer = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU())
57 |
58 | self.skip_proj = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU())
59 |
60 | before_skip = []
61 | for _ in range(num_hidden_layers_before_skip):
62 | before_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
63 | self.before_skip = nn.Sequential(*before_skip)
64 |
65 | after_skip = []
66 | for _ in range(num_hidden_layers_after_skip):
67 | after_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
68 | after_skip.append(nn.Linear(hidden_dim, out_dim))
69 | self.after_skip = nn.Sequential(*after_skip)
70 |
71 |     def forward(self, embeddings: Tensor, coords: Tensor) -> Tuple[Tensor, Tensor]:
72 |
73 |
74 | # Sometimes the ray march algorithm calls the model with an input with 0 length.
75 | # The CutlassMLP crashes in these cases, therefore this fix has been applied.
76 | batch_size, n_coords, _ = coords.size()
77 | if n_coords == 0:
78 | rgb = torch.zeros([batch_size, 0, 3], device=coords.device)
79 | density = torch.zeros([batch_size, 0, 1], device=coords.device)
80 | return rgb, density
81 |
82 | # ################################################################################
83 | # Added for NerfAcc
84 | # ################################################################################
85 | aabb_min, aabb_max = torch.split(self.aabb, self.in_dim, dim=-1)
86 | coords = (coords - aabb_min) / (aabb_max - aabb_min)
87 | selector = ((coords > 0.0) & (coords < 1.0)).all(dim=-1)
88 | # ################################################################################
89 |
90 | coords = self.coords_enc.embed(coords)
91 |
92 | repeated_embeddings = repeat(embeddings, "b d -> b n d", n=coords.shape[1])
93 |
94 | emb_and_coords = torch.cat([repeated_embeddings, coords], dim=-1)
95 |
96 | x = self.in_layer(emb_and_coords)
97 | x = self.before_skip(x)
98 |
99 | inp_proj = self.skip_proj(emb_and_coords)
100 | x = x + inp_proj
101 |
102 | x = self.after_skip(x)
103 | # return x.squeeze(-1) # ORIGINAL INR2VEC IMPLEMENTATION
104 |
105 | # ################################################################################
106 | # Added for NerfAcc
107 | # ################################################################################
108 | rgb, density_before_activation = x[..., :3], x[..., 3]
109 | density_before_activation = density_before_activation[:, :, None]
110 |
111 | # Be sure that the density is non-negative
112 | density = (
113 | self.density_activation(density_before_activation)
114 | * selector[..., None]
115 | )
116 |
117 | rgb = torch.nn.Sigmoid()(rgb)
118 |
119 | return rgb, density
120 | # ################################################################################
121 |
122 |
--------------------------------------------------------------------------------
/nerf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/nerf/__init__.py
--------------------------------------------------------------------------------
/nerf/intant_ngp.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | from typing import Callable, List, Union
6 |
7 | import torch
8 | from torch.autograd import Function
9 | from torch.cuda.amp import custom_bwd, custom_fwd
10 |
11 | try:
12 | import tinycudann as tcnn
13 | except ImportError as e:
14 | print(
15 | f"Error: {e}! "
16 | "Please install tinycudann by: "
17 | "pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch"
18 | )
19 | exit()
20 |
21 |
22 | class _TruncExp(Function): # pylint: disable=abstract-method
23 | # Implementation from torch-ngp:
24 | # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py
25 | @staticmethod
26 | @custom_fwd(cast_inputs=torch.float32)
27 | def forward(ctx, x): # pylint: disable=arguments-differ
28 | ctx.save_for_backward(x)
29 | return torch.exp(x)
30 |
31 | @staticmethod
32 | @custom_bwd
33 | def backward(ctx, g): # pylint: disable=arguments-differ
34 | x = ctx.saved_tensors[0]
35 | return g * torch.exp(torch.clamp(x, max=15))
36 |
37 |
38 | trunc_exp = _TruncExp.apply
39 |
40 |
41 | def contract_to_unisphere(
42 | x: torch.Tensor,
43 | aabb: torch.Tensor,
44 | eps: float = 1e-6,
45 | derivative: bool = False,
46 | ):
47 | aabb_min, aabb_max = torch.split(aabb, 3, dim=-1)
48 | x = (x - aabb_min) / (aabb_max - aabb_min)
49 | x = x * 2 - 1 # aabb is at [-1, 1]
50 | mag = x.norm(dim=-1, keepdim=True)
51 | mask = mag.squeeze(-1) > 1
52 |
53 | if derivative:
54 | dev = (2 * mag - 1) / mag**2 + 2 * x**2 * (
55 | 1 / mag**3 - (2 * mag - 1) / mag**4
56 | )
57 | dev[~mask] = 1.0
58 | dev = torch.clamp(dev, min=eps)
59 | return dev
60 | else:
61 | x[mask] = (2 - 1 / mag[mask]) * (x[mask] / mag[mask])
62 | x = x / 4 + 0.5 # [-inf, inf] is at [0, 1]
63 | return x
64 |
65 |
66 | class NGPradianceField(torch.nn.Module):
67 |     """Instant-NGP radiance field"""
68 |
69 | def __init__(
70 | self,
71 | aabb: Union[torch.Tensor, List[float]],
72 | num_dim: int = 3,
73 | use_viewdirs: bool = False,
74 | density_activation: Callable = lambda x: trunc_exp(x - 1),
75 | unbounded: bool = False,
76 | geo_feat_dim: int = 3,
77 | encoding='Frequency',
78 | mlp='CutlassMLP',
79 | activation='Sine',
80 | n_hidden_layers=4,
81 | n_neurons=256,
82 | encoding_size=24
83 | ) -> None:
84 | super().__init__()
85 | if not isinstance(aabb, torch.Tensor):
86 | aabb = torch.tensor(aabb, dtype=torch.float32,)
87 |         # NERF2VEC: Added persistent=False
88 | self.register_buffer("aabb", aabb, persistent=False)
89 | self.num_dim = num_dim
90 | self.use_viewdirs = use_viewdirs
91 | self.density_activation = density_activation
92 | self.unbounded = unbounded
93 |
94 | self.geo_feat_dim = geo_feat_dim if use_viewdirs else 0
95 |
96 | if self.use_viewdirs:
97 | single_mlp_encoding_config = {
98 | "otype": "Composite",
99 | "nested": [
100 | # POSITION ENCODING
101 | {
102 | "n_dims_to_encode": 3,
103 | "otype": "Frequency",
104 | "n_frequencies": 6,
105 |
106 | },
107 | # DIRECTION ENCODING
108 | {
109 | "n_dims_to_encode": 3,
110 | "otype": "SphericalHarmonics",
111 | "degree": 1, # Determines the output's dimension, which is degree^2
112 | },
113 | # {"otype": "Identity", "n_bins": 4, "degree": 4},
114 | ]
115 | }
116 | else:
117 | if encoding == 'Frequency':
118 | single_mlp_encoding_config = {
119 | "otype": "Frequency",
120 | "n_frequencies": encoding_size
121 | }
122 | else:
123 | single_mlp_encoding_config = {
124 | "otype": "Identity"
125 | }
126 |
127 | # print(f'*'*40)
128 | # print(f'Initializing model: \n- mlp: {mlp} - {n_hidden_layers} hidden layers - {n_neurons} neurons\n- activation: {activation.upper()}\n- encoding: {encoding.upper()} - size: {encoding_size}')
129 | # print(f'*'*40)
130 | self.mlp_base = tcnn.NetworkWithInputEncoding(
131 | seed=999,
132 | n_input_dims=self.num_dim+self.geo_feat_dim,
133 | n_output_dims=4,
134 | encoding_config=single_mlp_encoding_config,
135 | network_config={
136 | "otype": mlp, # FullyFusedMLP, CutlassMLP
137 | "activation": activation,
138 | "output_activation": "None",
139 | "n_neurons": n_neurons,
140 | "n_hidden_layers": n_hidden_layers
141 | },
142 | )
143 |
144 | def query_density(self, x, return_feat: bool = False):
145 | if self.unbounded:
146 | x = contract_to_unisphere(x, self.aabb)
147 | else:
148 | aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1)
149 | x = (x - aabb_min) / (aabb_max - aabb_min)
150 | selector = ((x > 0.0) & (x < 1.0)).all(dim=-1)
151 | x = (
152 | # This view actually seems to do nothing
153 | self.mlp_base(x.view(-1, self.num_dim))
154 | # change the shape of the tensor to [all dimension of x but last, 1 + the feature dimension]
155 | .view(list(x.shape[:-1]) + [1 + self.geo_feat_dim])
156 | .to(x) # Same dtype as x (the input)
157 | )
158 |
159 | density_before_activation, base_mlp_out = torch.split(
160 | x, [1, self.geo_feat_dim], dim=-1
161 | )
162 | density = (
163 | self.density_activation(density_before_activation)
164 | * selector[..., None]
165 | )
166 | if return_feat:
167 | return density, base_mlp_out
168 | else:
169 | return density
170 |
171 | def _query_rgb(self, dir, embedding):
172 | # tcnn requires directions in the range [0, 1]
173 | if self.use_viewdirs:
174 | dir = (dir + 1.0) / 2.0
175 | d = self.direction_encoding(dir.view(-1, dir.shape[-1]))
176 |
177 |             # Concatenation of the DENSITY MLP output and the encoded view direction
178 | h = torch.cat([d, embedding.view(-1, self.geo_feat_dim)], dim=-1)
179 | else:
180 | h = embedding.view(-1, self.geo_feat_dim)
181 | rgb = (
182 | self.mlp_head(h)
183 | .view(list(embedding.shape[:-1]) + [3])
184 | .to(embedding)
185 | )
186 | return rgb
187 |
188 | def _query_density_and_rgb(self, x, dir=None):
189 |
190 | if self.unbounded:
191 | x = contract_to_unisphere(x, self.aabb)
192 | else:
193 | aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1)
194 | x = (x - aabb_min) / (aabb_max - aabb_min)
195 | selector = ((x > 0.0) & (x < 1.0)).all(dim=-1)
196 |
197 | if self.use_viewdirs:
198 | if dir is not None:
199 | dir = (dir + 1.0) / 2.0
200 | # d = self.direction_encoding(dir.view(-1, dir.shape[-1]))
201 |
202 | x = torch.cat([x, dir], dim=-1)
203 | else:
204 | # if self.random_tensor == None:
205 | # random = torch.ones(x.shape[0], self.geo_feat_dim, device=x.device).to(x)
206 | # all ones or zeros are detrimental for the loss. It is much better a random tensor.
207 | # random = self.random_tensor.repeat(x.shape[0], 1).to(device=x.device)
208 | random = torch.rand(
209 | x.shape[0], self.geo_feat_dim, device=x.device)
210 | # random = torch.zeros(x.shape[0], self.geo_feat_dim, device=x.device).to(x)
211 | x = torch.cat([x, random], dim=-1)
212 |
213 | # Sometimes the ray march algorithm calls the model with an input with 0 length.
214 | # The CutlassMLP crashes in these cases, therefore this fix has been applied.
215 | if len(x) == 0:
216 | rgb = torch.zeros([0, 3], device=x.device)
217 | density = torch.zeros([0, 1], device=x.device)
218 | return rgb, density
219 |
220 | out = (
221 | # self.mlp_base(x.view(-1, self.num_dim)) # This view actually seems to do nothing
222 | # This view actually seems to do nothing
223 | self.mlp_base(x.view(-1, self.num_dim+self.geo_feat_dim))
224 | # change the shape of the tensor to [all dimension of x but last, 1 + the feature dimension]
225 | # .view(list(x.shape[:-1]) + [1 + self.geo_feat_dim])
226 | .to(x) # Same dtype as x (the input)
227 | )
228 |
229 | rgb, density_before_activation = out[..., :3], out[..., 3]
230 | density_before_activation = density_before_activation[:, None]
231 |
232 | # Be sure that the density is non-negative
233 | density = (
234 | self.density_activation(density_before_activation)
235 | * selector[..., None]
236 | )
237 |
238 | rgb = torch.nn.Sigmoid()(rgb)
239 |
240 | return rgb, density
241 |
242 | def forward(
243 | self,
244 | positions: torch.Tensor,
245 | directions: torch.Tensor = None,
246 | ):
247 | """
248 | if self.use_viewdirs and (directions is not None):
249 | assert (
250 | positions.shape == directions.shape
251 | ), f"{positions.shape} v.s. {directions.shape}"
252 |
253 | # density, embedding = self.query_density(positions, return_feat=True)
254 |
255 | # rgb = self._query_rgb(directions, embedding=embedding)
256 | """
257 |
258 | rgb, density = self._query_density_and_rgb(positions, directions)
259 |
260 | # print(f'rgb.shape: {rgb.shape}')
261 | # print(f'density.shape: {density.shape}')
262 |
263 | return rgb, density
264 |
--------------------------------------------------------------------------------
/nerf/loader.py:
--------------------------------------------------------------------------------
1 | """
2 | The NeRFLoader class inherits from Dataset, but it is not used as a Dataset in the training loop. This is because the current
3 | implementation was inherited from the original NerfAcc implementation. In the future, it could be useful to remove this dependency.
4 | """
5 | import json
6 | import os
7 | import torch
8 | import concurrent
9 | import numpy as np
10 | import imageio.v2 as imageio
11 | import torch.nn.functional as F
12 |
13 | from nerf.utils import Rays
14 |
15 |
16 | def read_image(file_path):
17 | return imageio.imread(file_path)
18 |
19 |
20 | def _load_renderings(data_dir: str, split: str):
21 |
22 | with open(
23 | os.path.join(data_dir, "transforms_{}.json".format(split)), "r"
24 | ) as fp:
25 | meta = json.load(fp)
26 | images = []
27 | camtoworlds = []
28 |
29 | file_paths = []
30 | for i in range(len(meta["frames"])):
31 | frame = meta["frames"][i]
32 | fname = os.path.join(data_dir, frame["file_path"] + ".png")
33 |
34 | file_paths.append(fname)
35 |
36 | camtoworlds.append(frame["transform_matrix"])
37 |
38 | with concurrent.futures.ThreadPoolExecutor() as executor:
39 | results = executor.map(read_image, file_paths)
40 | images = list(results)
41 |
42 |
43 | images = np.stack(images, axis=0)
44 | camtoworlds = np.stack(camtoworlds, axis=0)
45 |
46 | h, w = images.shape[1:3]
47 | camera_angle_x = float(meta["camera_angle_x"])
48 | focal = 0.5 * w / np.tan(0.5 * camera_angle_x)
49 |
50 | return images, camtoworlds, focal
51 |
52 |
53 | class NeRFLoader(torch.utils.data.Dataset):
54 |
55 | WIDTH, HEIGHT = 224, 224
56 | NEAR, FAR = 2.0, 6.0
57 | OPENGL_CAMERA = True
58 |
59 | def __init__(
60 | self,
61 | data_dir: str,
62 | split: str = "train",
63 | color_bkgd_aug: str = "random",
64 | num_rays: int = None,
65 | near: float = None,
66 | far: float = None,
67 | device: str = "cuda:0",
68 | weights_file_name: str = "nerf_weights.pth",
69 | training: bool = True,
70 | images_RAM = None
71 | ):
72 | super().__init__()
73 | assert color_bkgd_aug in ["white", "black", "random"]
74 | self.num_rays = num_rays
75 | self.near = self.NEAR if near is None else near
76 | self.far = self.FAR if far is None else far
77 |
78 | self.training = training
79 |
80 | self.images_RAM = images_RAM
81 |
82 | self.color_bkgd_aug = color_bkgd_aug
83 |
84 | self.weights_file_path = os.path.join(data_dir, weights_file_name)
85 |
86 | self.images, self.camtoworlds, self.focal = _load_renderings(#_from_RAM(
87 | data_dir, split#, self.images_RAM
88 | )
89 | self.images = torch.from_numpy(self.images).to(device).to(torch.uint8)
90 | self.camtoworlds = (
91 | torch.from_numpy(self.camtoworlds).to(device).to(torch.float32)
92 | )
93 | self.K = torch.tensor(
94 | [
95 | [self.focal, 0, self.WIDTH / 2.0],
96 | [0, self.focal, self.HEIGHT / 2.0],
97 | [0, 0, 1],
98 | ],
99 | dtype=torch.float32,
100 | device=device,
101 | ) # (3, 3)
102 |
103 | assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH)
104 |
105 | def __len__(self):
106 | return len(self.images)
107 |
108 | @torch.no_grad()
109 | def __getitem__(self, index):
110 | data = self.fetch_data(index)
111 | data = self.preprocess(data)
112 | return data
113 |
114 | def preprocess(self, data):
115 | """Process the fetched / cached data with randomness."""
116 | rgba, rays = data["rgba"], data["rays"]
117 | pixels, alpha = torch.split(rgba, [3, 1], dim=-1)
118 |
119 | if self.training:
120 | if self.color_bkgd_aug == "random":
121 | color_bkgd = torch.rand(3, device=self.images.device)
122 | elif self.color_bkgd_aug == "white":
123 | color_bkgd = torch.ones(3, device=self.images.device)
124 | elif self.color_bkgd_aug == "black":
125 | color_bkgd = torch.zeros(3, device=self.images.device)
126 | else:
127 | color_bkgd = torch.zeros(3, device=self.images.device)
128 |
129 | pixels = pixels * alpha + color_bkgd * (1.0 - alpha)
130 | return {
131 | "pixels": pixels, # [n_rays, 3] or [h, w, 3]
132 | "rays": rays, # [n_rays,] or [h, w]
133 | "color_bkgd": color_bkgd, # [3,]
134 | **{k: v for k, v in data.items() if k not in ["rgba", "rays"]},
135 | }
136 |
137 | def update_num_rays(self, num_rays):
138 | self.num_rays = num_rays
139 |
140 | def fetch_data(self, index):
141 | """Fetch the data (it maybe cached for multiple batches)."""
142 |
143 | num_rays = self.num_rays
144 |
145 | if self.training:
146 | image_id = torch.randint(
147 | 0,
148 | len(self.images),
149 | size=(num_rays,),
150 | device=self.images.device,
151 | )
152 |
153 | x = torch.randint(
154 | 0, self.WIDTH, size=(num_rays,), device=self.images.device
155 | )
156 | y = torch.randint(
157 | 0, self.HEIGHT, size=(num_rays,), device=self.images.device
158 | )
159 | else:
160 | image_id = [index]
161 | x, y = torch.meshgrid(
162 | torch.arange(self.WIDTH, device=self.images.device),
163 | torch.arange(self.HEIGHT, device=self.images.device),
164 | indexing="xy",
165 | )
166 | x = x.flatten()
167 | y = y.flatten()
168 |
169 | # generate rays
170 | rgba = self.images[image_id, y, x] / 255.0 # (num_rays, 4)
171 | c2w = self.camtoworlds[image_id] # (num_rays, 3, 4)
172 |
173 | camera_dirs = F.pad(
174 | torch.stack(
175 | [
176 | (x - self.K[0, 2] + 0.5) / self.K[0, 0],
177 | (y - self.K[1, 2] + 0.5)
178 | / self.K[1, 1]
179 | * (-1.0 if self.OPENGL_CAMERA else 1.0),
180 | ],
181 | dim=-1,
182 | ),
183 | (0, 1),
184 | value=(-1.0 if self.OPENGL_CAMERA else 1.0),
185 | ) # [num_rays, 3]
186 |
187 | directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
188 | origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
189 | viewdirs = directions / torch.linalg.norm(
190 | directions, dim=-1, keepdims=True
191 | )
192 |
193 | if self.training:
194 | origins = torch.reshape(origins, (num_rays, 3))
195 | viewdirs = torch.reshape(viewdirs, (num_rays, 3))
196 | rgba = torch.reshape(rgba, (num_rays, 4))
197 | else:
198 | origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3))
199 | viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3))
200 | rgba = torch.reshape(rgba, (self.HEIGHT, self.WIDTH, 4))
201 |
202 | rays = Rays(origins=origins, viewdirs=viewdirs)
203 |
204 | return {
205 | "rgba": rgba, # [h, w, 4]
206 | "rays": rays, # [h, w, 3]
207 | }
208 |
--------------------------------------------------------------------------------
/nerf/loader_gt.py:
--------------------------------------------------------------------------------
1 | """
2 | The NeRFLoaderGT class inherits from Dataset, but it is not used as a Dataset in the training loop. This is because the current
3 | implementation was inherited from the original NerfAcc implementation. In the future, it could be useful to remove this dependency.
4 | """
5 | import json
6 | import os
7 |
8 | import imageio.v2 as imageio
9 | import numpy as np
10 | import torch
11 | import torch.nn.functional as F
12 |
13 | from nerf.utils import Rays
14 |
15 | def _load_renderings(data_dir: str, split: str, h: int, w: int):
16 |
17 | with open(
18 | os.path.join(data_dir, "transforms_{}_compressed.json".format(split)), "r"
19 | ) as fp:
20 | meta = json.load(fp)
21 |
22 | camtoworlds = []
23 | for i in range(len(meta["frames"])):
24 | frame = meta["frames"][i]
25 | # fname = os.path.join(data_dir, frame["file_path"] + ".png")
26 | camtoworlds.append(frame["transform_matrix"])
27 |
28 | camtoworlds = np.stack(camtoworlds, axis=0)
29 |
30 | camera_angle_x = float(meta["camera_angle_x"])
31 | focal = 0.5 * w / np.tan(0.5 * camera_angle_x)
32 |
33 | return camtoworlds, focal
34 |
35 |
36 | class NeRFLoaderGT(torch.utils.data.Dataset):
37 |
38 | WIDTH, HEIGHT = 224, 224
39 | NEAR, FAR = 2.0, 6.0
40 | OPENGL_CAMERA = True
41 |
42 | def __init__(
43 | self,
44 | data_dir: str,
45 | split: str = "train",
46 | color_bkgd_aug: str = "random",
47 | num_rays: int = None,
48 | near: float = None,
49 | far: float = None,
50 | device: str = "cuda:0",
51 | weights_file_name: str = "nerf_weights.pth",
52 | training: bool = True
53 | ):
54 | super().__init__()
55 | assert color_bkgd_aug in ["white", "black", "random"]
56 | self.num_rays = num_rays
57 | self.near = self.NEAR if near is None else near
58 | self.far = self.FAR if far is None else far
59 |
60 | self.training = training
61 |
62 | self.device = device
63 |
64 | self.color_bkgd_aug = color_bkgd_aug
65 |
66 | self.weights_file_path = os.path.join(data_dir, weights_file_name)
67 |
68 | self.camtoworlds, self.focal = _load_renderings(#_from_RAM(
69 | data_dir, split, self.HEIGHT, self.WIDTH
70 | )
71 | self.camtoworlds = (
72 | torch.from_numpy(self.camtoworlds).to(self.device).to(torch.float32)
73 | )
74 | self.K = torch.tensor(
75 | [
76 | [self.focal, 0, self.WIDTH / 2.0],
77 | [0, self.focal, self.HEIGHT / 2.0],
78 | [0, 0, 1],
79 | ],
80 | dtype=torch.float32,
81 | device=device,
82 | ) # (3, 3)
83 |
84 | def __len__(self):
85 | return len(self.camtoworlds)
86 |
87 | @torch.no_grad()
88 | def __getitem__(self, index):
89 | data = self.fetch_data(index)
90 | data = self.preprocess(data)
91 | return data
92 |
93 | def preprocess(self, data):
94 | """Process the fetched / cached data with randomness."""
95 | rays = data["rays"]
96 | # pixels, alpha = torch.split(rgba, [3, 1], dim=-1)
97 |
98 | if self.training:
99 | if self.color_bkgd_aug == "random":
100 | color_bkgd = torch.rand(3, device=self.device)
101 | elif self.color_bkgd_aug == "white":
102 | color_bkgd = torch.ones(3, device=self.device)
103 | elif self.color_bkgd_aug == "black":
104 | color_bkgd = torch.zeros(3, device=self.device)
105 | else:
106 | color_bkgd = torch.zeros(3, device=self.device)
107 |
108 | # pixels = pixels * alpha + color_bkgd * (1.0 - alpha)
109 | return {
110 | "rays": rays, # [n_rays,] or [h, w]
111 | "color_bkgd": color_bkgd, # [3,]
112 | **{k: v for k, v in data.items() if k not in ["rgba", "rays"]},
113 | }
114 |
115 | def update_num_rays(self, num_rays):
116 | self.num_rays = num_rays
117 |
118 | def fetch_data(self, index):
119 | """Fetch the data (it maybe cached for multiple batches)."""
120 |
121 | num_rays = self.num_rays
122 |
123 | if self.training:
124 | camtoworld_id = torch.randint(
125 | 0,
126 | len(self.camtoworlds),
127 | size=(num_rays,),
128 | device=self.device,
129 | )
130 |
131 | x = torch.randint(
132 | 0, self.WIDTH, size=(num_rays,), device=self.device
133 | )
134 | y = torch.randint(
135 | 0, self.HEIGHT, size=(num_rays,), device=self.device
136 | )
137 | else:
138 | camtoworld_id = [index]
139 | x, y = torch.meshgrid(
140 | torch.arange(self.WIDTH, device=self.device),
141 | torch.arange(self.HEIGHT, device=self.device),
142 | indexing="xy",
143 | )
144 | x = x.flatten()
145 | y = y.flatten()
146 |
147 | # generate rays
148 | c2w = self.camtoworlds[camtoworld_id] # (num_rays, 3, 4)
149 |
150 | camera_dirs = F.pad(
151 | torch.stack(
152 | [
153 | (x - self.K[0, 2] + 0.5) / self.K[0, 0],
154 | (y - self.K[1, 2] + 0.5)
155 | / self.K[1, 1]
156 | * (-1.0 if self.OPENGL_CAMERA else 1.0),
157 | ],
158 | dim=-1,
159 | ),
160 | (0, 1),
161 | value=(-1.0 if self.OPENGL_CAMERA else 1.0),
162 | ) # [num_rays, 3]
163 |
164 | directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
165 | origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
166 | viewdirs = directions / torch.linalg.norm(
167 | directions, dim=-1, keepdims=True
168 | )
169 |
170 | if self.training:
171 | origins = torch.reshape(origins, (num_rays, 3))
172 | viewdirs = torch.reshape(viewdirs, (num_rays, 3))
173 | else:
174 | origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3))
175 | viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3))
176 |
177 | rays = Rays(origins=origins, viewdirs=viewdirs)
178 |
179 | return {
180 | "rays": rays, # [h, w, 3]
181 | }
182 |
--------------------------------------------------------------------------------
/nerf2vec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/nerf2vec/__init__.py
--------------------------------------------------------------------------------
/nerf2vec/config.py:
--------------------------------------------------------------------------------
1 | """
2 | # ####################
3 | # NERF2VEC
4 | # ####################
5 | """
6 | #
7 | # DIMENSIONS
8 | #
9 | ENCODER_EMBEDDING_DIM = 1024
10 | ENCODER_HIDDEN_DIM = [512, 512, 1024, 1024]
11 |
12 |
13 | DECODER_INPUT_DIM = 3
14 | DECODER_HIDDEN_DIM = 1024
15 | DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP = 2
16 | DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP = 2
17 | DECODER_OUT_DIM = 4
18 |
19 | #
20 | # TRAIN
21 | #
22 | NUM_EPOCHS = 501
23 | BATCH_SIZE = 16
24 | LR = 1e-4
25 | WD = 1e-2
26 | BG_WEIGHT = 0.2
27 | FG_WEIGHT = 1 - BG_WEIGHT
28 |
29 | """
30 | # ####################
31 | # NERFACC
32 | # ####################
33 | """
34 | #
35 | # GRID
36 | #
37 | import os
38 | try:
39 | from nerfacc import ContractionType
40 | GRID_CONTRACTION_TYPE = ContractionType.AABB
41 | except ImportError:
42 | pass
43 | GRID_AABB = [-0.7, -0.7, -0.7, 0.7, 0.7, 0.7]
44 | GRID_RESOLUTION = 96
45 | GRID_CONFIG_N_SAMPLES = 1024
46 |
47 | GRID_RECONSTRUCTION_TOTAL_ITERATIONS = 20
48 | GRID_RECONSTRUCTION_WARMUP_ITERATIONS = 5
49 | GRID_NUMBER_OF_CELLS = 884736 # (884736 if resolution == 96, 2097152 if resolution == 128)
50 | GRID_BACKGROUND_CELLS_TO_SAMPLE = 32000
51 |
52 | #
53 | # RAYS
54 | #
55 | NUM_RAYS = 55000
56 | MAX_FOREGROUND_COORDINATES = 25000
57 | MAX_BACKGROUND_COORDINATES = 10000
58 |
59 | #
60 | # INSTANT-NGP
61 | #
62 | MLP_INPUT_SIZE = 3
63 | MLP_ENCODING_SIZE = 24
64 | MLP_INPUT_SIZE_AFTER_ENCODING = MLP_INPUT_SIZE * MLP_ENCODING_SIZE * 2
65 | MLP_OUTPUT_SIZE = 4
66 | MLP_HIDDEN_LAYERS = 3
67 | MLP_UNITS = 64
68 |
69 | INSTANT_NGP_MLP_CONF = {
70 | 'aabb': GRID_AABB,
71 | 'unbounded':False,
72 | 'encoding':'Frequency',
73 | 'mlp':'FullyFusedMLP',
74 | 'activation':'ReLU',
75 | 'n_hidden_layers':MLP_HIDDEN_LAYERS,
76 | 'n_neurons':MLP_UNITS,
77 | 'encoding_size':MLP_ENCODING_SIZE
78 | }
79 |
80 | INSTANT_NGP_ENCODING_CONF = {
81 | "otype": "Frequency",
82 | "n_frequencies": 24
83 | }
84 |
85 | NERF_WEIGHTS_FILE_NAME = 'nerf_weights.pth'
86 |
87 | #
88 | # TINY-CUDA
89 | #
90 | TINY_CUDA_MIN_SIZE = 16
91 |
92 | """
93 | # ####################
94 | # LOGGING
95 | # ####################
96 | """
97 | WANDB_CONFIG = {
98 | 'ENCODER_EMBEDDING_DIM': ENCODER_EMBEDDING_DIM,
99 | 'ENCODER_HIDDEN_DIM': ENCODER_HIDDEN_DIM,
100 | 'DECODER_INPUT_DIM': DECODER_INPUT_DIM,
101 | 'DECODER_HIDDEN_DIM': DECODER_HIDDEN_DIM,
102 | 'DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP': DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP,
103 | 'DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP': DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP,
104 | 'DECODER_OUT_DIM': DECODER_OUT_DIM,
105 | 'NUM_EPOCHS': NUM_EPOCHS,
106 | 'BATCH_SIZE': BATCH_SIZE,
107 | 'LR': LR,
108 | 'WD': WD,
109 | "NUM_RAYS": NUM_RAYS,
110 | "GRID_RESOLUTION": GRID_RESOLUTION
111 | }
112 |
113 |
114 | """
115 | # ####################
116 | # DATASET
117 | # ####################
118 | """
119 | TRAIN_SPLIT = 'train'
120 | VAL_SPLIT = 'val'
121 | TEST_SPLIT = 'test'
122 |
123 |
124 | LABELS_TO_IDS = {
125 | "02691156": 0, # airplane
126 | "02828884": 1, # bench
127 | "02933112": 2, # cabinet
128 | "02958343": 3, # car
129 | "03001627": 4, # chair
130 | "03211117": 5, # display
131 | "03636649": 6, # lamp
132 | "03691459": 7, # speaker
133 | "04090263": 8, # rifle
134 | "04256520": 9, # sofa
135 | "04379243": 10, # table
136 | "04401088": 11, # phone
137 | "04530566": 12 # watercraft
138 | }
139 |
140 | # TODO: COMMENT THESE!
141 | #'02992529': 4, tablet delete?
142 | #"03948459": 9, gun delete?
143 |
144 | NUM_CLASSES = len(LABELS_TO_IDS)
145 |
--------------------------------------------------------------------------------
/nerf2vec/export_embeddings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 |
7 | import json
8 | import h5py
9 | import torch
10 | import settings
11 |
12 | from pathlib import Path
13 | from typing import Tuple
14 | from torch import Tensor
15 | from models.encoder import Encoder
16 | from nerf2vec import config as nerf2vec_config
17 | from torch.utils.data import DataLoader, Dataset
18 | from nerf2vec.utils import get_class_label, get_mlp_params_as_matrix
19 |
20 | class InrDataset(Dataset):
21 | def __init__(self, split_json: str, device: str, nerf_weights_file_name: str) -> None:
22 | super().__init__()
23 |
24 | with open(split_json) as file:
25 | self.nerf_paths = json.load(file)
26 |
27 | # self.nerf_paths = self._get_nerf_paths('data\\data_TRAINED')
28 | assert isinstance(self.nerf_paths, list), 'The json file provided is not a list.'
29 |
30 | self.device = device
31 | self.nerf_weights_file_name = nerf_weights_file_name
32 |
33 | def __len__(self) -> int:
34 | return len(self.nerf_paths)
35 |
36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor]:
37 |
38 | data_dir = self.nerf_paths[index]
39 | weights_file_path = os.path.join(data_dir, self.nerf_weights_file_name)
40 |
41 | class_label = get_class_label(weights_file_path)
42 | class_id = nerf2vec_config.LABELS_TO_IDS[get_class_label(weights_file_path)] if class_label != -1 else class_label
43 |
44 | matrix = torch.load(weights_file_path, map_location=torch.device(self.device))
45 | matrix = get_mlp_params_as_matrix(matrix['mlp_base.params'])
46 |
47 | return matrix, class_id, data_dir
48 |
49 | def load_nerf2vec_checkpoint():
50 | ckpts_path = Path(settings.NERF2VEC_CKPTS_PATH)
51 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name]
52 | error_msg = "Expected only one ckpt apart from best, found none or too many."
53 | assert len(ckpt_paths) == 1, error_msg
54 | ckpt_path = ckpt_paths[0]
55 | print(f'loading path: {ckpt_path}')
56 | ckpt = torch.load(ckpt_path)
57 |
58 | return ckpt
59 |
60 |
61 | def export_embeddings(device = 'cuda:0'):
62 |
63 | train_dset_json = os.path.abspath(os.path.join('data', 'train.json'))
64 | train_dset = InrDataset(train_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME)
65 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False)
66 |
67 | val_dset_json = os.path.abspath(os.path.join('data', 'validation.json'))
68 | val_dset = InrDataset(val_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME)
69 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False)
70 |
71 | test_dset_json = os.path.abspath(os.path.join('data', 'test.json'))
72 | test_dset = InrDataset(test_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME)
73 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False)
74 |
75 | encoder = Encoder(
76 | nerf2vec_config.MLP_UNITS,
77 | nerf2vec_config.ENCODER_HIDDEN_DIM,
78 | nerf2vec_config.ENCODER_EMBEDDING_DIM
79 | )
80 | encoder = encoder.to(device)
81 | ckpt = load_nerf2vec_checkpoint()
82 | encoder.load_state_dict(ckpt["encoder"])
83 | encoder.eval()
84 |
85 | loaders = [train_loader, val_loader, test_loader]
86 | splits = [nerf2vec_config.TRAIN_SPLIT, nerf2vec_config.VAL_SPLIT, nerf2vec_config.TEST_SPLIT]
87 |
88 |
89 | for loader, split in zip(loaders, splits):
90 | idx = 0
91 |
92 | for batch in loader:
93 | matrices, class_ids, data_dirs = batch
94 | matrices = matrices.cuda()
95 |
96 | with torch.no_grad():
97 | embeddings = encoder(matrices)
98 |
99 | out_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR)
100 | h5_path = out_root / Path(f"{split}") / f"{idx}.h5"
101 | h5_path.parent.mkdir(parents=True, exist_ok=True)
102 |
103 | with h5py.File(h5_path, "w") as f:
104 | # print(f'dir: {data_dirs[0]}, class: {class_ids[0]}')
105 | f.create_dataset("data_dir", data=data_dirs[0])
106 | f.create_dataset("embedding", data=embeddings[0].detach().cpu().numpy())
107 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy())
108 |
109 | idx += 1
110 |
111 | if idx % 5000 == 0:
112 | print(f'Created {idx} embeddings for {split} split')
113 |
114 | def main() -> None:
115 | export_embeddings(device=settings.DEVICE_NAME)
116 |
117 | if __name__ == "__main__":
118 | main()
--------------------------------------------------------------------------------
/nerf2vec/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import gzip
4 | import torch
5 | import shutil
6 | import numpy as np
7 |
8 | from collections import OrderedDict
9 | from typing import Any, Dict
10 | from torch import Tensor
11 |
12 | from nerf2vec import config as nerf2vec_config
13 | from nerf.utils import Rays
14 |
15 | import torch.nn.functional as F
16 |
17 | def next_multiple(val, divisor):
18 | """
19 | Implementation ported directly from TinyCuda implementation
20 | See https://github.com/NVlabs/tiny-cuda-nn/blob/master/include/tiny-cuda-nn/common.h#L300
21 | """
22 | return next_pot(div_round_up(val, divisor) * divisor)
23 |
24 |
25 | def div_round_up(val, divisor):
26 | return next_pot((val + divisor - 1) / divisor)
27 |
28 |
29 | def next_pot(v):
30 | v=int(v)
31 | v-=1
32 | v | v >> 1
33 | v | v >> 2
34 | v | v >> 4
35 | v | v >> 8
36 | v | v >> 16
37 | return v+1
38 |
39 |
40 | def next_multiple_2(val, divisor):
41 | """
42 | Additional implementation added for testing purposes
43 | """
44 | return ((val - 1) | (divisor -1)) + 1
45 |
46 |
47 | def get_mlp_params_as_matrix(flattened_params: Tensor, sd: Dict[str, Any] = None) -> Tensor:
48 |
49 | if sd is None:
50 | sd = get_mlp_sample_sd()
51 |
52 | params_shapes = [p.shape for p in sd.values()]
53 | feat_dim = params_shapes[0][0]
54 |
55 | padding_size = (feat_dim-params_shapes[-1][0]) * params_shapes[-1][1]
56 | padding_tensor = torch.zeros(padding_size)
57 | params = torch.cat((flattened_params, padding_tensor), dim=0)
58 |
59 | return params.reshape((-1, feat_dim))
60 |
61 |
62 | def get_mlp_sample_sd():
63 | sample_sd = OrderedDict()
64 | sample_sd['input'] = torch.zeros(nerf2vec_config.MLP_UNITS, next_multiple(nerf2vec_config.MLP_INPUT_SIZE_AFTER_ENCODING, nerf2vec_config.TINY_CUDA_MIN_SIZE))
65 | for i in range(nerf2vec_config.MLP_HIDDEN_LAYERS):
66 | sample_sd[f'hid_{i}'] = torch.zeros(nerf2vec_config.MLP_UNITS, nerf2vec_config.MLP_UNITS)
67 | sample_sd['output'] = torch.zeros(next_multiple(nerf2vec_config.MLP_OUTPUT_SIZE, nerf2vec_config.TINY_CUDA_MIN_SIZE), nerf2vec_config.MLP_UNITS)
68 |
69 | return sample_sd
70 |
71 |
72 | def get_grid_file_name(file_path):
73 | # Split the path into individual directories
74 | directories = os.path.normpath(file_path).split(os.sep)
75 | # Get the last two directories
76 | last_two_dirs = directories[-2:]
77 | # Join the last two directories with an underscore
78 | file_name = '_'.join(last_two_dirs) + '.pth'
79 | return file_name
80 |
81 |
82 | def get_class_label(file_path):
83 | directories = os.path.normpath(file_path).split(os.sep)
84 | class_label = directories[-3]
85 |
86 | return class_label
87 |
88 |
89 | def get_class_label_from_nerf_root_path(file_path):
90 | directories = os.path.normpath(file_path).split(os.sep)
91 | class_label = directories[-2]
92 |
93 | return class_label
94 |
95 |
96 | def get_nerf_name_from_grid(file_path):
97 | grid_name = os.path.basename(file_path)
98 | nerf_name = os.path.splitext(grid_name)[0]
99 | return nerf_name
100 |
101 |
102 | def unzip_file(file_path, extract_dir, file_name):
103 | with gzip.open(os.path.join(file_path, 'grid.pth.gz'), 'rb') as f_in:
104 | output_path = os.path.join(extract_dir, file_name)
105 | with open(output_path, 'wb') as f_out:
106 | shutil.copyfileobj(f_in, f_out)
107 |
108 |
109 | # ################################################################################
110 | # CAMERA POSE MATRIX GENERATION METHODS
111 | # ################################################################################
112 | def get_translation_t(t):
113 | """Get the translation matrix for movement in t."""
114 | matrix = [
115 | [1, 0, 0, 0],
116 | [0, 1, 0, 0],
117 | [0, 0, 1, t],
118 | [0, 0, 0, 1],
119 | ]
120 |
121 | return torch.tensor(matrix, dtype=torch.float32)
122 |
123 |
124 | def get_rotation_phi(phi):
125 | """Get the rotation matrix for movement in phi."""
126 | matrix = [
127 | [1, 0, 0, 0],
128 | [0, torch.cos(phi), -torch.sin(phi), 0],
129 | [0, torch.sin(phi), torch.cos(phi), 0],
130 | [0, 0, 0, 1],
131 | ]
132 | return torch.tensor(matrix, dtype=torch.float32)
133 |
134 |
135 | def get_rotation_theta(theta):
136 | """Get the rotation matrix for movement in theta."""
137 | matrix = [
138 | [torch.cos(theta), 0, -torch.sin(theta), 0],
139 | [0, 1, 0, 0],
140 | [torch.sin(theta), 0, torch.cos(theta), 0],
141 | [0, 0, 0, 1],
142 | ]
143 | return torch.tensor(matrix, dtype=torch.float32)
144 |
145 |
146 | def pose_spherical(theta, phi, t):
147 | """
148 | Get the camera to world matrix for the corresponding theta, phi
149 | and t.
150 | """
151 | c2w = get_translation_t(t)
152 | c2w = get_rotation_phi(phi / 180.0 * np.pi) @ c2w
153 | c2w = get_rotation_theta(theta / 180.0 * np.pi) @ c2w
154 | c2w = torch.from_numpy(np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [
155 | 0, 0, 0, 1]], dtype=np.float32)) @ c2w
156 | return c2w
157 |
158 | # ################################################################################
159 | # RAYS GENERATION
160 | # ################################################################################
161 | def get_rays(
162 | device,
163 |         camera_angle_x=0.8575560450553894, # Parameter taken from trained NeRFs
164 | width=224,
165 | height=224):
166 |
167 | # Get camera pose
168 |     theta = torch.tensor(90.0, device=device) # The horizontal camera position (change the value between 0 and 360 to make a full cycle around the object)
169 | phi = torch.tensor(-30.0, device=device) # The vertical camera position
170 | t = torch.tensor(1.5, device=device) # camera distance from object
171 | c2w = pose_spherical(theta, phi, t)
172 | c2w = c2w.to(device)
173 |
174 | # Compute the focal_length
175 | focal_length = 0.5 * width / np.tan(0.5 * camera_angle_x)
176 |
177 | rays = generate_rays(device, width, height, focal_length, c2w)
178 |
179 | return rays
180 |
181 | def generate_rays(device, width, height, focal, c2w, OPENGL_CAMERA=True):
182 | x, y = torch.meshgrid(
183 | torch.arange(width, device=device),
184 | torch.arange(height, device=device),
185 | indexing="xy",
186 | )
187 | x = x.flatten()
188 | y = y.flatten()
189 |
190 | K = torch.tensor(
191 | [
192 | [focal, 0, width / 2.0],
193 | [0, focal, height / 2.0],
194 | [0, 0, 1],
195 | ],
196 | dtype=torch.float32,
197 | device=device,
198 | ) # (3, 3)
199 |
200 | camera_dirs = F.pad(
201 | torch.stack(
202 | [
203 | (x - K[0, 2] + 0.5) / K[0, 0],
204 | (y - K[1, 2] + 0.5)
205 | / K[1, 1]
206 | * (-1.0 if OPENGL_CAMERA else 1.0),
207 | ],
208 | dim=-1,
209 | ),
210 | (0, 1),
211 | value=(-1.0 if OPENGL_CAMERA else 1.0),
212 | ) # [num_rays, 3]
213 | camera_dirs.to(device)
214 |
215 | directions = (camera_dirs[:, None, :] * c2w[:3, :3]).sum(dim=-1)
216 | origins = torch.broadcast_to(c2w[:3, -1], directions.shape)
217 | viewdirs = directions / torch.linalg.norm(
218 | directions, dim=-1, keepdims=True
219 | )
220 |
221 | origins = torch.reshape(origins, (height, width, 3))#.unsqueeze(0)
222 | viewdirs = torch.reshape(viewdirs, (height, width, 3))#.unsqueeze(0)
223 |
224 | rays = Rays(origins=origins, viewdirs=viewdirs)
225 |
226 | return rays
227 |
228 | def get_latest_checkpoints_path(ckpts_path):
229 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name]
230 | error_msg = "Expected only one ckpt apart from best, found none or too many."
231 | assert len(ckpt_paths) == 1, error_msg
232 |
233 | ckpt_path = ckpt_paths[0]
234 |
235 | return ckpt_path
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | os.environ["WANDB_SILENT"] = "true"
4 | os.environ["WANDB_MODE"] = "disabled"
5 |
6 | cuda_idx = 0
7 | DEVICE_NAME = 'cuda:%s' % cuda_idx # Keep compatibility with older code
8 |
9 | try:
10 | import torch
11 | torch.cuda.set_device(cuda_idx)
12 | print('set cuda device to %s' % cuda_idx)
13 | except ImportError:
14 | print('torch not installed, cannot set cuda device')
15 | pass
16 |
17 | """
18 | # ##################################################
19 | # PATHS USED BY DIFFERENT MODULES
20 | # ##################################################
21 | """
22 |
23 | # DATASET
24 | TRAIN_DSET_JSON = os.path.abspath(os.path.join('data', 'train.json'))
25 | VAL_DSET_JSON = os.path.abspath(os.path.join('data', 'validation.json'))
26 | TEST_DSET_JSON = os.path.abspath(os.path.join('data', 'test.json'))
27 |
28 | # NERF2VEC
29 | NERF2VEC_CKPTS_PATH = os.path.join('nerf2vec', 'train', 'ckpts')
30 | NERF2VEC_ALL_CKPTS_PATH = os.path.join('nerf2vec', 'train', 'all_ckpts')
31 | NERF2VEC_EMBEDDINGS_DIR = os.path.join('nerf2vec', 'embeddings')
32 |
33 | # CLASSIFICATION
34 | CLASSIFICATION_OUTPUT_DIR = os.path.join('task_classification', 'train')
35 |
36 | # GENERATION
37 | GENERATION_EMBEDDING_DIR = os.path.join('task_generation', 'latent_embeddings')
38 | GENERATION_OUT_DIR = os.path.join('task_generation', 'experiments', '{}') # The placeholder will contain the class index
39 | GENERATION_NERF2VEC_FULL_CKPT_PATH = os.path.join('task_classification', 'train', 'ckpts', '499.pt')
40 | GENERATION_LATENT_GAN_FULL_CKPT_PATH = os.path.join('task_generation', 'experiments', 'nerf2vec_{}', 'generated_embeddings', 'epoch_2000.npz') # The placeholder will contain the class index
41 |
42 |
43 |
--------------------------------------------------------------------------------
/task_classification/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_classification/__init__.py
--------------------------------------------------------------------------------
/task_classification/config.py:
--------------------------------------------------------------------------------
1 | TRAIN_BS = 256
2 | VAL_BS = 256
3 | LAYERS_DIM = [1024, 512, 256]
4 |
5 | LR = 1e-4
6 | WD = 1e-2
7 | NUM_EPOCHS = 150
8 |
9 |
10 | # Add any attributes that you want to be logged by wandb
11 | WANDB_CONFIG = {
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/task_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/__init__.py
--------------------------------------------------------------------------------
/task_generation/export_embeddings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 |
7 | from nerf2vec import config as nerf2vec_config
8 |
9 | from pathlib import Path
10 |
11 | import h5py
12 | import numpy as np
13 | import settings
14 |
15 |
16 | def export_embeddings() -> None:
17 |
18 | split = 'train'
19 | nerf_embeddings_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR) / split
20 | out_root = Path(settings.GENERATION_EMBEDDING_DIR)
21 | out_root.mkdir(parents=True, exist_ok=True)
22 |
23 | num_classes = nerf2vec_config.NUM_CLASSES
24 |
25 | embeddings_paths = list(nerf_embeddings_root.glob("*.h5"))
26 |
27 | embeddings = {}
28 | for cls in range(num_classes):
29 | embeddings[cls] = []
30 |
31 | print('Extracting embeddings...')
32 | for idx, path in enumerate(embeddings_paths):
33 | with h5py.File(path, "r") as f:
34 | embedding = np.array(f.get("embedding"))
35 | class_id = np.array(f.get("class_id")).item()
36 | embeddings[class_id].append(embedding)
37 |
38 | if idx % 5000 == 0:
39 | print(f'\t {idx}/{len(embeddings_paths)}')
40 |
41 | for class_id in range(num_classes):
42 | print(f'Processing class: {class_id}')
43 | if class_id == 2:
44 | print()
45 | path_out = out_root / f"embeddings_{class_id}.npz"
46 | stacked_embeddings = np.stack(embeddings[class_id])
47 | np.savez_compressed(path_out, embeddings=stacked_embeddings)
48 |
49 | def main() -> None:
50 | export_embeddings()
51 |
52 | if __name__ == "__main__":
53 | main()
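The script writes one compressed archive per class, with the stacked embeddings stored under the `embeddings` key. A minimal sketch of how a downstream consumer (e.g. latent GAN training) could read one of them; the file name assumes class index 0 and the default `GENERATION_EMBEDDING_DIR`:

```python
# Hypothetical sketch: load the exported embeddings of a single class.
import numpy as np

data = np.load("task_generation/latent_embeddings/embeddings_0.npz")
embeddings = data["embeddings"]  # assumed shape: (num_nerfs_in_class, embedding_dim)
```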
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/.gitignore:
--------------------------------------------------------------------------------
1 | .project
2 | .ipynb_checkpoints
3 | .DS_Store
4 | .pydevproject
5 | *.pyc
6 | *.nfs*
7 | data/*
8 | external/structural_losses/*.o
9 | external/structural_losses/*.so
10 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/.gitmodules
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/LICENSE.md:
--------------------------------------------------------------------------------
1 | Learning Representations And Generative Models For 3D Point Clouds
2 |
3 | Copyright (c) 2017, Geometric Computation Group of Stanford University
4 |
5 | The MIT License (MIT)
6 |
7 | Copyright (c) 2017 Panos Achlioptas
8 |
9 | Permission is hereby granted, free of charge, to any person obtaining a copy
10 | of this software and associated documentation files (the "Software"), to deal
11 | in the Software without restriction, including without limitation the rights
12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | copies of the Software, and to permit persons to whom the Software is
14 | furnished to do so, subject to the following conditions:
15 |
16 | The above copyright notice and this permission notice shall be included in all
17 | copies or substantial portions of the Software.
18 |
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
26 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/README.md:
--------------------------------------------------------------------------------
1 | # Learning Representations and Generative Models For 3D Point Clouds
2 | Created by Panos Achlioptas, Olga Diamanti, Ioannis Mitliagkas, Leonidas J. Guibas.
3 |
4 | ![teaser](doc/images/teaser.jpg)
5 |
6 |
7 | ## Introduction
8 | This work is based on our [arXiv tech report](https://arxiv.org/abs/1707.02392). We proposed a novel deep net architecture for auto-encoding point clouds. The learned representations were amenable to semantic part editing, shape analogies, linear classification and shape interpolations.
9 |
10 |
11 |
12 | ## Citation
13 | If you find our work useful in your research, please consider citing:
14 |
15 | @article{achlioptas2017latent_pc,
16 | title={Learning Representations and Generative Models For 3D Point Clouds},
17 | author={Achlioptas, Panos and Diamanti, Olga and Mitliagkas, Ioannis and Guibas, Leonidas J},
18 | journal={arXiv preprint arXiv:1707.02392},
19 | year={2017}
20 | }
21 |
22 |
23 | ## Dependencies
24 | Requirements:
25 | - Python 2.7+ with Numpy, Scipy and Matplotlib
26 | - [Tensorflow (version 1.0+)](https://www.tensorflow.org/get_started/os_setup)
27 | - [TFLearn](http://tflearn.org/installation)
28 |
29 | Our code has been tested with Python 2.7, TensorFlow 1.3.0, TFLearn 0.3.2, CUDA 8.0 and cuDNN 6.0 on Ubuntu 14.04.
30 |
31 |
32 | ## Installation
33 | Download the source code from the git repository:
34 | ```
35 | git clone https://github.com/optas/latent_3d_points
36 | ```
37 |
38 | To be able to train your own model you need first to _compile_ the EMD/Chamfer losses. In latent_3d_points/external/structural_losses we have included the cuda implementations of [Fan et. al](https://github.com/fanhqme/PointSetGeneration).
39 | ```
40 | cd latent_3d_points/external
41 |
42 | with your editor modify the first three lines of the makefile to point to
43 | your nvcc, cudalib and tensorflow library.
44 |
45 | make
46 | ```
47 |
48 | ### Data Set
49 | We provide ~57K point-clouds, each sampled from a mesh model of
50 | ShapeNetCore
51 | with (area) uniform sampling. To download them (1.4GB):
52 | ```
53 | cd latent_3d_points/
54 | ./download_data.sh
55 | ```
56 | The point-clouds will be stored in latent_3d_points/data/shape_net_core_uniform_samples_2048
57 |
58 | Use the function snc_category_to_synth_id, defined in src/in_out.py, to map a class name such as "chair" to its synthetic_id: "03001627". Point-clouds of models of the same class are stored under a commonly named folder.
59 |
60 |
61 | ### Usage
62 | To train a point-cloud AE look at:
63 |
64 | latent_3d_points/notebooks/train_single_class_ae.ipynb
65 |
66 | To train a latent-GAN based on a pre-trained AE look at:
67 |
68 | latent_3d_points/notebooks/train_latent_gan.ipynb
69 |
70 | To train a raw-GAN:
71 |
72 | latent_3d_points/notebooks/train_raw_gan.ipynb
73 |
74 | To use the evaluation metrics (MMD, Coverage, JSD) between two point-cloud sets look at:
75 |
76 | latent_3d_points/notebooks/compute_evaluation_metrics.ipynb
77 |
78 |
79 |
80 | ## License
81 | This project is licensed under the terms of the MIT license (see LICENSE.md for details).
82 |
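Following the Data Set section above, a short sketch of the class-name-to-synset mapping (the import path is the one used by the bundled notebooks; the returned id for "chair" matches the table in `src/in_out.py` later in this listing):

```python
# Sketch: map a ShapeNet class name to its synset id.
from latent_3d_points.src.in_out import snc_category_to_synth_id

syn_id = snc_category_to_synth_id()["chair"]  # -> '03001627'
```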
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/__init__.py
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/doc/images/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/doc/images/teaser.jpg
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/download_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For each Mesh model of Shape-Net-Core download 1 point-cloud with 2048 points
3 | # sampled uniformly at random (around 1.4GB).
4 | wget https://www.dropbox.com/s/vmsdrae6x5xws1v/shape_net_core_uniform_samples_2048.zip?dl=0
5 | mv shape_net_core_uniform_samples_2048.zip\?dl\=0 shape_net_core_uniform_samples_2048.zip
6 | unzip shape_net_core_uniform_samples_2048.zip
7 | rm shape_net_core_uniform_samples_2048.zip
8 | mkdir -p data
9 | mv shape_net_core_uniform_samples_2048 data
10 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/__init__.py
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/python_plyfile/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | *.swp
4 | *.egg-info
5 | plyfile-venv/
6 | build/
7 | dist/
8 | .tox
9 | .cache
10 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/python_plyfile/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/python_plyfile/__init__.py
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from tf_nndistance import nn_distance
3 | from tf_approxmatch import approx_match, match_cost
4 | except:
5 | print('External Losses (Chamfer-EMD) were not loaded.')
6 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/approxmatch.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/structural_losses/approxmatch.cpp
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/approxmatch.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/structural_losses/approxmatch.cu
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/makefile:
--------------------------------------------------------------------------------
1 | nvcc = /usr/local/cuda-8.0/bin/nvcc
2 | cudalib = /usr/local/cuda-8.0/lib64
3 | tensorflow = /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include
4 |
5 | all: tf_approxmatch_so.so tf_approxmatch_g.cu.o tf_nndistance_so.so tf_nndistance_g.cu.o
6 |
7 |
8 | tf_approxmatch_so.so: tf_approxmatch_g.cu.o tf_approxmatch.cpp
9 | g++ -std=c++11 tf_approxmatch.cpp tf_approxmatch_g.cu.o -o tf_approxmatch_so.so -shared -fPIC -I $(tensorflow) -lcudart -L $(cudalib) -O2 -D_GLIBCXX_USE_CXX11_ABI=0
10 |
11 |
12 | tf_approxmatch_g.cu.o: tf_approxmatch_g.cu
13 | $(nvcc) -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -c -o tf_approxmatch_g.cu.o tf_approxmatch_g.cu -I $(tensorflow) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2
14 |
15 |
16 | tf_nndistance_so.so: tf_nndistance_g.cu.o tf_nndistance.cpp
17 | g++ -std=c++11 tf_nndistance.cpp tf_nndistance_g.cu.o -o tf_nndistance_so.so -shared -fPIC -I $(tensorflow) -lcudart -L $(cudalib) -O2 -D_GLIBCXX_USE_CXX11_ABI=0
18 |
19 |
20 | tf_nndistance_g.cu.o: tf_nndistance_g.cu
21 | $(nvcc) -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -c -o tf_nndistance_g.cu.o tf_nndistance_g.cu -I $(tensorflow) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2
22 |
23 |
24 | clean:
25 | rm tf_approxmatch_so.so
26 | rm tf_nndistance_so.so
27 | rm *.cu.o
28 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/tf_approxmatch.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | import os.path as osp
4 |
5 | base_dir = osp.dirname(osp.abspath(__file__))
6 |
7 | approxmatch_module = tf.load_op_library(osp.join(base_dir, 'tf_approxmatch_so.so'))
8 |
9 |
10 | def approx_match(xyz1,xyz2):
11 | '''
12 | input:
13 | xyz1 : batch_size * #dataset_points * 3
14 | xyz2 : batch_size * #query_points * 3
15 | returns:
16 | match : batch_size * #query_points * #dataset_points
17 | '''
18 | return approxmatch_module.approx_match(xyz1,xyz2)
19 | ops.NoGradient('ApproxMatch')
20 | #@tf.RegisterShape('ApproxMatch')
21 | @ops.RegisterShape('ApproxMatch')
22 | def _approx_match_shape(op):
23 | shape1=op.inputs[0].get_shape().with_rank(3)
24 | shape2=op.inputs[1].get_shape().with_rank(3)
25 | return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[1]])]
26 |
27 | def match_cost(xyz1,xyz2,match):
28 | '''
29 | input:
30 | xyz1 : batch_size * #dataset_points * 3
31 | xyz2 : batch_size * #query_points * 3
32 | match : batch_size * #query_points * #dataset_points
33 | returns:
34 | cost : batch_size
35 | '''
36 | return approxmatch_module.match_cost(xyz1,xyz2,match)
37 | #@tf.RegisterShape('MatchCost')
38 | @ops.RegisterShape('MatchCost')
39 | def _match_cost_shape(op):
40 | shape1=op.inputs[0].get_shape().with_rank(3)
41 | shape2=op.inputs[1].get_shape().with_rank(3)
42 | shape3=op.inputs[2].get_shape().with_rank(3)
43 | return [tf.TensorShape([shape1.dims[0]])]
44 | @tf.RegisterGradient('MatchCost')
45 | def _match_cost_grad(op,grad_cost):
46 | xyz1=op.inputs[0]
47 | xyz2=op.inputs[1]
48 | match=op.inputs[2]
49 | grad_1,grad_2=approxmatch_module.match_cost_grad(xyz1,xyz2,match)
50 | return [grad_1*tf.expand_dims(tf.expand_dims(grad_cost,1),2),grad_2*tf.expand_dims(tf.expand_dims(grad_cost,1),2),None]
51 |
52 | if __name__=='__main__':
53 | alpha=0.5
54 | beta=2.0
55 | import bestmatch
56 | import numpy as np
57 | import math
58 | import random
59 | import cv2
60 |
61 | import tf_nndistance
62 |
63 | npoint=100
64 |
65 | with tf.device('/gpu:2'):
66 | pt_in=tf.placeholder(tf.float32,shape=(1,npoint*4,3))
67 | mypoints=tf.Variable(np.random.randn(1,npoint,3).astype('float32'))
68 | match=approx_match(pt_in,mypoints)
69 | loss=tf.reduce_sum(match_cost(pt_in,mypoints,match))
70 | #match=approx_match(mypoints,pt_in)
71 | #loss=tf.reduce_sum(match_cost(mypoints,pt_in,match))
72 | #distf,_,distb,_=tf_nndistance.nn_distance(pt_in,mypoints)
73 | #loss=tf.reduce_sum((distf+1e-9)**0.5)*0.5+tf.reduce_sum((distb+1e-9)**0.5)*0.5
74 | #loss=tf.reduce_max((distf+1e-9)**0.5)*0.5*npoint+tf.reduce_max((distb+1e-9)**0.5)*0.5*npoint
75 |
76 | optimizer=tf.train.GradientDescentOptimizer(1e-4).minimize(loss)
77 | with tf.Session('') as sess:
78 | sess.run(tf.initialize_all_variables())
79 | while True:
80 | meanloss=0
81 | meantrueloss=0
82 | for i in xrange(1001):
83 | #phi=np.random.rand(4*npoint)*math.pi*2
84 | #tpoints=(np.hstack([np.cos(phi)[:,None],np.sin(phi)[:,None],(phi*0)[:,None]])*random.random())[None,:,:]
85 | #tpoints=((np.random.rand(400)-0.5)[:,None]*[0,2,0]+[(random.random()-0.5)*2,0,0]).astype('float32')[None,:,:]
86 | tpoints=np.hstack([np.linspace(-1,1,400)[:,None],(random.random()*2*np.linspace(1,0,400)**2)[:,None],np.zeros((400,1))])[None,:,:]
87 | trainloss,_=sess.run([loss,optimizer],feed_dict={pt_in:tpoints.astype('float32')})
88 | trainloss,trainmatch=sess.run([loss,match],feed_dict={pt_in:tpoints.astype('float32')})
89 | #trainmatch=trainmatch.transpose((0,2,1))
90 | show=np.zeros((400,400,3),dtype='uint8')^255
91 | trainmypoints=sess.run(mypoints)
92 | for i in xrange(len(tpoints[0])):
93 | u=np.random.choice(range(len(trainmypoints[0])),p=trainmatch[0].T[i])
94 | cv2.line(show,
95 | (int(tpoints[0][i,1]*100+200),int(tpoints[0][i,0]*100+200)),
96 | (int(trainmypoints[0][u,1]*100+200),int(trainmypoints[0][u,0]*100+200)),
97 | cv2.cv.CV_RGB(0,255,0))
98 | for x,y,z in tpoints[0]:
99 | cv2.circle(show,(int(y*100+200),int(x*100+200)),2,cv2.cv.CV_RGB(255,0,0))
100 | for x,y,z in trainmypoints[0]:
101 | cv2.circle(show,(int(y*100+200),int(x*100+200)),3,cv2.cv.CV_RGB(0,0,255))
102 | cost=((tpoints[0][:,None,:]-np.repeat(trainmypoints[0][None,:,:],4,axis=1))**2).sum(axis=2)**0.5
103 | #trueloss=bestmatch.bestmatch(cost)[0]
104 | print trainloss#,trueloss
105 | cv2.imshow('show',show)
106 | cmd=cv2.waitKey(10)%256
107 | if cmd==ord('q'):
108 | break
109 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/tf_approxmatch_compile.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | if [ 'tf_approxmatch_g.cu.o' -ot 'tf_approxmatch_g.cu' ] ; then
3 | echo 'nvcc'
4 | /usr/local/cuda-8.0/bin/nvcc tf_approxmatch_g.cu -o tf_approxmatch_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
5 | fi
6 | if [ 'tf_approxmatch_so.so' -ot 'tf_approxmatch.cpp' ] || [ 'tf_approxmatch_so.so' -ot 'tf_approxmatch_g.cu.o' ] ; then
7 | echo 'g++'
8 | g++ -std=c++11 tf_approxmatch.cpp tf_approxmatch_g.cu.o -o tf_approxmatch_so.so -shared -fPIC -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -I /usr/local/cuda-8.0/include -L /usr/local/cuda-8.0/lib64/ -O2
9 | fi
10 |
11 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/tf_nndistance.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | import os.path as osp
4 |
5 | base_dir = osp.dirname(osp.abspath(__file__))
6 |
7 | nn_distance_module = tf.load_op_library(osp.join(base_dir, 'tf_nndistance_so.so'))
8 |
9 |
10 | def nn_distance(xyz1, xyz2):
11 | '''
12 | Computes the distance of nearest neighbors for a pair of point clouds
13 | input: xyz1: (batch_size,#points_1,3) the first point cloud
14 | input: xyz2: (batch_size,#points_2,3) the second point cloud
15 | output: dist1: (batch_size,#point_1) distance from first to second
16 | output: idx1: (batch_size,#point_1) nearest neighbor from first to second
17 | output: dist2: (batch_size,#point_2) distance from second to first
18 | output: idx2: (batch_size,#point_2) nearest neighbor from second to first
19 | '''
20 |
21 | return nn_distance_module.nn_distance(xyz1,xyz2)
22 |
23 | #@tf.RegisterShape('NnDistance')
24 | @ops.RegisterShape('NnDistance')
25 | def _nn_distance_shape(op):
26 | shape1=op.inputs[0].get_shape().with_rank(3)
27 | shape2=op.inputs[1].get_shape().with_rank(3)
28 | return [tf.TensorShape([shape1.dims[0],shape1.dims[1]]),tf.TensorShape([shape1.dims[0],shape1.dims[1]]),
29 | tf.TensorShape([shape2.dims[0],shape2.dims[1]]),tf.TensorShape([shape2.dims[0],shape2.dims[1]])]
30 | @ops.RegisterGradient('NnDistance')
31 | def _nn_distance_grad(op,grad_dist1,grad_idx1,grad_dist2,grad_idx2):
32 | xyz1=op.inputs[0]
33 | xyz2=op.inputs[1]
34 | idx1=op.outputs[1]
35 | idx2=op.outputs[3]
36 | return nn_distance_module.nn_distance_grad(xyz1,xyz2,grad_dist1,idx1,grad_dist2,idx2)
37 |
38 |
39 | if __name__=='__main__':
40 | import numpy as np
41 | import random
42 | import time
43 | from tensorflow.python.kernel_tests.gradient_checker import compute_gradient
44 | random.seed(100)
45 | np.random.seed(100)
46 | with tf.Session('') as sess:
47 | xyz1=np.random.randn(32,16384,3).astype('float32')
48 | xyz2=np.random.randn(32,1024,3).astype('float32')
49 | with tf.device('/gpu:0'):
50 | inp1=tf.Variable(xyz1)
51 | inp2=tf.constant(xyz2)
52 | reta,retb,retc,retd=nn_distance(inp1,inp2)
53 | loss=tf.reduce_sum(reta)+tf.reduce_sum(retc)
54 | train=tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss)
55 | sess.run(tf.initialize_all_variables())
56 | t0=time.time()
57 | t1=t0
58 | best=1e100
59 | for i in xrange(100):
60 | trainloss,_=sess.run([loss,train])
61 | newt=time.time()
62 | best=min(best,newt-t1)
63 | print i,trainloss,(newt-t0)/(i+1),best
64 | t1=newt
65 | #print sess.run([inp1,retb,inp2,retd])
66 | #grads=compute_gradient([inp1,inp2],[(16,32,3),(16,32,3)],loss,(1,),[xyz1,xyz2])
67 | #for i,j in grads:
68 | #print i.shape,j.shape,np.mean(np.abs(i-j)),np.mean(np.abs(i)),np.mean(np.abs(j))
69 | #for i in xrange(10):
70 | #t0=time.time()
71 | #a,b,c,d=sess.run([reta,retb,retc,retd],feed_dict={inp1:xyz1,inp2:xyz2})
72 | #print 'time',time.time()-t0
73 | #print a.shape,b.shape,c.shape,d.shape
74 | #print a.dtype,b.dtype,c.dtype,d.dtype
75 | #samples=np.array(random.sample(range(xyz2.shape[1]),100),dtype='int32')
76 | #dist1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).min(axis=-1)
77 | #idx1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1)
78 | #print np.abs(dist1-a[:,samples]).max()
79 | #print np.abs(idx1-b[:,samples]).max()
80 | #dist2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).min(axis=-1)
81 | #idx2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1)
82 | #print np.abs(dist2-c[:,samples]).max()
83 | #print np.abs(idx2-d[:,samples]).max()
84 |
85 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/tf_nndistance_compile.sh:
--------------------------------------------------------------------------------
1 | /usr/local/cuda-8.0/bin/nvcc -std=c++11 -c -o tf_nndistance_g.cu.o tf_nndistance_g.cu -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -O2 && g++ -std=c++11 tf_nndistance.cpp tf_nndistance_g.cu.o -o tf_nndistance_so.so -shared -fPIC -I /orions4-zfs/projects/optas/Virt_Env/tf_1.3/lib/python2.7/site-packages/tensorflow/include -L /usr/local/cuda-8.0/lib64 -O2
2 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/external/structural_losses/tf_nndistance_g.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/external/structural_losses/tf_nndistance_g.cu
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/notebooks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/notebooks/__init__.py
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/notebooks/compute_evaluation_metrics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Assuming 2 sets of point-clouds, we will compute the MMD, Coverage and JSD as done in the paper.\n",
8 | "\n",
9 | "(To compute these metrics you __don't need__ to have tflearn installed, only the structural: EMD, Chamfer losses and sklearn for the JSD.)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "collapsed": false
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np\n",
21 | "import os.path as osp\n",
22 | "\n",
23 | "from latent_3d_points.src.evaluation_metrics import minimum_mathing_distance, \\\n",
24 | "jsd_between_point_cloud_sets, coverage\n",
25 | "\n",
26 | "from latent_3d_points.src.in_out import snc_category_to_synth_id,\\\n",
27 | " load_all_point_clouds_under_folder"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "Load some point-clouds and make two sets (sample_pcs, ref_pcs) from them. The ref_pcs is considered as the __ground-truth__ data while the sample_pcs corresponds to a set that is matched against it, e.g. comes from a generative model."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "%load_ext autoreload\n",
46 | "%autoreload 2\n",
47 | "%matplotlib inline"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {
54 | "collapsed": false
55 | },
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "Give me the class name (e.g. \"chair\"): chair\n",
62 | "6778 pclouds were loaded. They belong in 1 shape-classes.\n"
63 | ]
64 | }
65 | ],
66 | "source": [
67 | "top_in_dir = '../data/shape_net_core_uniform_samples_2048/' # Top-dir of where point-clouds are stored.\n",
68 | "class_name = raw_input('Give me the class name (e.g. \"chair\"): ').lower()\n",
69 | "syn_id = snc_category_to_synth_id()[class_name]\n",
70 | "class_dir = osp.join(top_in_dir , syn_id)\n",
71 | "all_pc_data = load_all_point_clouds_under_folder(class_dir, n_threads=8, file_ending='.ply', verbose=True)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {
78 | "collapsed": true
79 | },
80 | "outputs": [],
81 | "source": [
82 | "n_ref = 100 # size of ref_pcs.\n",
83 | "n_sam = 150 # size of sample_pcs.\n",
84 | "all_ids = np.arange(all_pc_data.num_examples)\n",
85 | "ref_ids = np.random.choice(all_ids, n_ref, replace=False)\n",
86 | "sam_ids = np.random.choice(all_ids, n_sam, replace=False)\n",
87 | "ref_pcs = all_pc_data.point_clouds[ref_ids]\n",
88 | "sample_pcs = all_pc_data.point_clouds[sam_ids]"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "Compute the three metrics."
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 11,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [],
105 | "source": [
106 | "ae_loss = 'chamfer' # Which distance to use for the matchings.\n",
107 | "\n",
108 | "if ae_loss == 'emd':\n",
109 | " use_EMD = True\n",
110 | "else:\n",
111 | " use_EMD = False # Will use Chamfer instead.\n",
112 | " \n",
113 | "batch_size = 100 # Find appropriate number that fits in GPU.\n",
114 | "normalize = True # Matched distances are divided by the number of \n",
115 |     "                 # points of the point-clouds.\n",
116 | "\n",
117 | "mmd, matched_dists = minimum_mathing_distance(sample_pcs, ref_pcs, batch_size, normalize=normalize, use_EMD=use_EMD)\n",
118 | "\n",
119 | "cov, matched_ids = coverage(sample_pcs, ref_pcs, batch_size, normalize=normalize, use_EMD=use_EMD)\n",
120 | "\n",
121 | "jsd = jsd_between_point_cloud_sets(sample_pcs, ref_pcs, resolution=28)"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 12,
127 | "metadata": {
128 | "collapsed": false
129 | },
130 | "outputs": [
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | "0.0714721 0.73 0.0396569736382\n"
136 | ]
137 | }
138 | ],
139 | "source": [
140 | "print mmd, cov, jsd"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "For a detailed breakdown of the evaluation functions, inspect their docs."
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 24,
153 | "metadata": {
154 | "collapsed": false
155 | },
156 | "outputs": [
157 | {
158 | "name": "stdout",
159 | "output_type": "stream",
160 | "text": [
161 | "Computes the Coverage between two sets of point-clouds.\n",
162 | "\n",
163 | " Args:\n",
164 | " sample_pcs (numpy array SxKx3): the S point-clouds, each of K points that will be matched\n",
165 | " and compared to a set of \"reference\" point-clouds.\n",
166 | " ref_pcs (numpy array RxKx3): the R point-clouds, each of K points that constitute the\n",
167 | " set of \"reference\" point-clouds.\n",
168 | " batch_size (int): specifies how large will the batches be that the compute will use to\n",
169 | " make the comparisons of the sample-vs-ref point-clouds.\n",
170 | " normalize (boolean): When the matching is based on Chamfer (default behavior), if True,\n",
171 | " the Chamfer is computed as the average of the matched point-wise squared euclidean\n",
172 | " distances. Alternatively, is their sum.\n",
173 | " use_sqrt (boolean): When the matching is based on Chamfer (default behavior), if True,\n",
174 | " the Chamfer is computed based on the (not-squared) euclidean distances of the matched\n",
175 | " point-wise euclidean distances.\n",
176 | " sess (tf.Session): If None, it will make a new Session for this.\n",
177 | " use_EMD (boolean): If true, the matchings are based on the EMD.\n",
178 | " ret_dist (boolean): If true, it will also return the distances between each sample_pcs and\n",
179 | " it's matched ground-truth.\n",
180 | " Returns: the coverage score (int),\n",
181 | " the indices of the ref_pcs that are matched with each sample_pc\n",
182 | " and optionally the matched distances of the samples_pcs.\n",
183 | " \n",
184 | "Computes the MMD between two sets of point-clouds.\n",
185 | "\n",
186 | " Args:\n",
187 | " sample_pcs (numpy array SxKx3): the S point-clouds, each of K points that will be matched and\n",
188 | " compared to a set of \"reference\" point-clouds.\n",
189 | " ref_pcs (numpy array RxKx3): the R point-clouds, each of K points that constitute the set of\n",
190 | " \"reference\" point-clouds.\n",
191 | " batch_size (int): specifies how large will the batches be that the compute will use to make\n",
192 | " the comparisons of the sample-vs-ref point-clouds.\n",
193 | " normalize (boolean): When the matching is based on Chamfer (default behavior), if True, the\n",
194 | " Chamfer is computed as the average of the matched point-wise squared euclidean distances.\n",
195 | " Alternatively, is their sum.\n",
196 | " use_sqrt: (boolean): When the matching is based on Chamfer (default behavior), if True, the\n",
197 | " Chamfer is computed based on the (not-squared) euclidean distances of the matched point-wise\n",
198 | " euclidean distances.\n",
199 | " sess (tf.Session, default None): if None, it will make a new Session for this.\n",
200 | " use_EMD (boolean: If true, the matchings are based on the EMD.\n",
201 | "\n",
202 | " Returns:\n",
203 | " A tuple containing the MMD and all the matched distances of which the MMD is their mean.\n",
204 | " \n",
205 | " JSD between two sets of point-clouds, as introduced in the paper ```Learning Representations And Generative Models For 3D Point Clouds```. \n",
206 | " Args:\n",
207 | " sample_pcs: (np.ndarray S1xR2x3) S1 point-clouds, each of R1 points.\n",
208 | " ref_pcs: (np.ndarray S2xR2x3) S2 point-clouds, each of R2 points.\n",
209 | " resolution: (int) grid-resolution. Affects granularity of measurements.\n",
210 | " \n"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "print coverage.__doc__\n",
216 | "print minimum_mathing_distance.__doc__\n",
217 | "print jsd_between_point_cloud_sets.__doc__"
218 | ]
219 | }
220 | ],
221 | "metadata": {
222 | "kernelspec": {
223 | "display_name": "TensorFlow1",
224 | "language": "python",
225 | "name": "tf1"
226 | },
227 | "language_info": {
228 | "codemirror_mode": {
229 | "name": "ipython",
230 | "version": 2
231 | },
232 | "file_extension": ".py",
233 | "mimetype": "text/x-python",
234 | "name": "python",
235 | "nbconvert_exporter": "python",
236 | "pygments_lexer": "ipython2",
237 | "version": "2.7.6"
238 | }
239 | },
240 | "nbformat": 4,
241 | "nbformat_minor": 2
242 | }
243 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_generation/latent_3d_points/src/__init__.py
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/ae_templates.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on September 2, 2017
3 |
4 | @author: optas
5 | '''
6 | import numpy as np
7 |
8 | from . encoders_decoders import encoder_with_convs_and_symmetry, decoder_with_fc_only
9 |
10 |
11 | def mlp_architecture_ala_iclr_18(n_pc_points, bneck_size, bneck_post_mlp=False):
12 | ''' Single class experiments.
13 | '''
14 | if n_pc_points != 2048:
15 | raise ValueError()
16 |
17 | encoder = encoder_with_convs_and_symmetry
18 | decoder = decoder_with_fc_only
19 |
20 | n_input = [n_pc_points, 3]
21 |
22 | encoder_args = {'n_filters': [64, 128, 128, 256, bneck_size],
23 | 'filter_sizes': [1],
24 | 'strides': [1],
25 | 'b_norm': True,
26 | 'verbose': True
27 | }
28 |
29 | decoder_args = {'layer_sizes': [256, 256, np.prod(n_input)],
30 | 'b_norm': False,
31 | 'b_norm_finish': False,
32 | 'verbose': True
33 | }
34 |
35 | if bneck_post_mlp:
36 | encoder_args['n_filters'].pop()
37 | decoder_args['layer_sizes'][0] = bneck_size
38 |
39 | return encoder, decoder, encoder_args, decoder_args
40 |
41 |
42 | def default_train_params(single_class=True):
43 | params = {'batch_size': 50,
44 | 'training_epochs': 500,
45 | 'denoising': False,
46 | 'learning_rate': 0.0005,
47 | 'z_rotate': False,
48 | 'saver_step': 10,
49 | 'loss_display_step': 1
50 | }
51 |
52 | if not single_class:
53 | params['z_rotate'] = True
54 | params['training_epochs'] = 1000
55 |
56 | return params
57 |
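A hedged sketch of how the two helpers above could be combined to configure the single-class AE (the bottleneck size is an assumption; only 2048-point clouds are accepted):

```python
# Hypothetical sketch: build the ICLR-18 single-class AE configuration.
n_pc_points = 2048  # the only value accepted by the check above
bneck_size = 128    # assumed bottleneck size

encoder, decoder, enc_args, dec_args = mlp_architecture_ala_iclr_18(n_pc_points, bneck_size)
train_params = default_train_params(single_class=True)
```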
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/encoders_decoders.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on February 4, 2017
3 |
4 | @author: optas
5 |
6 | '''
7 |
8 | import tensorflow as tf
9 | import numpy as np
10 | import warnings
11 |
12 | from tflearn.layers.core import fully_connected, dropout
13 | from tflearn.layers.conv import conv_1d, avg_pool_1d
14 | from tflearn.layers.normalization import batch_normalization
15 | from tflearn.layers.core import fully_connected, dropout
16 |
17 | from . tf_utils import expand_scope_by_name, replicate_parameter_for_all_layers
18 |
19 | def encoder_with_convs_and_symmetry(in_signal, n_filters=[64, 128, 256, 1024], filter_sizes=[1], strides=[1],
20 | b_norm=True, non_linearity=tf.nn.relu, regularizer=None, weight_decay=0.001,
21 | symmetry=tf.reduce_max, dropout_prob=None, pool=avg_pool_1d, pool_sizes=None, scope=None,
22 | reuse=False, padding='same', verbose=False, closing=None, conv_op=conv_1d):
23 | '''An Encoder (recognition network), which maps inputs onto a latent space.
24 | '''
25 |
26 | if verbose:
27 | print 'Building Encoder'
28 |
29 | n_layers = len(n_filters)
30 | filter_sizes = replicate_parameter_for_all_layers(filter_sizes, n_layers)
31 | strides = replicate_parameter_for_all_layers(strides, n_layers)
32 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers)
33 |
34 | if n_layers < 2:
35 |         raise ValueError('More than 1 layer is expected.')
36 |
37 | for i in xrange(n_layers):
38 | if i == 0:
39 | layer = in_signal
40 |
41 | name = 'encoder_conv_layer_' + str(i)
42 | scope_i = expand_scope_by_name(scope, name)
43 | layer = conv_op(layer, nb_filter=n_filters[i], filter_size=filter_sizes[i], strides=strides[i], regularizer=regularizer,
44 | weight_decay=weight_decay, name=name, reuse=reuse, scope=scope_i, padding=padding)
45 |
46 | if verbose:
47 | print name, 'conv params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()),
48 |
49 | if b_norm:
50 | name += '_bnorm'
51 | scope_i = expand_scope_by_name(scope, name)
52 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i)
53 | if verbose:
54 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list())
55 |
56 | if non_linearity is not None:
57 | layer = non_linearity(layer)
58 |
59 | if pool is not None and pool_sizes is not None:
60 | if pool_sizes[i] is not None:
61 | layer = pool(layer, kernel_size=pool_sizes[i])
62 |
63 | if dropout_prob is not None and dropout_prob[i] > 0:
64 | layer = dropout(layer, 1.0 - dropout_prob[i])
65 |
66 | if verbose:
67 | print layer
68 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n'
69 |
70 | if symmetry is not None:
71 | layer = symmetry(layer, axis=1)
72 | if verbose:
73 | print layer
74 |
75 | if closing is not None:
76 | layer = closing(layer)
77 | print layer
78 |
79 | return layer
80 |
81 |
82 | def decoder_with_fc_only(latent_signal, layer_sizes=[], b_norm=True, non_linearity=tf.nn.relu,
83 | regularizer=None, weight_decay=0.001, reuse=False, scope=None, dropout_prob=None,
84 | b_norm_finish=False, verbose=False):
85 | '''A decoding network which maps points from the latent space back onto the data space.
86 | '''
87 | if verbose:
88 | print 'Building Decoder'
89 |
90 | n_layers = len(layer_sizes)
91 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers)
92 |
93 | if n_layers < 2:
94 |         raise ValueError('For an FC decoder with a single layer use simpler code.')
95 |
96 | for i in xrange(0, n_layers - 1):
97 | name = 'decoder_fc_' + str(i)
98 | scope_i = expand_scope_by_name(scope, name)
99 |
100 | if i == 0:
101 | layer = latent_signal
102 |
103 | layer = fully_connected(layer, layer_sizes[i], activation='linear', weights_init='xavier', name=name, regularizer=regularizer, weight_decay=weight_decay, reuse=reuse, scope=scope_i)
104 |
105 | if verbose:
106 | print name, 'FC params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()),
107 |
108 | if b_norm:
109 | name += '_bnorm'
110 | scope_i = expand_scope_by_name(scope, name)
111 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i)
112 | if verbose:
113 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list())
114 |
115 | if non_linearity is not None:
116 | layer = non_linearity(layer)
117 |
118 | if dropout_prob is not None and dropout_prob[i] > 0:
119 | layer = dropout(layer, 1.0 - dropout_prob[i])
120 |
121 | if verbose:
122 | print layer
123 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n'
124 |
125 | # Last decoding layer never has a non-linearity.
126 | name = 'decoder_fc_' + str(n_layers - 1)
127 | scope_i = expand_scope_by_name(scope, name)
128 | layer = fully_connected(layer, layer_sizes[n_layers - 1], activation='linear', weights_init='xavier', name=name, regularizer=regularizer, weight_decay=weight_decay, reuse=reuse, scope=scope_i)
129 | if verbose:
130 | print name, 'FC params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()),
131 |
132 | if b_norm_finish:
133 | name += '_bnorm'
134 | scope_i = expand_scope_by_name(scope, name)
135 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i)
136 | if verbose:
137 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list())
138 |
139 | if verbose:
140 | print layer
141 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n'
142 |
143 | return layer
144 |
145 |
146 | def decoder_with_convs_only(in_signal, n_filters, filter_sizes, strides, padding='same', b_norm=True, non_linearity=tf.nn.relu,
147 | conv_op=conv_1d, regularizer=None, weight_decay=0.001, dropout_prob=None, upsample_sizes=None,
148 | b_norm_finish=False, scope=None, reuse=False, verbose=False):
149 |
150 | if verbose:
151 | print 'Building Decoder'
152 |
153 | n_layers = len(n_filters)
154 | filter_sizes = replicate_parameter_for_all_layers(filter_sizes, n_layers)
155 | strides = replicate_parameter_for_all_layers(strides, n_layers)
156 | dropout_prob = replicate_parameter_for_all_layers(dropout_prob, n_layers)
157 |
158 | for i in xrange(n_layers):
159 | if i == 0:
160 | layer = in_signal
161 |
162 | name = 'decoder_conv_layer_' + str(i)
163 | scope_i = expand_scope_by_name(scope, name)
164 |
165 | layer = conv_op(layer, nb_filter=n_filters[i], filter_size=filter_sizes[i],
166 | strides=strides[i], padding=padding, regularizer=regularizer, weight_decay=weight_decay,
167 | name=name, reuse=reuse, scope=scope_i)
168 |
169 | if verbose:
170 | print name, 'conv params = ', np.prod(layer.W.get_shape().as_list()) + np.prod(layer.b.get_shape().as_list()),
171 |
172 | if (b_norm and i < n_layers - 1) or (i == n_layers - 1 and b_norm_finish):
173 | name += '_bnorm'
174 | scope_i = expand_scope_by_name(scope, name)
175 | layer = batch_normalization(layer, name=name, reuse=reuse, scope=scope_i)
176 | if verbose:
177 | print 'bnorm params = ', np.prod(layer.beta.get_shape().as_list()) + np.prod(layer.gamma.get_shape().as_list())
178 |
179 | if non_linearity is not None and i < n_layers - 1: # Last layer doesn't have a non-linearity.
180 | layer = non_linearity(layer)
181 |
182 | if dropout_prob is not None and dropout_prob[i] > 0:
183 | layer = dropout(layer, 1.0 - dropout_prob[i])
184 |
185 | if upsample_sizes is not None and upsample_sizes[i] is not None:
186 | layer = tf.tile(layer, multiples=[1, upsample_sizes[i], 1])
187 |
188 | if verbose:
189 | print layer
190 | print 'output size:', np.prod(layer.get_shape().as_list()[1:]), '\n'
191 |
192 | return layer
193 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/gan.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on May 3, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import os.path as osp
8 | import warnings
9 | import tensorflow as tf
10 |
11 | from . neural_net import Neural_Net
12 | from . tf_utils import safe_log
13 |
14 | class GAN(Neural_Net):
15 |
16 | def __init__(self, name, graph):
17 | Neural_Net.__init__(self, name, graph)
18 |
19 | def save_model(self, tick):
20 | self.saver.save(self.sess, self.MODEL_SAVER_ID, global_step=tick)
21 |
22 | def restore_model(self, model_path, epoch, verbose=False):
23 | '''Restore all the variables of a saved model.
24 | '''
25 | self.saver.restore(self.sess, osp.join(model_path, self.MODEL_SAVER_ID + '-' + str(int(epoch))))
26 |
27 | if self.epoch.eval(session=self.sess) != epoch:
28 | warnings.warn('Loaded model\'s epoch doesn\'t match the requested one.')
29 | else:
30 | if verbose:
31 | print('Model restored in epoch {0}.'.format(epoch))
32 |
33 | def optimizer(self, learning_rate, beta, loss, var_list):
34 | initial_learning_rate = learning_rate
35 | optimizer = tf.train.AdamOptimizer(initial_learning_rate, beta1=beta).minimize(loss, var_list=var_list)
36 | return optimizer
37 |
38 | def generate(self, n_samples, noise_params):
39 | noise = self.generator_noise_distribution(n_samples, self.noise_dim, **noise_params)
40 | feed_dict = {self.noise: noise}
41 | return self.sess.run([self.generator_out], feed_dict=feed_dict)[0]
42 |
43 | def vanilla_gan_objective(self, real_prob, synthetic_prob, use_safe_log=True):
44 | if use_safe_log:
45 | log = safe_log
46 | else:
47 | log = tf.log
48 |
49 | loss_d = tf.reduce_mean(-log(real_prob) - log(1 - synthetic_prob))
50 | loss_g = tf.reduce_mean(-log(synthetic_prob))
51 | return loss_d, loss_g
52 |
53 | def w_gan_objective(self, real_logit, synthetic_logit):
54 | loss_d = tf.reduce_mean(synthetic_logit) - tf.reduce_mean(real_logit)
55 | loss_g = -tf.reduce_mean(synthetic_logit)
56 | return loss_d, loss_g
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/general_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on November 26, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import numpy as np
8 | from numpy.linalg import norm
9 | import matplotlib.pylab as plt
10 | from mpl_toolkits.mplot3d import Axes3D
11 |
12 |
13 | def rand_rotation_matrix(deflection=1.0, seed=None):
14 | '''Creates a random rotation matrix.
15 |
16 | deflection: the magnitude of the rotation. For 0, no rotation; for 1, completely random
17 | rotation. Small deflection => small perturbation.
18 |
19 | DOI: http://www.realtimerendering.com/resources/GraphicsGems/gemsiii/rand_rotation.c
20 | http://blog.lostinmyterminal.com/python/2015/05/12/random-rotation-matrix.html
21 | '''
22 | if seed is not None:
23 | np.random.seed(seed)
24 |
25 | randnums = np.random.uniform(size=(3,))
26 |
27 | theta, phi, z = randnums
28 |
29 | theta = theta * 2.0 * deflection * np.pi # Rotation about the pole (Z).
30 | phi = phi * 2.0 * np.pi # For direction of pole deflection.
31 | z = z * 2.0 * deflection # For magnitude of pole deflection.
32 |
33 | # Compute a vector V used for distributing points over the sphere
34 | # via the reflection I - V Transpose(V). This formulation of V
35 | # will guarantee that if x[1] and x[2] are uniformly distributed,
36 | # the reflected points will be uniform on the sphere. Note that V
37 | # has length sqrt(2) to eliminate the 2 in the Householder matrix.
38 |
39 | r = np.sqrt(z)
40 | V = (
41 | np.sin(phi) * r,
42 | np.cos(phi) * r,
43 | np.sqrt(2.0 - z))
44 |
45 | st = np.sin(theta)
46 | ct = np.cos(theta)
47 |
48 | R = np.array(((ct, st, 0), (-st, ct, 0), (0, 0, 1)))
49 |
50 | # Construct the rotation matrix ( V Transpose(V) - I ) R.
51 | M = (np.outer(V, V) - np.eye(3)).dot(R)
52 | return M
53 |
54 |
55 | def iterate_in_chunks(l, n):
56 | '''Yield successive 'n'-sized chunks from iterable 'l'.
57 | Note: last chunk will be smaller than l if n doesn't divide l perfectly.
58 | '''
59 | for i in xrange(0, len(l), n):
60 | yield l[i:i + n]
61 |
62 |
63 | def add_gaussian_noise_to_pcloud(pcloud, mu=0, sigma=1):
64 | gnoise = np.random.normal(mu, sigma, pcloud.shape[0])
65 | gnoise = np.tile(gnoise, (3, 1)).T
66 | pcloud += gnoise
67 | return pcloud
68 |
69 |
70 | def apply_augmentations(batch, conf):
71 | if conf.gauss_augment is not None or conf.z_rotate:
72 | batch = batch.copy()
73 |
74 | if conf.gauss_augment is not None:
75 | mu = conf.gauss_augment['mu']
76 | sigma = conf.gauss_augment['sigma']
77 | batch += np.random.normal(mu, sigma, batch.shape)
78 |
79 | if conf.z_rotate:
80 | r_rotation = rand_rotation_matrix()
81 | r_rotation[0, 2] = 0
82 | r_rotation[2, 0] = 0
83 | r_rotation[1, 2] = 0
84 | r_rotation[2, 1] = 0
85 | r_rotation[2, 2] = 1
86 | batch = batch.dot(r_rotation)
87 | return batch
88 |
89 |
90 | def unit_cube_grid_point_cloud(resolution, clip_sphere=False):
91 | '''Returns the center coordinates of each cell of a 3D grid with resolution^3 cells,
92 | that is placed in the unit-cube.
93 |     If clip_sphere is True it drops the "corner" cells that lie outside the unit-sphere.
94 | '''
95 | grid = np.ndarray((resolution, resolution, resolution, 3), np.float32)
96 | spacing = 1.0 / float(resolution - 1)
97 | for i in xrange(resolution):
98 | for j in xrange(resolution):
99 | for k in xrange(resolution):
100 | grid[i, j, k, 0] = i * spacing - 0.5
101 | grid[i, j, k, 1] = j * spacing - 0.5
102 | grid[i, j, k, 2] = k * spacing - 0.5
103 |
104 | if clip_sphere:
105 | grid = grid.reshape(-1, 3)
106 | grid = grid[norm(grid, axis=1) <= 0.5]
107 |
108 | return grid, spacing
109 |
110 | def plot_3d_point_cloud(x, y, z, show=True, show_axis=True, in_u_sphere=False, marker='.', s=8, alpha=.8, figsize=(5, 5), elev=10, azim=240, axis=None, title=None, *args, **kwargs):
111 |
112 | if axis is None:
113 | fig = plt.figure(figsize=figsize)
114 | ax = fig.add_subplot(111, projection='3d')
115 | else:
116 | ax = axis
117 | fig = axis
118 |
119 | if title is not None:
120 | plt.title(title)
121 |
122 | sc = ax.scatter(x, y, z, marker=marker, s=s, alpha=alpha, *args, **kwargs)
123 | ax.view_init(elev=elev, azim=azim)
124 |
125 | if in_u_sphere:
126 | ax.set_xlim3d(-0.5, 0.5)
127 | ax.set_ylim3d(-0.5, 0.5)
128 | ax.set_zlim3d(-0.5, 0.5)
129 | else:
130 | miv = 0.7 * np.min([np.min(x), np.min(y), np.min(z)]) # Multiply with 0.7 to squeeze free-space.
131 | mav = 0.7 * np.max([np.max(x), np.max(y), np.max(z)])
132 | ax.set_xlim(miv, mav)
133 | ax.set_ylim(miv, mav)
134 | ax.set_zlim(miv, mav)
135 | plt.tight_layout()
136 |
137 | if not show_axis:
138 | plt.axis('off')
139 |
140 | if 'c' in kwargs:
141 | plt.colorbar(sc)
142 |
143 | if show:
144 | plt.show()
145 |
146 | return fig
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/generators_discriminators.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on May 11, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 | from tflearn.layers.normalization import batch_normalization
10 | from tflearn.layers.core import fully_connected, dropout
11 |
12 | from . encoders_decoders import encoder_with_convs_and_symmetry, decoder_with_fc_only
13 | from . tf_utils import leaky_relu
14 | from . tf_utils import expand_scope_by_name
15 |
16 |
17 | def mlp_discriminator(in_signal, non_linearity=tf.nn.relu, reuse=False, scope=None, b_norm=True, dropout_prob=None):
18 | ''' used in nips submission.
19 | '''
20 | encoder_args = {'n_filters': [64, 128, 256, 256, 512], 'filter_sizes': [1, 1, 1, 1, 1], 'strides': [1, 1, 1, 1, 1]}
21 | encoder_args['reuse'] = reuse
22 | encoder_args['scope'] = scope
23 | encoder_args['non_linearity'] = non_linearity
24 | encoder_args['dropout_prob'] = dropout_prob
25 | encoder_args['b_norm'] = b_norm
26 | layer = encoder_with_convs_and_symmetry(in_signal, **encoder_args)
27 |
28 | name = 'decoding_logits'
29 | scope_e = expand_scope_by_name(scope, name)
30 | d_logit = decoder_with_fc_only(layer, layer_sizes=[128, 64, 1], b_norm=b_norm, reuse=reuse, scope=scope_e)
31 | d_prob = tf.nn.sigmoid(d_logit)
32 | return d_prob, d_logit
33 |
34 |
35 | def point_cloud_generator(z, pc_dims, layer_sizes=[64, 128, 512, 1024], non_linearity=tf.nn.relu, b_norm=False, b_norm_last=False, dropout_prob=None):
36 | ''' used in nips submission.
37 | '''
38 |
39 | n_points, dummy = pc_dims
40 | if (dummy != 3):
41 | raise ValueError()
42 |
43 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm)
44 | out_signal = non_linearity(out_signal)
45 |
46 | if dropout_prob is not None:
47 | out_signal = dropout(out_signal, dropout_prob)
48 |
49 | if b_norm_last:
50 | out_signal = batch_normalization(out_signal)
51 |
52 | out_signal = fully_connected(out_signal, np.prod([n_points, 3]), activation='linear', weights_init='xavier')
53 | out_signal = tf.reshape(out_signal, [-1, n_points, 3])
54 | return out_signal
55 |
56 |
57 | def convolutional_discriminator(in_signal, non_linearity=tf.nn.relu,
58 | encoder_args={'n_filters': [128, 128, 256, 512], 'filter_sizes': [40, 20, 10, 10], 'strides': [1, 2, 2, 1]},
59 | decoder_layer_sizes=[128, 64, 1],
60 | reuse=False, scope=None):
61 |
62 | encoder_args['reuse'] = reuse
63 | encoder_args['scope'] = scope
64 | encoder_args['non_linearity'] = non_linearity
65 | layer = encoder_with_convs_and_symmetry(in_signal, **encoder_args)
66 |
67 | name = 'decoding_logits'
68 | scope_e = expand_scope_by_name(scope, name)
69 | d_logit = decoder_with_fc_only(layer, layer_sizes=decoder_layer_sizes, non_linearity=non_linearity, reuse=reuse, scope=scope_e)
70 | d_prob = tf.nn.sigmoid(d_logit)
71 | return d_prob, d_logit
72 |
73 |
74 | def latent_code_generator(z, out_dim, layer_sizes=[64, 128], b_norm=False):
75 | layer_sizes = layer_sizes + out_dim
76 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, b_norm=b_norm)
77 | out_signal = tf.nn.relu(out_signal)
78 | return out_signal
79 |
80 |
81 | def latent_code_discriminator(in_singnal, layer_sizes=[64, 128, 256, 256, 512], b_norm=False, non_linearity=tf.nn.relu, reuse=False, scope=None):
82 | layer_sizes = layer_sizes + [1]
83 |     d_logit = decoder_with_fc_only(in_signal, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm, reuse=reuse, scope=scope)
84 | d_prob = tf.nn.sigmoid(d_logit)
85 | return d_prob, d_logit
86 |
87 |
88 | def latent_code_discriminator_two_layers(in_signal, layer_sizes=[256, 512], b_norm=False, non_linearity=tf.nn.relu, reuse=False, scope=None):
89 | ''' Used in ICML submission.
90 | '''
91 | layer_sizes = layer_sizes + [1]
92 | d_logit = decoder_with_fc_only(in_signal, layer_sizes=layer_sizes, non_linearity=non_linearity, b_norm=b_norm, reuse=reuse, scope=scope)
93 | d_prob = tf.nn.sigmoid(d_logit)
94 | return d_prob, d_logit
95 |
96 |
97 | def latent_code_generator_two_layers(z, out_dim, layer_sizes=[128], b_norm=False):
98 | ''' Used in ICML submission.
99 | '''
100 | layer_sizes = layer_sizes + out_dim
101 | out_signal = decoder_with_fc_only(z, layer_sizes=layer_sizes, b_norm=b_norm)
102 | out_signal = tf.nn.relu(out_signal)
103 | return out_signal
104 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/in_out.py:
--------------------------------------------------------------------------------
1 | import six
2 | import warnings
3 | import numpy as np
4 | import os
5 | import os.path as osp
6 | import re
7 | from six.moves import cPickle
8 | from multiprocessing import Pool
9 |
10 | from . general_utils import rand_rotation_matrix
11 | from .. external.python_plyfile.plyfile import PlyElement, PlyData
12 |
13 | snc_synth_id_to_category = {
14 | '02691156': 'airplane', '02773838': 'bag', '02801938': 'basket',
15 | '02808440': 'bathtub', '02818832': 'bed', '02828884': 'bench',
16 | '02834778': 'bicycle', '02843684': 'birdhouse', '02871439': 'bookshelf',
17 | '02876657': 'bottle', '02880940': 'bowl', '02924116': 'bus',
18 | '02933112': 'cabinet', '02747177': 'can', '02942699': 'camera',
19 | '02954340': 'cap', '02958343': 'car', '03001627': 'chair',
20 | '03046257': 'clock', '03207941': 'dishwasher', '03211117': 'monitor',
21 | '04379243': 'table', '04401088': 'telephone', '02946921': 'tin_can',
22 | '04460130': 'tower', '04468005': 'train', '03085013': 'keyboard',
23 | '03261776': 'earphone', '03325088': 'faucet', '03337140': 'file',
24 | '03467517': 'guitar', '03513137': 'helmet', '03593526': 'jar',
25 | '03624134': 'knife', '03636649': 'lamp', '03642806': 'laptop',
26 | '03691459': 'speaker', '03710193': 'mailbox', '03759954': 'microphone',
27 | '03761084': 'microwave', '03790512': 'motorcycle', '03797390': 'mug',
28 | '03928116': 'piano', '03938244': 'pillow', '03948459': 'pistol',
29 | '03991062': 'pot', '04004475': 'printer', '04074963': 'remote_control',
30 | '04090263': 'rifle', '04099429': 'rocket', '04225987': 'skateboard',
31 | '04256520': 'sofa', '04330267': 'stove', '04530566': 'vessel',
32 | '04554684': 'washer', '02858304': 'boat', '02992529': 'cellphone'
33 | }
34 |
35 |
36 | def snc_category_to_synth_id():
37 | d = snc_synth_id_to_category
38 | inv_map = {v: k for k, v in six.iteritems(d)}
39 | return inv_map
40 |
41 |
42 | def create_dir(dir_path):
43 | ''' Creates a directory (or nested directories) if they don't exist.
44 | '''
45 | print('creating dir: ', dir_path)
46 | if not osp.exists(dir_path):
47 | os.makedirs(dir_path)
48 |
49 | return dir_path
50 |
51 |
52 | def pickle_data(file_name, *args):
53 | '''Using (c)Pickle to save multiple python objects in a single file.
54 | '''
55 | myFile = open(file_name, 'wb')
56 | cPickle.dump(len(args), myFile, protocol=2)
57 | for item in args:
58 | cPickle.dump(item, myFile, protocol=2)
59 | myFile.close()
60 |
61 |
62 | def unpickle_data(file_name):
63 | '''Restore data previously saved with pickle_data().
64 | '''
65 | inFile = open(file_name, 'rb')
66 | size = cPickle.load(inFile)
67 |     for _ in range(size):
68 | yield cPickle.load(inFile)
69 | inFile.close()
70 |
71 |
72 | def files_in_subdirs(top_dir, search_pattern):
73 | regex = re.compile(search_pattern)
74 | for path, _, files in os.walk(top_dir):
75 | for name in files:
76 | full_name = osp.join(path, name)
77 | if regex.search(full_name):
78 | yield full_name
79 |
80 |
81 | def load_ply(file_name, with_faces=False, with_color=False):
82 | ply_data = PlyData.read(file_name)
83 | points = ply_data['vertex']
84 | points = np.vstack([points['x'], points['y'], points['z']]).T
85 | ret_val = [points]
86 |
87 | if with_faces:
88 | faces = np.vstack(ply_data['face']['vertex_indices'])
89 | ret_val.append(faces)
90 |
91 | if with_color:
92 | r = np.vstack(ply_data['vertex']['red'])
93 | g = np.vstack(ply_data['vertex']['green'])
94 | b = np.vstack(ply_data['vertex']['blue'])
95 | color = np.hstack((r, g, b))
96 | ret_val.append(color)
97 |
98 | if len(ret_val) == 1: # Unwrap the list
99 | ret_val = ret_val[0]
100 |
101 | return ret_val
102 |
103 |
104 | def pc_loader(f_name):
105 |     ''' Loads a point cloud saved under ShapeNet's "standard" folder scheme:
106 | i.e. /syn_id/model_name.ply
107 | '''
108 | tokens = f_name.split('/')
109 | model_id = tokens[-1].split('.')[0]
110 | synet_id = tokens[-2]
111 | return load_ply(f_name), model_id, synet_id
112 |
113 |
114 | def load_all_point_clouds_under_folder(top_dir, n_threads=20, file_ending='.ply', verbose=False):
115 | file_names = [f for f in files_in_subdirs(top_dir, file_ending)]
116 | pclouds, model_ids, syn_ids = load_point_clouds_from_filenames(file_names, n_threads, loader=pc_loader, verbose=verbose)
117 | return PointCloudDataSet(pclouds, labels=syn_ids + '_' + model_ids, init_shuffle=False)
118 |
119 |
120 | def load_point_clouds_from_filenames(file_names, n_threads, loader, verbose=False):
121 | pc = loader(file_names[0])[0]
122 | pclouds = np.empty([len(file_names), pc.shape[0], pc.shape[1]], dtype=np.float32)
123 | model_names = np.empty([len(file_names)], dtype=object)
124 | class_ids = np.empty([len(file_names)], dtype=object)
125 | pool = Pool(n_threads)
126 |
127 | for i, data in enumerate(pool.imap(loader, file_names)):
128 | pclouds[i, :, :], model_names[i], class_ids[i] = data
129 |
130 | pool.close()
131 | pool.join()
132 |
133 | if len(np.unique(model_names)) != len(pclouds):
134 | warnings.warn('Point clouds with the same model name were loaded.')
135 |
136 | if verbose:
137 | print('{0} pclouds were loaded. They belong in {1} shape-classes.'.format(len(pclouds), len(np.unique(class_ids))))
138 |
139 | return pclouds, model_names, class_ids
140 |
141 |
142 | class PointCloudDataSet(object):
143 | '''
144 | See https://github.com/tensorflow/tensorflow/blob/a5d8217c4ed90041bea2616c14a8ddcf11ec8c03/tensorflow/examples/tutorials/mnist/input_data.py
145 | '''
146 |
147 | def __init__(self, point_clouds, noise=None, labels=None, copy=True, init_shuffle=True):
148 | '''Construct a DataSet.
149 | Args:
150 | init_shuffle, shuffle data before first epoch has been reached.
151 | Output:
152 | original_pclouds, labels, (None or Feed) # TODO Rename
153 | '''
154 |
155 | self.num_examples = point_clouds.shape[0]
156 | self.n_points = point_clouds.shape[1]
157 |
158 | if labels is not None:
159 | assert point_clouds.shape[0] == labels.shape[0], ('points.shape: %s labels.shape: %s' % (point_clouds.shape, labels.shape))
160 | if copy:
161 | self.labels = labels.copy()
162 | else:
163 | self.labels = labels
164 |
165 | else:
166 | self.labels = np.ones(self.num_examples, dtype=np.int8)
167 |
168 | if noise is not None:
169 | assert (type(noise) is np.ndarray)
170 | if copy:
171 | self.noisy_point_clouds = noise.copy()
172 | else:
173 | self.noisy_point_clouds = noise
174 | else:
175 | self.noisy_point_clouds = None
176 |
177 | if copy:
178 | self.point_clouds = point_clouds.copy()
179 | else:
180 | self.point_clouds = point_clouds
181 |
182 | self.epochs_completed = 0
183 | self._index_in_epoch = 0
184 | if init_shuffle:
185 | self.shuffle_data()
186 |
187 | def shuffle_data(self, seed=None):
188 | if seed is not None:
189 | np.random.seed(seed)
190 | perm = np.arange(self.num_examples)
191 | np.random.shuffle(perm)
192 | self.point_clouds = self.point_clouds[perm]
193 | self.labels = self.labels[perm]
194 | if self.noisy_point_clouds is not None:
195 | self.noisy_point_clouds = self.noisy_point_clouds[perm]
196 | return self
197 |
198 | def next_batch(self, batch_size, seed=None):
199 | '''Return the next batch_size examples from this data set.
200 | '''
201 | start = self._index_in_epoch
202 | self._index_in_epoch += batch_size
203 | if self._index_in_epoch > self.num_examples:
204 | self.epochs_completed += 1 # Finished epoch.
205 | self.shuffle_data(seed)
206 | # Start next epoch
207 | start = 0
208 | self._index_in_epoch = batch_size
209 | end = self._index_in_epoch
210 |
211 | if self.noisy_point_clouds is None:
212 | return self.point_clouds[start:end], self.labels[start:end], None
213 | else:
214 | return self.point_clouds[start:end], self.labels[start:end], self.noisy_point_clouds[start:end]
215 |
216 | def full_epoch_data(self, shuffle=True, seed=None):
217 | '''Returns a copy of the examples of the entire data set (i.e. an epoch's data), shuffled.
218 | '''
219 | if shuffle and seed is not None:
220 | np.random.seed(seed)
221 | perm = np.arange(self.num_examples) # Shuffle the data.
222 | if shuffle:
223 | np.random.shuffle(perm)
224 | pc = self.point_clouds[perm]
225 | lb = self.labels[perm]
226 | ns = None
227 | if self.noisy_point_clouds is not None:
228 | ns = self.noisy_point_clouds[perm]
229 | return pc, lb, ns
230 |
231 | def merge(self, other_data_set):
232 | self._index_in_epoch = 0
233 | self.epochs_completed = 0
234 | self.point_clouds = np.vstack((self.point_clouds, other_data_set.point_clouds))
235 |
236 | labels_1 = self.labels.reshape([self.num_examples, 1]) # TODO = move to init.
237 | labels_2 = other_data_set.labels.reshape([other_data_set.num_examples, 1])
238 | self.labels = np.vstack((labels_1, labels_2))
239 | self.labels = np.squeeze(self.labels)
240 |
241 | if self.noisy_point_clouds is not None:
242 | self.noisy_point_clouds = np.vstack((self.noisy_point_clouds, other_data_set.noisy_point_clouds))
243 |
244 | self.num_examples = self.point_clouds.shape[0]
245 |
246 | return self
247 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/latent_gan.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on April 27, 2017
3 |
4 | @author: optas
5 | '''
6 | import numpy as np
7 | import time
8 | import tensorflow as tf
9 |
10 | from . gan import GAN
11 |
12 | from . tf_utils import safe_log
13 | from tflearn import is_training
14 |
15 |
16 | class LatentGAN(GAN):
17 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None):
18 |
19 | self.noise_dim = noise_dim
20 | self.n_output = n_output
21 | self.discriminator = discriminator
22 | self.generator = generator
23 |
24 | GAN.__init__(self, name, graph)
25 |
26 | with tf.variable_scope(name):
27 |
28 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector.
29 | self.gt_data = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth.
30 |
31 | with tf.variable_scope('generator'):
32 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs)
33 |
34 | with tf.variable_scope('discriminator') as scope:
35 | self.real_prob, self.real_logit = self.discriminator(self.gt_data, scope=scope, **disc_kwargs)
36 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs)
37 |
38 | self.loss_d = tf.reduce_mean(-tf.log(self.real_prob) - tf.log(1 - self.synthetic_prob))
39 | self.loss_g = tf.reduce_mean(-tf.log(self.synthetic_prob))
40 |
41 | #Post ICLR TRY: safe_log
42 |
43 | train_vars = tf.trainable_variables()
44 |
45 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')]
46 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')]
47 |
48 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params)
49 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params)
50 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
51 | self.init = tf.global_variables_initializer()
52 |
53 | # Launch the session
54 | config = tf.ConfigProto()
55 | config.gpu_options.allow_growth = True
56 | self.sess = tf.Session(config=config)
57 | self.sess.run(self.init)
58 |
59 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma):
60 | return np.random.normal(mu, sigma, (n_samples, ndims))
61 |
62 | def _single_epoch_train(self, train_data, batch_size, noise_params):
63 | '''
64 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
65 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/
66 | '''
67 | n_examples = train_data.num_examples
68 | epoch_loss_d = 0.
69 | epoch_loss_g = 0.
70 | batch_size = batch_size
71 | n_batches = int(n_examples / batch_size)
72 | start_time = time.time()
73 |
74 | is_training(True, session=self.sess)
75 | try:
76 | # Loop over all batches
77 |             for _ in range(n_batches):
78 | feed, _, _ = train_data.next_batch(batch_size)
79 |
80 | # Update discriminator.
81 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params)
82 | feed_dict = {self.gt_data: feed, self.noise: z}
83 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict)
84 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict)
85 |
86 | # Compute average loss
87 | epoch_loss_d += loss_d
88 | epoch_loss_g += loss_g
89 |
90 | is_training(False, session=self.sess)
91 | except Exception:
92 | raise
93 | finally:
94 | is_training(False, session=self.sess)
95 |
96 | epoch_loss_d /= n_batches
97 | epoch_loss_g /= n_batches
98 | duration = time.time() - start_time
99 | return (epoch_loss_d, epoch_loss_g), duration
100 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/neural_net.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on August 28, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import warnings
8 | import os.path as osp
9 | import tensorflow as tf
9 |
10 | MODEL_SAVER_ID = 'models.ckpt'
11 |
12 |
13 | class Neural_Net(object):
14 |
15 | def __init__(self, name, graph):
16 | if graph is None:
17 | graph = tf.get_default_graph()
18 |
19 | self.graph = graph
20 | self.name = name
21 |
22 | with tf.variable_scope(name):
23 | with tf.device('/cpu:0'):
24 | self.epoch = tf.get_variable('epoch', [], initializer=tf.constant_initializer(0), trainable=False)
25 | self.increment_epoch = self.epoch.assign_add(tf.constant(1.0))
26 |
27 | self.no_op = tf.no_op()
28 |
29 | def is_training(self):
30 | is_training_op = self.graph.get_collection('is_training')
31 | return self.sess.run(is_training_op)[0]
32 |
33 | def restore_model(self, model_path, epoch, verbose=False):
34 | '''Restore all the variables of a saved model.
35 | '''
36 | self.saver.restore(self.sess, osp.join(model_path, MODEL_SAVER_ID + '-' + str(int(epoch))))
37 |
38 | if self.epoch.eval(session=self.sess) != epoch:
39 | warnings.warn('Loaded model\'s epoch doesn\'t match the requested one.')
40 | else:
41 | if verbose:
42 | print('Model restored in epoch {0}.'.format(epoch))
43 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/point_net_ae.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on January 26, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import time
8 | import tensorflow as tf
9 | import os.path as osp
10 |
11 | from tflearn.layers.conv import conv_1d
12 | from tflearn.layers.core import fully_connected
13 |
14 | from . in_out import create_dir
15 | from . autoencoder import AutoEncoder
16 | from . general_utils import apply_augmentations
17 |
18 | try:
19 | from .. external.structural_losses.tf_nndistance import nn_distance
20 | from .. external.structural_losses.tf_approxmatch import approx_match, match_cost
21 | except Exception:
22 | print('External Losses (Chamfer-EMD) cannot be loaded. Please install them first.')
23 |
24 |
25 | class PointNetAutoEncoder(AutoEncoder):
26 | '''
27 | An Auto-Encoder for point-clouds.
28 | '''
29 |
30 | def __init__(self, name, configuration, graph=None):
31 | c = configuration
32 | self.configuration = c
33 |
34 | AutoEncoder.__init__(self, name, graph, configuration)
35 |
36 | with tf.variable_scope(name):
37 | self.z = c.encoder(self.x, **c.encoder_args)
38 | self.bottleneck_size = int(self.z.get_shape()[1])
39 | layer = c.decoder(self.z, **c.decoder_args)
40 |
41 | if c.exists_and_is_not_none('close_with_tanh'):
42 | layer = tf.nn.tanh(layer)
43 |
44 | self.x_reconstr = tf.reshape(layer, [-1, self.n_output[0], self.n_output[1]])
45 |
46 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=c.saver_max_to_keep)
47 |
48 | self._create_loss()
49 | self._setup_optimizer()
50 |
51 | # GPU configuration
52 | if hasattr(c, 'allow_gpu_growth'):
53 | growth = c.allow_gpu_growth
54 | else:
55 | growth = True
56 |
57 | config = tf.ConfigProto()
58 | config.gpu_options.allow_growth = growth
59 |
60 | # Summaries
61 | self.merged_summaries = tf.summary.merge_all()
62 | self.train_writer = tf.summary.FileWriter(osp.join(configuration.train_dir, 'summaries'), self.graph)
63 |
64 | # Initializing the tensor flow variables
65 | self.init = tf.global_variables_initializer()
66 |
67 | # Launch the session
68 | self.sess = tf.Session(config=config)
69 | self.sess.run(self.init)
70 |
71 | def _create_loss(self):
72 | c = self.configuration
73 |
74 | if c.loss == 'chamfer':
75 | cost_p1_p2, _, cost_p2_p1, _ = nn_distance(self.x_reconstr, self.gt)
76 | self.loss = tf.reduce_mean(cost_p1_p2) + tf.reduce_mean(cost_p2_p1)
77 | elif c.loss == 'emd':
78 | match = approx_match(self.x_reconstr, self.gt)
79 | self.loss = tf.reduce_mean(match_cost(self.x_reconstr, self.gt, match))
80 |
81 | reg_losses = self.graph.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
82 | if c.exists_and_is_not_none('w_reg_alpha'):
83 | w_reg_alpha = c.w_reg_alpha
84 | else:
85 | w_reg_alpha = 1.0
86 |
87 | for rl in reg_losses:
88 | self.loss += (w_reg_alpha * rl)
89 |
90 | def _setup_optimizer(self):
91 | c = self.configuration
92 | self.lr = c.learning_rate
93 | if hasattr(c, 'exponential_decay'):
94 | self.lr = tf.train.exponential_decay(c.learning_rate, self.epoch, c.decay_steps, decay_rate=0.5, staircase=True, name="learning_rate_decay")
95 | self.lr = tf.maximum(self.lr, 1e-5)
96 | tf.summary.scalar('learning_rate', self.lr)
97 |
98 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
99 | self.train_step = self.optimizer.minimize(self.loss)
100 |
101 | def _single_epoch_train(self, train_data, configuration, only_fw=False):
102 | n_examples = train_data.num_examples
103 | epoch_loss = 0.
104 | batch_size = configuration.batch_size
105 | n_batches = int(n_examples / batch_size)
106 | start_time = time.time()
107 |
108 | if only_fw:
109 | fit = self.reconstruct
110 | else:
111 | fit = self.partial_fit
112 |
113 | # Loop over all batches
114 |         for _ in range(n_batches):
115 |
116 | if self.is_denoising:
117 | original_data, _, batch_i = train_data.next_batch(batch_size)
118 | if batch_i is None: # In this case the denoising concern only the augmentation.
119 | batch_i = original_data
120 | else:
121 | batch_i, _, _ = train_data.next_batch(batch_size)
122 |
123 | batch_i = apply_augmentations(batch_i, configuration) # This is a new copy of the batch.
124 |
125 | if self.is_denoising:
126 | _, loss = fit(batch_i, original_data)
127 | else:
128 | _, loss = fit(batch_i)
129 |
130 | # Compute average loss
131 | epoch_loss += loss
132 | epoch_loss /= n_batches
133 | duration = time.time() - start_time
134 |
135 | if configuration.loss == 'emd':
136 | epoch_loss /= len(train_data.point_clouds[0])
137 |
138 | return epoch_loss, duration
139 |
140 | def gradient_of_input_wrt_loss(self, in_points, gt_points=None):
141 | if gt_points is None:
142 | gt_points = in_points
143 | return self.sess.run(tf.gradients(self.loss, self.x), feed_dict={self.x: in_points, self.gt: gt_points})
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/raw_gan.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Apr 27, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import numpy as np
8 | import time
9 | import tensorflow as tf
10 | from tflearn import is_training
11 |
12 | from . gan import GAN
13 | from . tf_utils import safe_log
14 |
15 |
16 | class RawGAN(GAN):
17 |
18 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None):
19 |
20 | self.noise_dim = noise_dim
21 | self.n_output = n_output
22 | out_shape = [None] + self.n_output
23 | self.discriminator = discriminator
24 | self.generator = generator
25 |
26 | GAN.__init__(self, name, graph)
27 |
28 | with tf.variable_scope(name):
29 |
30 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector.
31 | self.real_pc = tf.placeholder(tf.float32, shape=out_shape) # Ground-truth.
32 |
33 | with tf.variable_scope('generator'):
34 | self.generator_out = self.generator(self.noise, self.n_output[0], **gen_kwargs)
35 |
36 | with tf.variable_scope('discriminator') as scope:
37 | self.real_prob, self.real_logit = self.discriminator(self.real_pc, scope=scope, **disc_kwargs)
38 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs)
39 |
40 | self.loss_d = tf.reduce_mean(-safe_log(self.real_prob) - safe_log(1 - self.synthetic_prob))
41 | self.loss_g = tf.reduce_mean(-safe_log(self.synthetic_prob))
42 |
43 | train_vars = tf.trainable_variables()
44 |
45 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')]
46 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')]
47 |
48 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params)
49 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params)
50 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
51 | self.init = tf.global_variables_initializer()
52 |
53 | # Launch the session
54 | config = tf.ConfigProto()
55 | config.gpu_options.allow_growth = True
56 | self.sess = tf.Session(config=config)
57 | self.sess.run(self.init)
58 |
59 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma):
60 | return np.random.normal(mu, sigma, (n_samples, ndims))
61 |
62 | def _single_epoch_train(self, train_data, batch_size, noise_params={}, adaptive=None):
63 | '''
64 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
65 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/
66 | '''
67 | n_examples = train_data.num_examples
68 | epoch_loss_d = 0.
69 | epoch_loss_g = 0.
70 | batch_size = batch_size
71 | n_batches = int(n_examples / batch_size)
72 | start_time = time.time()
73 | updated_d = 0
74 | # Loop over all batches
75 | _real_s = []
76 | _fake_s = []
77 | is_training(True, session=self.sess)
78 | try:
79 |             for _ in range(n_batches):
80 | feed, _, _ = train_data.next_batch(batch_size)
81 | # Update discriminator.
82 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params)
83 | feed_dict = {self.real_pc: feed, self.noise: z}
84 | if adaptive is not None:
85 | s1 = tf.reduce_mean(self.real_prob)
86 | s2 = tf.reduce_mean(1 - self.synthetic_prob)
87 | sr, sf = self.sess.run([s1, s2], feed_dict=feed_dict)
88 | _real_s.append(sr)
89 | _fake_s.append(sf)
90 | if np.mean([sr, sf]) < adaptive:
91 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict)
92 | updated_d += 1
93 | epoch_loss_d += loss_d
94 | else:
95 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict)
96 | updated_d += 1
97 | epoch_loss_d += loss_d
98 | # Update generator.
99 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict)
100 | # Compute average loss
101 | # epoch_loss_d += loss_d
102 | epoch_loss_g += loss_g
103 | is_training(False, session=self.sess)
104 | except Exception:
105 | raise
106 | finally:
107 | is_training(False, session=self.sess)
108 |
109 | # epoch_loss_d /= n_batches
110 | if updated_d > 1:
111 | epoch_loss_d /= updated_d
112 | else:
113 |             print('Discriminator was not updated in this epoch.')
114 |
115 | if adaptive is not None:
116 |             print(np.mean(_real_s), np.mean(_fake_s))
117 |
118 | epoch_loss_g /= n_batches
119 | duration = time.time() - start_time
120 | return (epoch_loss_d, epoch_loss_g), duration
121 |
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/tf_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on November 26, 2017
3 |
4 | @author: optas
5 | '''
6 |
7 | import tensorflow as tf
8 | import numpy as np
9 |
10 |
11 | def expand_scope_by_name(scope, name):
12 | """ expand tf scope by given name.
13 | """
14 |
15 |     if isinstance(scope, str):
16 | scope += '/' + name
17 | return scope
18 |
19 | if scope is not None:
20 | return scope.name + '/' + name
21 | else:
22 | return scope
23 |
24 |
25 | def replicate_parameter_for_all_layers(parameter, n_layers):
26 | if parameter is not None and len(parameter) != n_layers:
27 | if len(parameter) != 1:
28 | raise ValueError()
29 | parameter = np.array(parameter)
30 | parameter = parameter.repeat(n_layers).tolist()
31 | return parameter
32 |
33 |
34 | def reset_tf_graph():
35 |     ''' Resets all variables of the default TF graph. Useful in Jupyter notebooks.
36 | '''
37 | if 'sess' in globals() and sess:
38 | sess.close()
39 | tf.reset_default_graph()
40 |
41 |
42 | def leaky_relu(alpha):
43 | if not (alpha < 1 and alpha > 0):
44 | raise ValueError()
45 |
46 | return lambda x: tf.maximum(alpha * x, x)
47 |
48 |
49 | def safe_log(x, eps=1e-12):
50 | return tf.log(tf.maximum(x, eps))
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/vanilla_gan.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 2018
3 |
4 | Author: Achlioptas Panos (Github ID: optas)
5 | '''
6 |
7 | import numpy as np
8 | import time
9 | import tensorflow as tf
10 |
11 | from tflearn import is_training
12 | from . gan import GAN
13 |
14 |
15 | class Vanilla_GAN(GAN):
16 |
17 | def __init__(self, name, learning_rate, n_output, noise_dim, discriminator, generator, beta=0.9, gen_kwargs={}, disc_kwargs={}, graph=None):
18 |
19 | GAN.__init__(self, name, graph)
20 |
21 | self.noise_dim = noise_dim
22 | self.n_output = n_output
23 | self.discriminator = discriminator
24 | self.generator = generator
25 |
26 | with tf.variable_scope(name):
27 |
28 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector.
29 | self.gt = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth.
30 |
31 | with tf.variable_scope('generator'):
32 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs)
33 |
34 | with tf.variable_scope('discriminator') as scope:
35 | self.real_prob, self.real_logit = self.discriminator(self.gt, scope=scope, **disc_kwargs)
36 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs)
37 |
38 | self.loss_d, self.loss_g = self.vanilla_gan_objective(self.real_prob, self.synthetic_prob, use_safe_log=True)
39 |
40 | train_vars = tf.trainable_variables()
41 |
42 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')]
43 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')]
44 |
45 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params)
46 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params)
47 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
48 | self.init = tf.global_variables_initializer()
49 |
50 | # Launch the session
51 | config = tf.ConfigProto()
52 | config.gpu_options.allow_growth = True
53 | self.sess = tf.Session(config=config)
54 | self.sess.run(self.init)
55 |
56 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma):
57 | return np.random.normal(mu, sigma, (n_samples, ndims))
58 |
59 | def _single_epoch_train(self, train_data, batch_size, noise_params):
60 | n_examples = train_data.num_examples
61 | epoch_loss_d = 0.
62 | epoch_loss_g = 0.
63 | batch_size = batch_size
64 | n_batches = int(n_examples / batch_size)
65 | start_time = time.time()
66 |
67 | is_training(True, session=self.sess)
68 | try:
69 | # Loop over all batches
70 |                 for _ in range(n_batches):
71 | feed, _, _ = train_data.next_batch(batch_size)
72 |
73 | # Update discriminator.
74 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params)
75 | feed_dict = {self.gt: feed, self.noise: z}
76 | loss_d, _ = self.sess.run([self.loss_d, self.opt_d], feed_dict=feed_dict)
77 | loss_g, _ = self.sess.run([self.loss_g, self.opt_g], feed_dict=feed_dict)
78 |
79 | # Compute average loss
80 | epoch_loss_d += loss_d
81 | epoch_loss_g += loss_g
82 |
83 | is_training(False, session=self.sess)
84 | except Exception:
85 | raise
86 | finally:
87 | is_training(False, session=self.sess)
88 |
89 | epoch_loss_d /= n_batches
90 | epoch_loss_g /= n_batches
91 | duration = time.time() - start_time
92 | return (epoch_loss_d, epoch_loss_g), duration
--------------------------------------------------------------------------------
/task_generation/latent_3d_points/src/w_gan_gp.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on May 22, 2018
3 |
4 | Author: Achlioptas Panos (Github ID: optas)
5 | '''
6 |
7 | import numpy as np
8 | import time
9 | import tensorflow as tf
10 |
11 | from tflearn import is_training
12 | from . gan import GAN
13 |
14 |
15 | class W_GAN_GP(GAN):
16 |     '''Wasserstein GAN with gradient penalty (WGAN-GP).
17 |     https://arxiv.org/abs/1704.00028
18 |     '''
19 |
20 | def __init__(self, name, learning_rate, lam, n_output, noise_dim, discriminator, generator, beta=0.5, gen_kwargs={}, disc_kwargs={}, graph=None):
21 |
22 | GAN.__init__(self, name, graph)
23 |
24 | self.noise_dim = noise_dim
25 | self.n_output = n_output
26 | self.discriminator = discriminator
27 | self.generator = generator
28 |
29 | with tf.variable_scope(name):
30 | self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim]) # Noise vector.
31 | self.real_pc = tf.placeholder(tf.float32, shape=[None] + self.n_output) # Ground-truth.
32 |
33 | with tf.variable_scope('generator'):
34 | self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs)
35 |
36 | with tf.variable_scope('discriminator') as scope:
37 | self.real_prob, self.real_logit = self.discriminator(self.real_pc, scope=scope, **disc_kwargs)
38 | self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs)
39 |
40 |
41 | # Compute WGAN losses
42 | self.loss_d = tf.reduce_mean(self.synthetic_logit) - tf.reduce_mean(self.real_logit)
43 | self.loss_g = -tf.reduce_mean(self.synthetic_logit)
44 |
45 | # Compute gradient penalty at interpolated points
46 | ndims = self.real_pc.get_shape().ndims
47 | batch_size = tf.shape(self.real_pc)[0]
48 | alpha = tf.random_uniform(shape=[batch_size] + [1] * (ndims - 1), minval=0., maxval=1.)
49 | differences = self.generator_out - self.real_pc
50 | interpolates = self.real_pc + (alpha * differences)
51 |
52 | with tf.variable_scope('discriminator') as scope:
53 | gradients = tf.gradients(self.discriminator(interpolates, reuse=True, scope=scope, **disc_kwargs)[1], [interpolates])[0]
54 |
55 | # Reduce over all but the first dimension
56 | slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=range(1, ndims)))
57 | gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
58 | self.loss_d += lam * gradient_penalty
59 |
60 | train_vars = tf.trainable_variables()
61 | d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')]
62 | g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')]
63 |
64 | self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params)
65 | self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params)
66 |
67 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
68 | self.init = tf.global_variables_initializer()
69 |
70 | # Launch the session
71 | config = tf.ConfigProto()
72 | config.gpu_options.allow_growth = True
73 | self.sess = tf.Session(config=config)
74 | self.sess.run(self.init)
75 |
76 | def generator_noise_distribution(self, n_samples, ndims, mu, sigma):
77 | return np.random.normal(mu, sigma, (n_samples, ndims))
78 |
79 | def _single_epoch_train(self, train_data, batch_size, noise_params, discriminator_boost=5):
80 | '''
81 | see: http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
82 | http://wiseodd.github.io/techblog/2016/09/17/gan-tensorflow/
83 | '''
84 | n_examples = train_data.num_examples
85 | epoch_loss_d = 0.
86 | epoch_loss_g = 0.
87 | batch_size = batch_size
88 | n_batches = int(n_examples / batch_size)
89 | start_time = time.time()
90 |
91 |         iterations_for_epoch = n_batches // discriminator_boost  # integer number of generator updates per epoch
92 |
93 | is_training(True, session=self.sess)
94 | try:
95 | # Loop over all batches
96 |             for _ in range(iterations_for_epoch):
97 | for _ in range(discriminator_boost):
98 | feed, _, _ = train_data.next_batch(batch_size)
99 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params)
100 | feed_dict = {self.real_pc: feed, self.noise: z}
101 | _, loss_d = self.sess.run([self.opt_d, self.loss_d], feed_dict=feed_dict)
102 | epoch_loss_d += loss_d
103 |
104 | # Update generator.
105 | z = self.generator_noise_distribution(batch_size, self.noise_dim, **noise_params)
106 | feed_dict = {self.noise: z}
107 | _, loss_g = self.sess.run([self.opt_g, self.loss_g], feed_dict=feed_dict)
108 | epoch_loss_g += loss_g
109 |
110 | is_training(False, session=self.sess)
111 | except Exception:
112 | raise
113 | finally:
114 | is_training(False, session=self.sess)
115 | epoch_loss_d /= (iterations_for_epoch * discriminator_boost)
116 | epoch_loss_g /= iterations_for_epoch
117 | duration = time.time() - start_time
118 | return (epoch_loss_d, epoch_loss_g), duration
119 |
--------------------------------------------------------------------------------
/task_generation/train_latent_gan.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is used to train a Generative Adversarial Network (GAN) on the latent
3 | codes of a given class. The GAN consists of a generator and a discriminator, both
4 | with two layers, which are imported from the `latent_3d_points.src.generators_discriminators` module.
5 |
6 | The training data is derived from the embeddings obtained by nerf2vec.
7 |
8 | The output of the training process is stored in a directory specified by `settings.GENERATION_OUT_DIR`,
9 | with the directory name formatted to include the class index.
10 |
11 | The code in this module is based on the code from the repository: https://github.com/optas/latent_3d_points
12 |
13 | """
14 | import os
15 | import sys
16 | script_dir = os.path.dirname(os.path.abspath(__file__))
17 | parent_dir = os.path.dirname(script_dir)
18 | sys.path.append(parent_dir)
19 | import settings
20 |
21 | import os.path as osp
22 |
23 | import numpy as np
24 | from task_generation.latent_3d_points.src.generators_discriminators import (
25 | latent_code_discriminator_two_layers,
26 | latent_code_generator_two_layers,
27 | )
28 | from task_generation.latent_3d_points.src.in_out import PointCloudDataSet, create_dir
29 | from task_generation.latent_3d_points.src.tf_utils import reset_tf_graph
30 | from task_generation.latent_3d_points.src.w_gan_gp import W_GAN_GP
31 |
32 | from nerf2vec import config as nerf2vec_config
33 |
34 |
35 | def train(class_idx=0):
36 |
37 | experiment_name = 'nerf2vec_{}'.format(class_idx)
38 | top_out_dir = settings.GENERATION_OUT_DIR.format(experiment_name)
39 | embedding_size = 1024
40 | n_epochs = 2000
41 | n_syn_samples = 1000 # how many synthetic samples to produce at each save step
42 | saver_step = np.hstack([np.array([1, 5, 10]), np.arange(50, n_epochs + 1, 50)])
43 |
44 | latent_codes_path = os.path.join(settings.GENERATION_EMBEDDING_DIR, "embeddings_{}.npz".format(class_idx))
45 | latent_codes = np.load(latent_codes_path)["embeddings"]
46 | latent_data = PointCloudDataSet(latent_codes)
47 | print(latent_data.num_examples)
48 |
49 | # optimization parameters
50 | init_lr = 0.0001
51 | batch_size = 50
52 | noise_params = {"mu": 0, "sigma": 0.2}
53 | beta = 0.5 # ADAM's momentum
54 |
55 | train_dir = osp.join(top_out_dir, "latent_gan_ckpts")
56 | create_dir(train_dir)
57 | synthetic_data_out_dir = osp.join(top_out_dir, "generated_embeddings")
58 | create_dir(synthetic_data_out_dir)
59 |
60 | reset_tf_graph()
61 |
62 | gan = W_GAN_GP(
63 | experiment_name,
64 | init_lr,
65 | 10,
66 | [embedding_size],
67 | embedding_size,
68 | latent_code_discriminator_two_layers,
69 | latent_code_generator_two_layers,
70 | beta=beta,
71 | )
72 |
73 | print("Start")
74 |
75 | for _ in range(n_epochs):
76 | loss, duration = gan._single_epoch_train(latent_data, batch_size, noise_params)
77 | epoch = int(gan.sess.run(gan.increment_epoch))
78 | print("epoch:", epoch, "loss:", loss)
79 |
80 | if epoch in saver_step:
81 | checkpoint_path = osp.join(train_dir, "epoch_" + str(epoch) + ".ckpt")
82 | gan.saver.save(gan.sess, checkpoint_path, global_step=gan.epoch)
83 |
84 | syn_latent_data = gan.generate(n_syn_samples, noise_params)
85 | np.savez(
86 | osp.join(synthetic_data_out_dir, "epoch_" + str(epoch) + ".npz"),
87 | embeddings=syn_latent_data,
88 | )
89 |
90 | def main():
91 | # Train a GAN for each class
92 | for class_idx in range(nerf2vec_config.NUM_CLASSES):
93 | train(class_idx)
94 |
95 | if __name__ == "__main__":
96 | main()
--------------------------------------------------------------------------------
/task_generation/viz_nerf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 | import settings
7 |
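# Renders images from embeddings produced by the latent GAN: for each class, the nerf2vec
# decoder is restored from settings.GENERATION_NERF2VEC_FULL_CKPT_PATH, random embeddings are
# drawn from the .npz file referenced by settings.GENERATION_LATENT_GAN_FULL_CKPT_PATH, and the
# resulting renderings are written to task_generation/GAN_plots_<class_idx>/.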
8 | import uuid
9 | import math
10 | import torch
11 | import numpy as np
12 | import imageio.v2 as imageio
13 |
14 | from random import randint
15 | from nerf2vec.utils import get_rays
16 |
17 | from torch.cuda.amp import autocast
18 | from models.idecoder import ImplicitDecoder
19 | from nerf.utils import Rays, render_image
20 | from nerf2vec import config as nerf2vec_config
21 |
22 |
23 | @torch.no_grad()
24 | def draw_images(decoder, embeddings, device='cuda:0', class_idx=0):
25 |
26 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
27 | render_step_size = (
28 | (scene_aabb[3:] - scene_aabb[:3]).max()
29 | * math.sqrt(3)
30 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES
31 | ).item()
32 | rays = get_rays(device)
33 |
34 | # WHITE BACKGROUND
35 | color_bkgd = torch.ones((1,3), device=device)
36 |
37 | img_name = str(uuid.uuid4())
38 | plots_path = os.path.join('task_generation', f'GAN_plots_{class_idx}')
39 | os.makedirs(plots_path, exist_ok=True)
40 |
41 | for idx, emb in enumerate(embeddings):
42 | emb = torch.tensor(emb, device=device, dtype=torch.float32)
43 | emb = emb.unsqueeze(dim=0)
44 | with autocast():
45 | rgb_A, alpha, b, c, _, _ = render_image(
46 | radiance_field=decoder,
47 | embeddings=emb,
48 | occupancy_grid=None,
49 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)),
50 | scene_aabb=scene_aabb,
51 | render_step_size=render_step_size,
52 | render_bkgd=color_bkgd,
53 | grid_weights=None,
54 | device=device
55 | )
56 |
57 | imageio.imwrite(
58 | os.path.join(plots_path, f'{img_name}_{idx}.png'),
59 | (rgb_A.squeeze(dim=0).cpu().detach().numpy() * 255).astype(np.uint8)
60 | )
61 |
62 |
63 | @torch.no_grad()
64 | def create_renderings_from_GAN_embeddings(device='cuda:0', class_idx=0, n_images=10):
65 |
66 | # Init nerf2vec
67 | decoder = ImplicitDecoder(
68 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM,
69 | in_dim=nerf2vec_config.DECODER_INPUT_DIM,
70 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM,
71 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP,
72 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP,
73 | out_dim=nerf2vec_config.DECODER_OUT_DIM,
74 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF,
75 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
76 | )
77 | decoder.eval()
78 | decoder = decoder.to(device)
79 |
80 | ckpt_path = settings.GENERATION_NERF2VEC_FULL_CKPT_PATH
81 | print(f'loading weights: {ckpt_path}')
82 | ckpt = torch.load(ckpt_path)
83 | decoder.load_state_dict(ckpt["decoder"])
84 |
85 | latent_gan_embeddings_path = settings.GENERATION_LATENT_GAN_FULL_CKPT_PATH.format(class_idx)
86 | embeddings = np.load(latent_gan_embeddings_path)["embeddings"]
87 | embeddings = torch.from_numpy(embeddings)
88 |
89 | for _ in range(0, n_images):
90 | idx = randint(0, embeddings.shape[0]-1)
91 | emb = embeddings[idx].unsqueeze(0).cuda()
92 | draw_images(decoder, emb, device, class_idx)
93 |
94 |
95 | def main() -> None:
96 | # Create renderings for each class
97 | for class_idx in range(0, nerf2vec_config.NUM_CLASSES):
98 | create_renderings_from_GAN_embeddings(device=settings.DEVICE_NAME, class_idx=class_idx, n_images=10)
99 |
100 | if __name__ == "__main__":
101 | main()
--------------------------------------------------------------------------------
/task_interp_and_retrieval/interp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 | import settings
7 |
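# Embedding interpolation: pairs of NeRFs belonging to the same class are sampled from the
# chosen split (augmented samples are skipped), their flattened MLP weights are encoded with
# the nerf2vec encoder, and renderings of the two endpoints plus nine linear interpolations
# (gamma = 0.1 ... 0.9) are saved under task_interp_and_retrieval/interp_plots_<split>/.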
8 | import math
9 | import uuid
10 | import torch
11 | import numpy as np
12 | import imageio.v2 as imageio
13 |
14 | from pathlib import Path
15 | from random import randint
16 | from nerf2vec import config as nerf2vec_config
17 | from nerf2vec.train_nerf2vec import NeRFDataset
18 | from nerf2vec.utils import get_class_label_from_nerf_root_path
19 | from models.encoder import Encoder
20 | from models.idecoder import ImplicitDecoder
21 | from nerf.utils import Rays, render_image
22 | from torch.cuda.amp import autocast
23 |
24 |
25 | def draw_images(
26 | rays,
27 | color_bkgds,
28 | embeddings,
29 | decoder,
30 | scene_aabb,
31 | render_step_size,
32 | curr_folder_path,
33 | device):
34 |
35 | for idx in range(len(embeddings)):
36 | with autocast():
37 | rgb, _, _, _, _, _ = render_image(
38 | radiance_field=decoder,
39 | embeddings=embeddings[idx].unsqueeze(dim=0),
40 | occupancy_grid=None,
41 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)),
42 | scene_aabb=scene_aabb,
43 | render_step_size=render_step_size,
44 | render_bkgd=color_bkgds.unsqueeze(dim=0),
45 | grid_weights=None,
46 | device=device
47 | )
48 |
49 | img_name = f'{idx}.png'
50 | full_path = os.path.join(curr_folder_path, img_name)
51 |
52 | imageio.imwrite(
53 | full_path,
54 | (rgb.cpu().detach().numpy()[0] * 255).astype(np.uint8)
55 | )
56 |
57 |
58 | @torch.no_grad()
59 | def do_interpolation(device = 'cuda:0', split = nerf2vec_config.TRAIN_SPLIT):
60 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
61 | render_step_size = (
62 | (scene_aabb[3:] - scene_aabb[:3]).max()
63 | * math.sqrt(3)
64 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES
65 | ).item()
66 |
67 |
68 | ckpts_path = Path(settings.NERF2VEC_CKPTS_PATH)
69 | ckpt_paths = [p for p in ckpts_path.glob("*.pt") if "best" not in p.name]
70 | ckpt_path = ckpt_paths[0]
71 | ckpt = torch.load(ckpt_path)
72 |
73 | print(f'loaded weights: {ckpt_path}')
74 |
75 | encoder = Encoder(
76 | nerf2vec_config.MLP_UNITS,
77 | nerf2vec_config.ENCODER_HIDDEN_DIM,
78 | nerf2vec_config.ENCODER_EMBEDDING_DIM
79 | )
80 | encoder.load_state_dict(ckpt["encoder"])
81 | encoder = encoder.cuda()
82 | encoder.eval()
83 |
84 | decoder = ImplicitDecoder(
85 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM,
86 | in_dim=nerf2vec_config.DECODER_INPUT_DIM,
87 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM,
88 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP,
89 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP,
90 | out_dim=nerf2vec_config.DECODER_OUT_DIM,
91 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF,
92 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
93 | )
94 | decoder.load_state_dict(ckpt["decoder"])
95 | decoder = decoder.cuda()
96 | decoder.eval()
97 |
98 | dset_json_path = get_dset_json_path(split)
99 | dset = NeRFDataset(dset_json_path, device='cpu')
100 |
101 | n_images = 0
102 | max_images = 100
103 |
104 | while n_images < max_images:
105 | idx_A = randint(0, len(dset) - 1)
106 | _, test_nerf_A, matrices_unflattened_A, matrices_flattened_A, _, data_dir_A, _, _ = dset[idx_A]
107 | class_id_A = get_class_label_from_nerf_root_path(data_dir_A)
108 |
109 | # Ignore augmented samples
110 | if is_nerf_augmented(data_dir_A):
111 | continue
112 | matrices_unflattened_A = matrices_unflattened_A['mlp_base.params']
113 |
114 | class_id_B = -1
115 | while class_id_B != class_id_A:
116 | idx_B = randint(0, len(dset) - 1)
117 | _, _, matrices_unflattened_B, matrices_flattened_B, _, data_dir_B, _, _ = dset[idx_B]
118 | class_id_B = get_class_label_from_nerf_root_path(data_dir_B)
119 |
120 | if is_nerf_augmented(data_dir_B):
121 | continue
122 | matrices_unflattened_B = matrices_unflattened_B['mlp_base.params']
123 |
124 | print(f'Progress: {n_images}/{max_images}')
125 |
126 | matrices_flattened_A = matrices_flattened_A.cuda().unsqueeze(0)
127 | matrices_flattened_B = matrices_flattened_B.cuda().unsqueeze(0)
128 |
129 | with autocast():
130 | embedding_A = encoder(matrices_flattened_A).squeeze(0)
131 | embedding_B = encoder(matrices_flattened_B).squeeze(0)
132 |
133 |
134 | embeddings = [embedding_A]
135 | for gamma in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
136 | emb_interp = (1 - gamma) * embedding_A + gamma * embedding_B
137 | embeddings.append(emb_interp)
138 | embeddings.append(embedding_B)
139 |
140 | curr_folder_path = os.path.join('task_interp_and_retrieval', f'interp_plots_{split}', str(uuid.uuid4()))
141 | os.makedirs(curr_folder_path, exist_ok=True)
142 |
143 | rays = test_nerf_A['rays']
144 | rays = rays._replace(origins=rays.origins.cuda(), viewdirs=rays.viewdirs.cuda())
145 |
146 | # WHITE BACKGROUND
147 | color_bkgds = torch.ones(test_nerf_A['color_bkgd'].shape)
148 | color_bkgds = color_bkgds.cuda()
149 |
150 | # Interpolation
151 | draw_images(
152 | rays,
153 | color_bkgds,
154 | embeddings,
155 | decoder,
156 | scene_aabb,
157 | render_step_size,
158 | curr_folder_path,
159 | device
160 | )
161 |
162 | n_images += 1
163 |
164 | def get_dset_json_path(split):
165 |     # Default to the train split; only override for the validation and test splits.
166 |     dset_json_path = settings.TRAIN_DSET_JSON
167 | 
168 |     if split == nerf2vec_config.VAL_SPLIT:
169 |         dset_json_path = settings.VAL_DSET_JSON
170 |     elif split == nerf2vec_config.TEST_SPLIT:
171 |         dset_json_path = settings.TEST_DSET_JSON
172 | 
173 |     return dset_json_path
174 |
175 | def is_nerf_augmented(data_dir):
176 | return "_A1" in data_dir or "_A2" in data_dir
177 |
178 | def main() -> None:
179 | do_interpolation(device=settings.DEVICE_NAME, split=nerf2vec_config.TRAIN_SPLIT)
180 |
181 | if __name__ == "__main__":
182 | main()
--------------------------------------------------------------------------------
/task_interp_and_retrieval/retrieval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 | import settings
7 |
8 |
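# Retrieval evaluation: nerf2vec embeddings of the chosen split are loaded from .h5 files, a
# KDTree is built over them, and Recall@k (k = 1, 5, 10) is computed by checking whether the
# query's class appears among its k nearest neighbours (the query itself is excluded). A few
# query/neighbour sets per class are also rendered to task_interp_and_retrieval/retrieval_plots_<split>/.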
9 | import math
10 | import uuid
11 |
12 | import h5py
13 | import torch
14 | import numpy as np
15 | import imageio.v2 as imageio
16 |
17 | from pathlib import Path
18 | from typing import Dict, List, Tuple
19 | from collections import defaultdict
20 |
21 | from torch import Tensor
22 | from torch.cuda.amp import autocast
23 | from torch.utils.data import Dataset
24 |
25 | from nerf.utils import Rays, render_image
26 | from nerf2vec import config as nerf2vec_config
27 | from nerf2vec.utils import get_latest_checkpoints_path, get_rays
28 |
29 | from sklearn.neighbors import KDTree
30 | from models.idecoder import ImplicitDecoder
31 |
32 |
33 | class InrEmbeddingDataset(Dataset):
34 | def __init__(self, root: Path, split: str) -> None:
35 | super().__init__()
36 |
37 | self.root = root / split
38 | self.item_paths = sorted(self.root.glob("*.h5"), key=lambda x: int(x.stem))
39 |
40 | def __len__(self) -> int:
41 | return len(self.item_paths)
42 |
43 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]:
44 | with h5py.File(self.item_paths[index], "r") as f:
45 | embedding = np.array(f.get("embedding"))
46 | embedding = torch.from_numpy(embedding)
47 | class_id = np.array(f.get("class_id"))
48 | class_id = torch.from_numpy(class_id).long()
49 |
50 | return embedding, class_id
51 |
52 |
53 | @torch.no_grad()
54 | def draw_images(decoder, embeddings, plots_path, device):
55 |
56 | scene_aabb = torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
57 | render_step_size = (
58 | (scene_aabb[3:] - scene_aabb[:3]).max()
59 | * math.sqrt(3)
60 | / nerf2vec_config.GRID_CONFIG_N_SAMPLES
61 | ).item()
62 |
63 | rays = get_rays(device)
64 |
65 | # WHITE BACKGROUND
66 | color_bkgd = torch.ones((1,3), device=device)
67 |
68 | img_name = str(uuid.uuid4())
69 |
70 | for idx, emb in enumerate(embeddings):
71 | emb = torch.tensor(emb, device=device, dtype=torch.float32)
72 | emb = emb.unsqueeze(dim=0)
73 | with autocast():
74 | rgb_A, alpha, b, c, _, _ = render_image(
75 | radiance_field=decoder,
76 | embeddings=emb,
77 | occupancy_grid=None,
78 | rays=Rays(origins=rays.origins.unsqueeze(dim=0), viewdirs=rays.viewdirs.unsqueeze(dim=0)),
79 | scene_aabb=scene_aabb,
80 | render_step_size=render_step_size,
81 | render_bkgd=color_bkgd,
82 | grid_weights=None,
83 | device=device
84 | )
85 |
86 | imageio.imwrite(
87 | os.path.join(plots_path, f'{img_name}_{idx}.png'),
88 | (rgb_A.squeeze(dim=0).cpu().detach().numpy() * 255).astype(np.uint8)
89 | )
90 |
91 | print(f' {img_name}_{idx}.png saved')
92 |
93 |
94 | @torch.no_grad()
95 | def get_recalls(gallery: Tensor,
96 | labels_gallery: Tensor,
97 | kk: List[int], decoder,
98 | plots_path: str,
99 | device:str) -> Dict[int, float]:
100 | max_nn = max(kk)
101 | recalls = {idx: 0.0 for idx in kk}
102 | targets = labels_gallery.cpu().numpy()
103 | gallery = gallery.cpu().numpy()
104 | tree = KDTree(gallery)
105 |
106 | dic_renderings = defaultdict(int)
107 |
108 | for query, label_query in zip(gallery, targets):
109 | with torch.no_grad():
110 | query = np.expand_dims(query, 0)
111 | _, indices_matched = tree.query(query, k=max_nn + 1)
112 | indices_matched = indices_matched[0]
113 |
114 | # Draw the query and the first N neighbours
115 | if dic_renderings[label_query] < 10:
116 | print(f'Generating images for class {label_query}...')
117 | draw_images(decoder, gallery[indices_matched], plots_path, device)
118 | dic_renderings[label_query] += 1
119 |
120 | for k in kk:
121 | indices_matched_temp = indices_matched[1 : k + 1]
122 | classes_matched = targets[indices_matched_temp]
123 | recalls[k] += np.count_nonzero(classes_matched == label_query) > 0
124 |
125 | for key, value in recalls.items():
126 | recalls[key] = value / (1.0 * len(gallery))
127 |
128 | return recalls
129 |
130 | @torch.no_grad()
131 | def do_retrieval(device='cuda:0', split=nerf2vec_config.TEST_SPLIT):
132 |
133 | # Init nerf2vec
134 | decoder = ImplicitDecoder(
135 | embed_dim=nerf2vec_config.ENCODER_EMBEDDING_DIM,
136 | in_dim=nerf2vec_config.DECODER_INPUT_DIM,
137 | hidden_dim=nerf2vec_config.DECODER_HIDDEN_DIM,
138 | num_hidden_layers_before_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_BEFORE_SKIP,
139 | num_hidden_layers_after_skip=nerf2vec_config.DECODER_NUM_HIDDEN_LAYERS_AFTER_SKIP,
140 | out_dim=nerf2vec_config.DECODER_OUT_DIM,
141 | encoding_conf=nerf2vec_config.INSTANT_NGP_ENCODING_CONF,
142 | aabb=torch.tensor(nerf2vec_config.GRID_AABB, dtype=torch.float32, device=device)
143 | )
144 | decoder.eval()
145 | decoder = decoder.to(device)
146 |
147 | ckpt_path = get_latest_checkpoints_path(Path(settings.NERF2VEC_CKPTS_PATH))
148 | print(f'loading weights: {ckpt_path}')
149 | ckpt = torch.load(ckpt_path)
150 | decoder.load_state_dict(ckpt["decoder"])
151 |
152 | dset_root = Path(settings.NERF2VEC_EMBEDDINGS_DIR)
153 | dset = InrEmbeddingDataset(dset_root, split)
154 |
155 | embeddings = []
156 | labels = []
157 |
158 | for i in range(len(dset)):
159 | embedding, label = dset[i]
160 | embeddings.append(embedding)
161 | labels.append(label)
162 |
163 | embeddings = torch.stack(embeddings)
164 | labels = torch.stack(labels)
165 |
166 | plots_path = os.path.join('task_interp_and_retrieval', f'retrieval_plots_{split}')
167 | os.makedirs(plots_path, exist_ok=True)
168 |
169 | recalls = get_recalls(embeddings, labels, [1, 5, 10], decoder, plots_path, device)
170 | for key, value in recalls.items():
171 | print(f"Recall@{key} : {100. * value:.2f}%")
172 |
173 |
174 | def main() -> None:
175 | do_retrieval(device=settings.DEVICE_NAME, split=nerf2vec_config.TEST_SPLIT)
176 |
177 | if __name__ == "__main__":
178 | main()
--------------------------------------------------------------------------------
/task_mapping_network/README.md:
--------------------------------------------------------------------------------
1 | # Task Mapping Network
2 | The mapping network task requires training the *inr2vec* framework. Please refer to [this](https://github.com/CVLAB-Unibo/inr2vec?tab=readme-ov-file#setup) page to properly configure your environment.
3 |
4 | To complete this task, execute the following operations in the order listed below.
5 |
6 | ## 1) Create point clouds
7 | This step creates the dataset on which *inr2vec* will be trained. Before running it, update the *shapenet_root* variable in *task_mapping_network/cfg/pcd_dataset.yaml* so that it points to the root of your *ShapeNet* folder, as in the sketch below.
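
A minimal sketch of the relevant entry; the actual file may contain additional keys, and the path is only a placeholder:
```yaml
# task_mapping_network/cfg/pcd_dataset.yaml (excerpt)
shapenet_root: "/path/to/ShapeNet"   # absolute path to the ShapeNet root folder
```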
8 |
9 | Then, execute the following command:
10 | ```bash
11 | python task_mapping_network/inr2vec/create_point_clouds_dataset.py
12 | ```
13 |
14 | ## 2) Create INRs dataset
15 | Create the INRs dataset by executing the following command:
16 | ```bash
17 | python task_mapping_network/inr2vec/create_inrs_dataset.py
18 | ```
19 | The file *task_mapping_network/cfg/inrs_dataset.yaml* contains all the configurations used for this step.
20 |
21 | ## 3) Train *inr2vec*
22 | Train *inr2vec* with the following command:
23 | ```bash
24 | python task_mapping_network/inr2vec/train_inr2vec.py
25 | ```
26 | The file *task_mapping_network/cfg/inr2vec.yaml* contains all the configurations used for this step.
27 |
28 | ## 4) Export *inr2vec* and *nerf2vec* embeddings
29 | Create embeddings that will be properly organized to train the mapping network:
30 | ```bash
31 | python task_mapping_network/export_inrs_embeddings.py
32 | python task_mapping_network/export_nerfs_embeddings.py
33 | ```
34 |
35 | The file *task_mapping_network/cfg/export_embeddings.yaml* contains all the configurations used for this step.
36 |
37 | ## 5) Train the mapping network
38 | Train the mapping network:
39 | ```bash
40 | python task_mapping_network/train_completion.py
41 | ```
42 | The file *task_mapping_network/cfg/completion.yaml* contains all the configurations used for this step.
43 |
44 |
45 | ## 6) Export results
46 | Export and visualize the results by executing:
47 | ```bash
48 | python task_mapping_network/viz.py
49 | ```
50 | The file *task_mapping_network/cfg/completion.yaml* contains all the configurations used for this step.
51 |
52 | The results will be saved in the *task_mapping_network/completion_plots* folder.
53 |
54 |
--------------------------------------------------------------------------------
/task_mapping_network/cfg/completion.yaml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths
2 |
3 | out_root: "task_mapping_network/train"
4 |
5 | inrs_dset_root: "task_mapping_network/inr_embeddings"
6 | nerfs_dset_root: "task_mapping_network/nerf_embeddings"
7 | pcd_root: "task_mapping_network/point_clouds"
8 |
9 | train_split: "train"
10 | train_bs: 256
11 |
12 | val_split: "val"
13 | val_bs: 16
14 |
15 | test_split: "test"
16 |
17 | embedding_dim: 1024
18 | num_layers_transfer: 8
19 |
20 | nerf2vec_decoder_ckpt_path: "nerf2vec/train/ckpts/499.pt"
21 | inr2vec_decoder_ckpt_path: "task_mapping_network/inr2vec/train/ckpts/299.pt"
22 | completion_ckpt_path: "task_mapping_network/train/ckpts/299.pt"
23 |
24 | inr_decoder:
25 | input_dim: 3
26 | hidden_dim: 512
27 | num_hidden_layers_before_skip: 2
28 | num_hidden_layers_after_skip: 2
29 | out_dim: 1
30 |
31 | nerf_decoder:
32 | input_dim: 3
33 | hidden_dim: 1024
34 | num_hidden_layers_before_skip: 2
35 | num_hidden_layers_after_skip: 2
36 | out_dim: 4
37 |
38 | lr: 1e-4
39 | wd: 1e-4
40 | num_epochs: 300
--------------------------------------------------------------------------------
/task_mapping_network/cfg/export_embeddings.yaml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths
2 |
3 | inrs_root: "task_mapping_network/inrs"
4 | ckpt_path: "task_mapping_network/inr2vec/train/299.pt"
5 | out_root: "task_mapping_network/inr_embeddings"
6 |
7 | nerf2vec_ckpt_path: "nerf2vec/train/ckpts/499.pt"
8 | nerf2vec_train_json_path: "data/train.json"
9 | nerf2vec_val_json_path: "data/validation.json"
10 | nerf2vec_test_json_path: "data/test.json"
11 | nerf_out_root: "task_mapping_network/nerf_embeddings"
12 |
13 | encoder:
14 | hidden_dims: [512, 512, 1024, 1024]
15 | embedding_dim: 1024
16 |
17 | mlp:
18 | hidden_dim: 512
19 | num_hidden_layers: 4
20 |
21 | train_split: "train"
22 | val_split: "val"
23 | test_split: "test"
24 |
--------------------------------------------------------------------------------
/task_mapping_network/cfg/inr2vec.yaml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths
2 |
3 | inrs_root: task_mapping_network/inrs
4 | out_root: "task_mapping_network/inr2vec/train"
5 |
6 | num_queries_on_surface: 3_500
7 | stds: [0.003, 0.01, 0.1]
8 | num_points_per_std: [3_500, 2_000, 500, 500]
9 |
10 | encoder:
11 | hidden_dims: [512, 512, 1024, 1024]
12 | embedding_dim: 1024
13 |
14 | decoder:
15 | input_dim: 3
16 | hidden_dim: 512
17 | num_hidden_layers_before_skip: 2
18 | num_hidden_layers_after_skip: 2
19 | out_dim: 1
20 |
21 | mlp:
22 | hidden_dim: 512
23 | num_hidden_layers: 4
24 |
25 | train_split: "train"
26 | val_split: "val"
27 |
28 | train_bs: 16
29 | val_bs: 16
30 |
31 | lr: 1e-4
32 | wd: 1e-2
33 | num_epochs: 300
34 |
--------------------------------------------------------------------------------
/task_mapping_network/cfg/inrs_dataset.yaml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths
2 |
3 | pcd_root: "task_mapping_network/point_clouds"
4 | split_json_root_path: "data"
5 |
6 | splits: ["train", "validation", "test"]
7 | num_points_pcd: 2048
8 |
9 | num_required_train_shapes: 100_000
10 |
11 | num_queries_on_surface: 100_000
12 | stds: [0.003, 0.01, 0.1]
13 | num_points_per_std: [250_000, 200_000, 25_000, 25_000]
14 |
15 | num_points_fitting: 10_000
16 | num_parallel_mlps: 16
17 |
18 | mlp:
19 | hidden_dim: 512
20 | num_hidden_layers: 4
21 | init_path: "task_mapping_network/inits/in3_out1_h512_l4.pt"
22 |
23 | num_steps: 500
24 | lr: 1e-4
25 |
26 | out_root: "task_mapping_network/inrs"
27 |
--------------------------------------------------------------------------------
/task_mapping_network/cfg/pcd_dataset.yaml:
--------------------------------------------------------------------------------
1 | # IMPORTANT: Use relative paths to the root of the project or absolute paths
2 |
3 | splits: ["train", "validation", "test"]
4 |
5 | split_json_root_path: "data"
6 | out_point_clouds_path: "task_mapping_network/point_clouds"  # must match "pcd_root" in inrs_dataset.yaml
7 | shapenet_root: "/media/data7/dsirocchi/ShapeNetCore.v1"
8 |
--------------------------------------------------------------------------------
/task_mapping_network/export_inrs_embeddings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 | import settings
7 |
8 | from pathlib import Path
9 | from typing import Any, Dict, Tuple
10 |
11 | import h5py
12 | import numpy as np
13 | import torch
14 | from hesiod import hcfg, hmain
15 | from pycarus.learning.models.siren import SIREN
16 | from pycarus.utils import progress_bar
17 | from torch import Tensor
18 | from torch.utils.data import DataLoader, Dataset
19 |
20 | from task_mapping_network.inr2vec.models.encoder import Encoder
21 | from task_mapping_network.inr2vec.utils import get_mlp_params_as_matrix
22 |
23 |
24 | class InrDataset(Dataset):
25 | def __init__(self, inrs_root: Path, split: str, sample_sd: Dict[str, Any]) -> None:
26 | super().__init__()
27 |
28 | self.inrs_root = inrs_root / split
29 | self.mlps_paths = sorted(self.inrs_root.glob("*.h5"), key=lambda x: int(x.stem))
30 | self.sample_sd = sample_sd
31 |
32 | def __len__(self) -> int:
33 | return len(self.mlps_paths)
34 |
35 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, str]:
36 | with h5py.File(self.mlps_paths[index], "r") as f:
37 | pcd = torch.from_numpy(np.array(f.get("pcd")))
38 | params = np.array(f.get("params"))
39 | params = torch.from_numpy(params).float()
40 | matrix = get_mlp_params_as_matrix(params, self.sample_sd)
41 | class_id = torch.from_numpy(np.array(f.get("class_id"))).long()
42 | uuid = f.get("uuid")[()].decode()
43 |
44 | return pcd, matrix, class_id, uuid
45 |
46 |
47 | @hmain(
48 | base_cfg_dir="cfg/bases",
49 | template_cfg_file="task_mapping_network/cfg/export_embeddings.yaml",
50 | create_out_dir=False,
51 | out_dir_root="task_mapping_network/logs"
52 | )
53 | def main() -> None:
54 |
55 | inrs_root = Path(hcfg("inrs_root", str))
56 |
57 | mlp_hdim = hcfg("mlp.hidden_dim", int)
58 | num_hidden_layers = hcfg("mlp.num_hidden_layers", int)
59 | mlp = SIREN(3, mlp_hdim, num_hidden_layers, 1)
60 | sample_sd = mlp.state_dict()
61 |
62 | train_split = hcfg("train_split", str)
63 | train_dset = InrDataset(inrs_root, train_split, sample_sd)
64 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False)
65 |
66 | val_split = hcfg("val_split", str)
67 | val_dset = InrDataset(inrs_root, val_split, sample_sd)
68 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False)
69 |
70 | test_split = hcfg("test_split", str)
71 | test_dset = InrDataset(inrs_root, test_split, sample_sd)
72 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False)
73 |
74 | encoder_cfg = hcfg("encoder", Dict[str, Any])
75 | encoder = Encoder(
76 | mlp_hdim,
77 | encoder_cfg["hidden_dims"],
78 | encoder_cfg["embedding_dim"],
79 | )
80 | ckpt = torch.load(hcfg("ckpt_path", str), map_location="cpu")
81 | encoder.load_state_dict(ckpt["encoder"])
82 | encoder = encoder.cuda()
83 | encoder.eval()
84 |
85 | loaders = [train_loader, val_loader, test_loader]
86 | splits = [train_split, val_split, test_split]
87 |
88 | for loader, split in zip(loaders, splits):
89 | idx = 0
90 |
91 | for batch in progress_bar(loader, f"{split}"):
92 |
93 | # Limit the number of samples in the train set to 32414, which corresponds to the number of non-augmented samples
94 | if split == 'train' and idx == 32414:
95 | break
96 |
97 | pcds, matrices, class_ids, uuids = batch
98 | matrices = matrices.cuda()
99 |
100 | with torch.no_grad():
101 | embedding = encoder(matrices)
102 |
103 | h5_path = Path(hcfg("out_root", str)) / Path(f"{split}") / f"{idx}.h5"
104 | h5_path.parent.mkdir(parents=True, exist_ok=True)
105 |
106 | with h5py.File(h5_path, "w") as f:
107 | f.create_dataset("pcd", data=pcds[0].detach().cpu().numpy())
108 | f.create_dataset("embedding", data=embedding[0].detach().cpu().numpy())
109 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy())
110 | f.create_dataset("uuid", data=uuids[0])
111 |
112 | idx += 1
113 |
114 | if __name__ == "__main__":
115 | main()
--------------------------------------------------------------------------------
/task_mapping_network/export_nerfs_embeddings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | sys.path.append(parent_dir)
6 | import settings
7 |
9 | import json
10 | import h5py
11 | import torch
12 |
14 | from pathlib import Path
15 | from typing import Tuple
16 | from torch import Tensor
17 | from torch.utils.data import DataLoader, Dataset
18 | from hesiod import hcfg, hmain
19 |
20 | from models.encoder import Encoder
21 | from nerf2vec import config as nerf2vec_config
22 | from nerf2vec.utils import get_class_label, get_mlp_params_as_matrix
23 |
24 |
25 | class InrDataset(Dataset):
26 | def __init__(self, split_json: str, device: str, nerf_weights_file_name: str) -> None:
27 | super().__init__()
28 |
29 | with open(split_json) as file:
30 | self.nerf_paths = json.load(file)
31 | self.nerf_paths = sorted(self.nerf_paths)
32 |
33 | assert isinstance(self.nerf_paths, list), 'The json file provided is not a list.'
34 |
35 | self.device = device
36 | self.nerf_weights_file_name = nerf_weights_file_name
37 |
38 | def __len__(self) -> int:
39 | return len(self.nerf_paths)
40 |
41 | def __getitem__(self, index: int) -> Tuple[Tensor, int, str]:
42 |
43 | data_dir = self.nerf_paths[index]
44 | weights_file_path = os.path.join(data_dir, self.nerf_weights_file_name)
45 |
46 | class_id = nerf2vec_config.LABELS_TO_IDS[get_class_label(weights_file_path)]
47 |
48 | matrix = torch.load(weights_file_path, map_location=torch.device(self.device))
49 | matrix = get_mlp_params_as_matrix(matrix['mlp_base.params'])
50 |
51 | return matrix, class_id, data_dir
52 |
53 | @hmain(
54 | base_cfg_dir="cfg/bases",
55 | template_cfg_file="task_mapping_network/cfg/export_embeddings.yaml",
56 | create_out_dir=False,
57 | out_dir_root="task_mapping_network/logs"
58 | )
59 | def export_embeddings():
60 |
61 | device = settings.DEVICE_NAME
62 |
63 | train_dset_json = hcfg("nerf2vec_train_json_path", str)
64 | train_dset = InrDataset(train_dset_json, device='cpu', nerf_weights_file_name=nerf2vec_config.NERF_WEIGHTS_FILE_NAME)
65 | train_loader = DataLoader(train_dset, batch_size=1, num_workers=0, shuffle=False)
66 |
67 | """
68 | val_dset_json = settings.VAL_DSET_JSON
69 | val_dset = InrDataset(val_dset_json, device='cpu', nerf_weights_file_name=config.NERF_WEIGHTS_FILE_NAME)
70 | val_loader = DataLoader(val_dset, batch_size=1, num_workers=0, shuffle=False)
71 |
72 | test_dset_json = settings.TEST_DSET_JSON
73 | test_dset = InrDataset(test_dset_json, device='cpu', nerf_weights_file_name=config.NERF_WEIGHTS_FILE_NAME)
74 | test_loader = DataLoader(test_dset, batch_size=1, num_workers=0, shuffle=False)
75 | """
76 |
77 | encoder = Encoder(
78 | nerf2vec_config.MLP_UNITS,
79 | nerf2vec_config.ENCODER_HIDDEN_DIM,
80 | nerf2vec_config.ENCODER_EMBEDDING_DIM
81 | )
82 | encoder = encoder.to(device)
83 | ckpt = torch.load(hcfg("nerf2vec_ckpt_path", str))
84 | encoder.load_state_dict(ckpt["encoder"])
85 | encoder.eval()
86 |
87 | loaders = [train_loader] # , val_loader, test_loader]
88 | splits = [nerf2vec_config.TRAIN_SPLIT] #, config.VAL_SPLIT, config.TEST_SPLIT]
89 |
90 |
91 | for loader, split in zip(loaders, splits):
92 | idx = 0
93 |
94 | for batch in loader:
95 | matrices, class_ids, data_dirs = batch
96 | matrices = matrices.to(device)
97 |
98 | with torch.no_grad():
99 | embeddings = encoder(matrices)
100 |
101 | out_root = Path(hcfg("nerf_out_root", str))
102 | h5_path = out_root / Path(f"{split}") / f"{idx}.h5"
103 | h5_path.parent.mkdir(parents=True, exist_ok=True)
104 |
105 | with h5py.File(h5_path, "w") as f:
106 |
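107 | # Use the last path component of the NeRF data directory as the sample uuid (stripping any '.ply' suffix).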
107 | p = Path(data_dirs[0])
108 | uuid = p.parts[-1].replace('.ply','')
109 |
110 | f.create_dataset("data_dir", data=data_dirs[0])
111 | f.create_dataset("embedding", data=embeddings[0].detach().cpu().numpy())
112 | f.create_dataset("class_id", data=class_ids[0].detach().cpu().numpy())
113 | f.create_dataset("uuid", data=uuid)
114 |
115 | idx += 1
116 |
117 | if idx % 5000 == 0:
118 | print(f'Created {idx} embeddings for {split} split')
119 |
120 | if __name__ == "__main__":
121 | export_embeddings()
--------------------------------------------------------------------------------
/task_mapping_network/inits/in3_out1_h512_l4.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_mapping_network/inits/in3_out1_h512_l4.pt
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/create_inrs_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | script_dir = os.path.dirname(os.path.abspath(__file__))
4 | parent_dir = os.path.dirname(script_dir)
5 | project_root_dir = os.path.dirname(parent_dir)
6 | sys.path.append(parent_dir)
7 | sys.path.append(project_root_dir)
8 |
9 | import settings
10 |
11 | from math import ceil
12 | from pathlib import Path
13 | from typing import Callable, List
14 |
15 | import h5py
16 | import torch
17 | import torch.nn.functional as F
18 | from hesiod import hcfg, hmain
19 | from pycarus.datasets.ply import PlyDataset
20 | from pycarus.geometry.pcd import compute_udf_from_pcd, farthest_point_sampling
21 | from pycarus.geometry.pcd import random_point_sampling, shuffle_pcd
22 | from pycarus.learning.models.siren import SIREN
23 | from pycarus.transforms.pcd import JitterPcd, NormalizePcdIntoUnitSphere, RandomScalePcd
24 | from pycarus.utils import progress_bar
25 | from torch.optim import Adam
26 | from torch.utils.data import DataLoader, Dataset
27 |
28 | from task_mapping_network.inr2vec.utils import get_mlps_batched_params, mlp_batched_forward
29 |
30 |
31 | class InrsDatasetCreator:
32 | def __init__(self) -> None:
33 |
34 | self.split_json_root_path = hcfg('split_json_root_path')
35 | self.pcd_root = Path(hcfg("pcd_root", str))
36 |
37 | self.splits = hcfg("splits", List[str])
38 | self.num_points_pcd = hcfg("num_points_pcd", int)
39 |
40 | self.num_queries_on_surface = hcfg("num_queries_on_surface", int)
41 | self.stds = hcfg("stds", List[float])
42 | self.num_points_per_std = hcfg("num_points_per_std", List[int])
43 |
44 | self.num_required_train_shapes = hcfg("num_required_train_shapes", int)
45 |
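46 | # Number of augmented copies of the train set needed, in addition to the
47 | # original shapes, to reach num_required_train_shapes fitted INRs.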
46 | dset = self.get_dataset("train")
47 | num_train_shapes = len(dset)
48 | self.num_augmentations = ceil(self.num_required_train_shapes / num_train_shapes) - 1
49 |
50 | self.num_points_fitting = hcfg("num_points_fitting", int)
51 | self.num_parallel_mlps = hcfg("num_parallel_mlps", int)
52 | self.hdim = hcfg("mlp.hidden_dim", int)
53 | self.num_hidden_layers = hcfg("mlp.num_hidden_layers", int)
54 | self.mlp_init_path = Path(hcfg("mlp.init_path", str))
55 |
56 | self.num_steps = hcfg("num_steps", int)
57 | self.lr = hcfg("lr", float)
58 |
59 | self.out_root = Path(hcfg("out_root", str))
60 | self.out_root.mkdir(parents=True)
61 |
62 |
63 | def build_mlp(self) -> SIREN:
64 | mlp = SIREN(
65 | input_dim=3,
66 | hidden_dim=self.hdim,
67 | num_hidden_layers=self.num_hidden_layers,
68 | out_dim=1,
69 | )
70 |
71 | mlp.load_state_dict(torch.load(self.mlp_init_path))
72 |
73 | return mlp
74 |
75 |
76 | def get_dataset(self, split: str, transforms: List[Callable] = []) -> Dataset:
77 | dset = PlyDataset(self.pcd_root, split, transforms)
78 | return dset
79 |
80 | def create_dataset(self) -> None:
81 |
82 | for split in self.splits:
83 | global_idx = 0
84 |
85 | augs = [False]
86 | if "train" in split:
87 | augs += [True] * self.num_augmentations
88 |
89 | for aug_idx, aug in enumerate(augs):
90 | if aug:
91 | transforms = [
92 | RandomScalePcd(2 / 3, 3 / 2),
93 | JitterPcd(sigma=0.01, clip=0.05),
94 | NormalizePcdIntoUnitSphere(),
95 | ]
96 | else:
97 | transforms = [NormalizePcdIntoUnitSphere()]
98 |
99 | dset = self.get_dataset(split, transforms)
100 |
101 | loader = DataLoader(
102 | dset,
103 | batch_size=self.num_parallel_mlps,
104 | shuffle=False,
105 | num_workers=8,
106 | )
107 |
108 | desc = f"Fitting {split} set ({aug_idx + 1}/{len(augs)})"
109 | for batch in progress_bar(loader, desc, 80):
110 | pcds, class_ids, uuids = batch
111 |
112 | bs = pcds.shape[0]
113 | pcds = pcds.cuda()
114 |
115 | if pcds.shape[1] != self.num_points_pcd:
116 | pcds = farthest_point_sampling(pcds, self.num_points_pcd)
117 |
118 | coords = []
119 | labels = []
120 | for idx in range(bs):
121 | pcd_coords, pcd_labels = compute_udf_from_pcd(
122 | pcds[idx],
123 | self.num_queries_on_surface,
124 | self.stds,
125 | self.num_points_per_std,
126 | coords_range=(-1, 1),
127 | convert_to_bce_labels=True,
128 | )
129 | coords.append(pcd_coords)
130 | labels.append(pcd_labels)
131 |
132 | coords = torch.stack(coords, dim=0)
133 | labels = torch.stack(labels, dim=0)
134 |
135 | coords_and_labels = torch.cat((coords, labels.unsqueeze(-1)), dim=-1).cuda()
136 | coords_and_labels = shuffle_pcd(coords_and_labels)
137 |
138 | mlps = [self.build_mlp().cuda() for _ in range(bs)]
139 | batched_params = get_mlps_batched_params(mlps)
140 |
141 | optimizer = Adam(batched_params, lr=self.lr)
142 |
143 | for _ in progress_bar(range(self.num_steps)):
144 | selected_c_and_l = random_point_sampling(
145 | coords_and_labels,
146 | self.num_points_fitting,
147 | )
148 |
149 | selected_coords = selected_c_and_l[:, :, :3]
150 | selected_labels = selected_c_and_l[:, :, 3]
151 |
152 | pred = mlp_batched_forward(batched_params, selected_coords)
153 | loss = F.binary_cross_entropy_with_logits(pred, selected_labels)
154 |
155 | optimizer.zero_grad()
156 | loss.backward()
157 | optimizer.step()
158 |
159 | for idx in range(bs):
160 | pcd = pcds[idx]
161 | class_id = class_ids[idx]
162 | uuid = uuids[idx]
163 |
164 | flattened_params = [p[idx].view(-1) for p in batched_params]
165 | flattened_params = torch.cat(flattened_params, dim=0)
166 |
167 | h5_path = self.out_root / split / f"{global_idx}.h5"
168 | h5_path.parent.mkdir(parents=True, exist_ok=True)
169 |
170 | with h5py.File(h5_path, "w") as f:
171 | f.create_dataset("pcd", data=pcd.detach().cpu().numpy())
172 | f.create_dataset("params", data=flattened_params.detach().cpu().numpy())
173 | f.create_dataset("class_id", data=class_id.detach().cpu().numpy())
174 | f.create_dataset("uuid", data=uuid)
175 |
176 | global_idx += 1
177 |
178 | @hmain(base_cfg_dir="cfg/bases", template_cfg_file="task_mapping_network/cfg/inrs_dataset.yaml", create_out_dir=False)
179 | def create() -> None:
180 | dset_creator = InrsDatasetCreator()
181 | dset_creator.create_dataset()
182 |
183 | if __name__ == "__main__":
184 | create()
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/create_point_clouds_dataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from pathlib import Path
4 | from hesiod import hcfg, hmain
5 | import open3d as o3d
6 |
7 | from typing import List
8 |
9 |
10 | def get_dataset_json(root:str, split: str):
11 | json_path = os.path.join(root, f'{split}.json')
12 |
13 | folders = []
14 |
15 | with open(json_path) as file:
16 | dset = json.load(file)
17 |
18 | for nerf_path in dset:
19 | # Skip augmented data
20 | if nerf_path.endswith('_A1') or nerf_path.endswith('_A2'):
21 | continue
22 |
23 | full_path = Path(nerf_path)
24 | relative_path = os.path.join(full_path.parts[-2], full_path.parts[-1])
25 | folders.append(relative_path)
26 |
27 | return folders
28 |
29 | @hmain(
30 | base_cfg_dir="task_mapping_network/cfg/bases",
31 | template_cfg_file="task_mapping_network/cfg/pcd_dataset.yaml",
32 | run_cfg_file=None,
33 | parse_cmd_line=False,
34 | out_dir_root="task_mapping_network/logs"
35 | )
36 | def create_dataset():
37 |
38 | split_json_root_path = hcfg("split_json_root_path", str)
39 | out_point_clouds_path = hcfg("out_point_clouds_path", str)
40 | mesh_root = hcfg("shapenet_root", str)
41 |
42 |
43 | splits = hcfg("splits", List[str])
44 |
45 |
46 | for split in splits:
47 | shapes = get_dataset_json(split_json_root_path, split)
48 | for shape in shapes:
49 |
50 | mesh_class = shape.split('/')[0]
51 | mesh_id = shape.split('/')[1]
52 |
53 | mesh_path = os.path.join(mesh_root, shape, 'model.obj')
54 |
55 |
56 | mesh = o3d.io.read_triangle_mesh(mesh_path)
57 |
58 | num_points = 10000 # Adjust the number of points as needed
59 | pcd = mesh.sample_points_uniformly(number_of_points=num_points)
60 | pcd_folder = os.path.join(out_point_clouds_path, mesh_class, split)
61 | os.makedirs(pcd_folder, exist_ok=True)
62 |
63 | pcd_full_path = os.path.join(pcd_folder, f'{mesh_id}.ply')
64 |
65 | o3d.io.write_point_cloud(pcd_full_path, pcd)
66 |
67 |
68 | if __name__ == "__main__":
69 | create_dataset()
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVLAB-Unibo/nf2vec/7e10220040b89404914d3a8317d39a8ac7337a8d/task_mapping_network/inr2vec/models/__init__.py
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/models/encoder.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import torch
4 | from torch import nn
5 |
6 |
7 | class Encoder(nn.Module):
8 | def __init__(self, input_dim: int, hidden_dims: List[int], embed_dim: int) -> None:
9 | super().__init__()
10 |
11 | layers = []
12 | for idx in range(len(hidden_dims)):
13 | in_ch = input_dim if idx == 0 else hidden_dims[idx - 1]
14 | out_ch = hidden_dims[idx]
15 | layers.append(nn.Conv1d(in_ch, out_ch, 1))
16 | layers.append(nn.BatchNorm1d(out_ch))
17 | layers.append(nn.ReLU())
18 |
19 | layers.append(nn.Conv1d(hidden_dims[-1], embed_dim, 1))
20 |
21 | self.layers = nn.Sequential(*layers)
22 | self.embed_dim = embed_dim
23 |
24 | def forward(self, x: torch.Tensor) -> torch.Tensor:
25 | x_channels_first = torch.transpose(x, 2, 1)
26 | x = self.layers(x_channels_first)
27 | x, _ = torch.max(x, 2)
28 |
29 | return x
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/models/idecoder.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Tuple
2 |
3 | import torch
4 | from einops import repeat
5 | from torch import Tensor, nn
6 |
7 |
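8 | # NeRF-style positional encoding: maps input coordinates to a concatenation of
9 | # sin/cos features at multiple frequencies (optionally including the raw input).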
8 | class CoordsEncoder:
9 | def __init__(
10 | self,
11 | input_dims: int = 3,
12 | include_input: bool = True,
13 | max_freq_log2: int = 9,
14 | num_freqs: int = 10,
15 | log_sampling: bool = True,
16 | periodic_fns: Tuple[Callable, Callable] = (torch.sin, torch.cos),
17 | ) -> None:
18 | self.input_dims = input_dims
19 | self.include_input = include_input
20 | self.max_freq_log2 = max_freq_log2
21 | self.num_freqs = num_freqs
22 | self.log_sampling = log_sampling
23 | self.periodic_fns = periodic_fns
24 | self.create_embedding_fn()
25 |
26 | def create_embedding_fn(self) -> None:
27 | embed_fns = []
28 | d = self.input_dims
29 | out_dim = 0
30 | if self.include_input:
31 | embed_fns.append(lambda x: x)
32 | out_dim += d
33 |
34 | if self.log_sampling:
35 | freq_bands = 2.0 ** torch.linspace(0.0, self.max_freq_log2, steps=self.num_freqs)
36 | else:
37 | freq_bands = torch.linspace(2.0**0.0, 2.0**self.max_freq_log2, steps=self.num_freqs)
38 |
39 | for freq in freq_bands:
40 | for p_fn in self.periodic_fns:
41 | embed_fns.append(lambda x, p_fn=p_fn, freq=freq: p_fn(x * freq))
42 | out_dim += d
43 |
44 | self.embed_fns = embed_fns
45 | self.out_dim = out_dim
46 |
47 | def embed(self, inputs: Tensor) -> Tensor:
48 | return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
49 |
50 |
51 | class ImplicitDecoder(nn.Module):
52 | def __init__(
53 | self,
54 | embed_dim: int,
55 | in_dim: int,
56 | hidden_dim: int,
57 | num_hidden_layes_before_skip: int,
58 | num_hidden_layes_after_skip: int,
59 | out_dim: int,
60 | ) -> None:
61 | super().__init__()
62 |
63 | self.coords_enc = CoordsEncoder(in_dim)
64 | coords_dim = self.coords_enc.out_dim
65 |
66 | self.in_layer = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU())
67 |
68 | self.skip_proj = nn.Sequential(nn.Linear(embed_dim + coords_dim, hidden_dim), nn.ReLU())
69 |
70 | before_skip = []
71 | for _ in range(num_hidden_layes_before_skip):
72 | before_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
73 | self.before_skip = nn.Sequential(*before_skip)
74 |
75 | after_skip = []
76 | for _ in range(num_hidden_layes_after_skip):
77 | after_skip.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
78 | after_skip.append(nn.Linear(hidden_dim, out_dim))
79 | self.after_skip = nn.Sequential(*after_skip)
80 |
81 | def forward(self, embeddings: Tensor, coords: Tensor) -> Tensor:
82 | # embeddings (B, D1)
83 | # coords (B, N, D2)
84 | coords = self.coords_enc.embed(coords)
85 |
86 | repeated_embeddings = repeat(embeddings, "b d -> b n d", n=coords.shape[1])
87 |
88 | emb_and_coords = torch.cat([repeated_embeddings, coords], dim=-1)
89 |
90 | x = self.in_layer(emb_and_coords)
91 | x = self.before_skip(x)
92 |
93 | inp_proj = self.skip_proj(emb_and_coords)
94 | x = x + inp_proj
95 |
96 | x = self.after_skip(x)
97 |
98 | return x.squeeze(-1)
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/models/transfer.py:
--------------------------------------------------------------------------------
1 | from torch import Tensor, nn
2 |
3 |
4 | class Transfer(nn.Module):
5 | def __init__(self, emb_dim: int, num_layers: int) -> None:
6 | super().__init__()
7 |
8 | layers = []
9 | for i in range(num_layers):
10 | layers.append(nn.Linear(emb_dim, emb_dim))
11 |
12 | if i != num_layers - 1:
13 | layers.append(nn.BatchNorm1d(emb_dim))
14 | layers.append(nn.ReLU())
15 |
16 | self.net = nn.Sequential(*layers)
17 |
18 | def forward(self, x: Tensor) -> Tensor:
19 | return self.net(x)
--------------------------------------------------------------------------------
/task_mapping_network/inr2vec/utils.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Any, Dict, List
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | from pycarus.learning.models.siren import SIREN
7 | from torch import Tensor
8 |
9 |
10 | def get_mlps_batched_params(mlps: List[SIREN]) -> List[Tensor]:
11 | params = []
12 | for i in range(len(mlps)):
13 | params.append(list(mlps[i].parameters()))
14 |
15 | batched_params = []
16 | for i in range(len(params[0])):
17 | p = torch.stack([p[i] for p in params], dim=0)
18 | p = torch.clone(p.detach())
19 | p.requires_grad = True
20 | batched_params.append(p)
21 |
22 | return batched_params
23 |
24 |
25 | def flatten_mlp_params(sd: Dict[str, Any]) -> Tensor:
26 | all_params = []
27 | for k in sd:
28 | all_params.append(sd[k].view(-1))
29 | all_params = torch.cat(all_params, dim=-1)
30 | return all_params
31 |
32 |
33 | def unflatten_mlp_params(
34 | params: Tensor,
35 | sample_sd: Dict[str, Any],
36 | ) -> Dict[str, Any]:
37 | sd = collections.OrderedDict()
38 |
39 | start = 0
40 | for k in sample_sd:
41 | end = start + sample_sd[k].numel()
42 | layer_params = params[start:end].view(sample_sd[k].shape)
43 | sd[k] = layer_params
44 | start = end
45 |
46 | return sd
47 |
48 |
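49 | # Drop the first (input) and last (output) layer parameters and reshape the remaining
50 | # hidden-layer weights and biases into an (N, hidden_dim) matrix, i.e. the per-MLP
51 | # input expected by the inr2vec Encoder.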
49 | def get_mlp_params_as_matrix(flattened_params: Tensor, sd: Dict[str, Any]) -> Tensor:
50 | params_shapes = [p.shape for p in sd.values()]
51 | feat_dim = params_shapes[0][0]
52 | start = params_shapes[0].numel() + params_shapes[1].numel()
53 | end = params_shapes[-1].numel() + params_shapes[-2].numel()
54 | params = flattened_params[start:-end]
55 | return params.reshape((-1, feat_dim))
56 |
57 |
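58 | # Batched SIREN forward pass: params alternate (weights, biases) per layer; every
59 | # layer except the last applies the sine activation with frequency 30 (omega_0).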
58 | def mlp_batched_forward(batched_params: List[Tensor], coords: Tensor) -> Tensor:
59 | num_layers = len(batched_params) // 2
60 |
61 | f = coords
62 |
63 | for i in range(num_layers):
64 | weights = batched_params[i * 2]
65 | biases = batched_params[i * 2 + 1]
66 |
67 | f = torch.bmm(f, weights.permute(0, 2, 1)) + biases.unsqueeze(1)
68 |
69 | if i < num_layers - 1:
70 | f = torch.sin(30 * f)
71 |
72 | return f.squeeze(-1)
73 |
74 |
75 | def focal_loss(pred: Tensor, gt: Tensor, alpha: float = 0.1, gamma: float = 3) -> Tensor:
76 | alpha_w = torch.tensor([alpha, 1 - alpha]).cuda()
77 |
78 | bce_loss = F.binary_cross_entropy_with_logits(pred, gt.float(), reduction="none")
79 | bce_loss = bce_loss.view(-1)
80 |
81 | gt = gt.type(torch.long)
82 | at = alpha_w.gather(0, gt.view(-1))
83 | pt = torch.exp(-bce_loss)
84 | f_loss = at * ((1 - pt) ** gamma) * bce_loss
85 |
86 | return f_loss.mean()
87 |
88 |
89 | def get_class_to_parts(dset_name: str) -> Dict[str, List[int]]:
90 | shapenet_partseg = {
91 | "02691156": [0, 1, 2, 3],
92 | "02773838": [4, 5],
93 | "02954340": [6, 7],
94 | "02958343": [8, 9, 10, 11],
95 | "03001627": [12, 13, 14, 15],
96 | "03261776": [16, 17, 18],
97 | "03467517": [19, 20, 21],
98 | "03624134": [22, 23],
99 | "03636649": [24, 25, 26, 27],
100 | "03642806": [28, 29],
101 | "03790512": [30, 31, 32, 33, 34, 35],
102 | "03797390": [36, 37],
103 | "03948459": [38, 39, 40],
104 | "04099429": [41, 42, 43],
105 | "04225987": [44, 45, 46],
106 | "04379243": [47, 48, 49],
107 | }
108 |
109 | gallery = {"shapenet-partseg": shapenet_partseg}
110 |
111 | return gallery[dset_name]
--------------------------------------------------------------------------------