├── .gitignore ├── CodeSLAM.ipynb ├── Makefile ├── README.md ├── U-Net ├── Unet.py ├── compute_errors.py └── image_utils.py ├── preprocessing.py ├── read_protobuf.py ├── requirements.txt └── scenenet.proto /.gitignore: -------------------------------------------------------------------------------- 1 | # datasets 2 | data/* 3 | 4 | # vscode changes 5 | .vscode 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | scenenet_pb2.py 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # IPython Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject -------------------------------------------------------------------------------- /CodeSLAM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "CodeSLAM.ipynb", 7 | "provenance": [], 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "view-in-github", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "\"Open" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "8_azmJjSK6Fu", 30 | "colab_type": "text" 31 | }, 32 | "source": [ 33 | "#CodeSLAM\n", 34 | "\n", 35 | "## Abstract" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "gTygVZ5SIC4O", 42 | "colab_type": "code", 43 | "colab": {} 44 | }, 45 | "source": [ 46 | "from torch import vis\n", 47 | "import matplotlib as plt\n", 48 | "import torch" 49 | ], 50 | "execution_count": 0, 51 | "outputs": [] 52 | } 53 | ] 54 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | protoc --python_out=./ scenenet.proto 3 | 4 | clean: 5 | $(RM) scenenet_pb2.py 6 | $(RM) -r __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodeSLAM 2 | 3 | PyTorch implementation of [CodeSLAM - Learning a Compact, Optimisable 
Representation for Dense Visual SLAM](https://arxiv.org/pdf/1804.00874.pdf).
4 |
5 | ## Summary
6 |
7 | ### Problems it tries to tackle/solve
8 | - Representation of geometry in real 3D perception systems.
9 | - Dense representations, possibly augmented with semantic labels, are high-dimensional and unsuitable for probabilistic inference.
10 | - Sparse representations avoid these problems but capture only partial scene information.
11 |
12 | ### The new approach/solution
13 | - A new compact but dense representation of scene geometry, conditioned on the intensity data from a single image and generated from a code consisting of a small number of parameters.
14 | - Each keyframe can produce a depth map, but the code can be optimised jointly with pose variables and with the codes of overlapping keyframes for global consistency.
15 |
16 | ### Introduction
17 | - As uncertainty propagation quickly becomes intractable for a large number of degrees of freedom, SLAM approaches split into two categories:
18 |   - sparse SLAM, which represents geometry by a sparse set of features;
19 |   - dense SLAM, which attempts to retrieve a more complete description of the environment.
20 | - The geometry of natural scenes exhibits a high degree of order, so we may not need a large number of parameters to represent it.
21 | - Besides that, a scene could be decomposed into a set of semantic objects (e.g. a chair) together with some internal parameters (e.g. size of the chair, number of legs) and a pose. Other, more general scene elements that exhibit simple regularity can be recognised and parametrised within SLAM systems.
22 | - A straightforward autoencoder might oversimplify the reconstruction of natural scenes; the **novelty** is to condition the training on intensity images.
23 | - A **scene map** consists of a set of selected and estimated historical camera poses together with the corresponding captured images and supplementary local information such as depth estimates. The intensity images are usually required for additional tasks.
24 | - The **depth map estimate** becomes a function of the corresponding intensity image and an unknown compact representation (referred to as the **code**).
25 | - We can think of the image as providing local details and the code as supplying more global shape parameters; this can be seen as a step towards enabling optimisation in a general semantic space.
26 | - The **two key contributions** of this paper are:
27 |   - The derivation of a compact and optimisable representation of dense geometry by conditioning a depth autoencoder on intensity images (a minimal sketch of this idea follows this list).
28 |   - The implementation of the first real-time targeted monocular system that achieves such a tight joint optimisation of motion and dense geometry.
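To make the conditioning idea concrete, below is a minimal, hypothetical PyTorch sketch. It is **not** the network used in this repository (see `U-Net/Unet.py`) nor the paper's exact variational architecture; the class names, layer sizes and code dimension are illustrative assumptions only. A depth encoder compresses a depth map into a small code, and a decoder conditioned on the intensity image turns that code back into a depth map:

```python
import torch
import torch.nn as nn

class DepthEncoder(nn.Module):
    """Compress a depth map into a small latent code (illustrative only)."""
    def __init__(self, code_size=32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(32, code_size),
        )

    def forward(self, depth):
        return self.net(depth)

class ConditionedDepthDecoder(nn.Module):
    """Predict a depth map from intensity-image features fused with the code."""
    def __init__(self, code_size=32):
        super().__init__()
        self.image_features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1), nn.ReLU(),
        )
        self.code_projection = nn.Linear(code_size, 16)
        self.head = nn.Conv2d(16, 1, kernel_size=3, padding=1)

    def forward(self, intensity, code):
        feats = self.image_features(intensity)
        # Broadcast the code over the spatial dimensions and fuse it with the
        # image features: the image supplies local detail, the code global shape.
        c = self.code_projection(code).unsqueeze(-1).unsqueeze(-1)
        return self.head(torch.relu(feats + c))

if __name__ == '__main__':
    depth = torch.rand(2, 1, 192, 256)      # normalized depth maps
    intensity = torch.rand(2, 1, 192, 256)  # grayscale intensity images
    code = DepthEncoder()(depth)            # compact per-keyframe code
    pred = ConditionedDepthDecoder()(intensity, code)
    print(code.shape, pred.shape)           # (2, 32) and (2, 1, 192, 256)
```

At SLAM time the encoder would be dropped and the code treated as an optimisable variable (e.g. a `torch.nn.Parameter`) refined jointly with camera poses and the codes of overlapping keyframes, which is the property the contributions above rely on.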
29 | 30 | ## Usage 31 | - generate the python module for the protobuf: `protoc --python_out=./ scenenet.proto` 32 | 33 | ## Results 34 | 35 | ## Requirements 36 | - Python 3.4+ 37 | - PyTorch 1.0+ 38 | - Torchvision 0.4.0+ -------------------------------------------------------------------------------- /U-Net/Unet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import os 4 | 5 | def conv3x3(in_planes, out_planes, stride=1): 6 | """3x3 conv layer with padding.""" 7 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 8 | 9 | class Bottleneck(nn.Module): 10 | expansion = 4 11 | 12 | def __init__(self, inplanes, planes, stride=1, downsample=None): 13 | super(Bottleneck, self).__init__() 14 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 15 | self.bn1 = nn.BatchNorm2d(planes) 16 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes) 18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 19 | self.bn3 = nn.BatchNorm2d(planes * 4) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | 24 | def forward(self, x): 25 | residual = x 26 | 27 | out = self.conv1(x) 28 | out = self.bn1(out) 29 | out = self.relu(out) 30 | 31 | out = self.conv2(out) 32 | out = self.bn2(out) 33 | out = self.relu(out) 34 | 35 | out = self.conv3(out) 36 | out = self.bn3(out) 37 | 38 | if self.downsample is not None: 39 | residual = self.downsample(x) 40 | 41 | out += residual 42 | out = self.relu(out) 43 | 44 | return out 45 | 46 | def get_incoming_shape(incoming): 47 | size = incoming.size() 48 | # returns the incoming data shape as a list 49 | return [size[0], size[1], size[2], size[3]] 50 | 51 | def interleave(tensors, axis): 52 | # change the first element (batch_size to -1) 53 | old_shape = get_incoming_shape(tensors[0])[1:] 54 | new_shape = [-1] + old_shape 55 | 56 | # double 1 dimension 57 | new_shape[axis] *= len(tensors) 58 | 59 | # pack the tensors on top of each other 60 | stacked = torch.stack(tensors, axis+1) 61 | 62 | # reshape and return 63 | reshaped = stacked.view(new_shape) 64 | return reshaped 65 | 66 | class UnpoolingAsConvolution(nn.Module): 67 | def __init__(self, inplanes, planes): 68 | super(UnpoolingAsConvolution, self).__init__() 69 | 70 | # interleaving convolutions 71 | self.conv_A = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(3, 3), stride=1, padding=1) 72 | self.conv_B = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(2, 3), stride=1, padding=0) 73 | self.conv_C = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(3, 2), stride=1, padding=0) 74 | self.conv_D = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(2, 2), stride=1, padding=0) 75 | 76 | def forward(self, x): 77 | output_a = self.conv_A(x) 78 | 79 | padded_b = nn.functional.pad(x, (1, 1, 0, 1)) 80 | output_b = self.conv_B(padded_b) 81 | 82 | padded_c = nn.functional.pad(x, (0, 1, 1, 1)) 83 | output_c = self.conv_C(padded_c) 84 | 85 | padded_d = nn.functional.pad(x, (0, 1, 0, 1)) 86 | output_d = self.conv_D(padded_d) 87 | 88 | left = interleave([output_a, output_b], axis=2) 89 | right = interleave([output_c, output_d], axis=2) 90 | y = interleave([left, right], axis=3) 91 | return y 92 | 93 | class UpProjection(nn.Module): 94 | def __init__(self, inplanes, planes): 95 | 
super(UpProjection, self).__init__() 96 | 97 | self.unpool_main = UnpoolingAsConvolution(inplanes, planes) 98 | self.unpool_res = UnpoolingAsConvolution(inplanes, planes) 99 | 100 | self.main_branch = nn.Sequential( 101 | self.unpool_main, 102 | nn.BatchNorm2d(planes), 103 | nn.ReLU(inplace=False), 104 | nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1), 105 | nn.BatchNorm2d(planes) 106 | ) 107 | 108 | self.residual_branch = nn.Sequential( 109 | self.unpool_res, 110 | nn.BatchNorm2d(planes), 111 | ) 112 | 113 | self.relu = nn.ReLU(inplace=False) 114 | 115 | def forward(self, input_data): 116 | x = self.main_branch(input_data) 117 | res = self.residual_branch(input_data) 118 | x += res 119 | x = self.relu(x) 120 | return x 121 | 122 | class ConConv(nn.Module): 123 | def __init__(self, inplanes_x1, inplanes_x2, planes): 124 | super(ConConv, self).__init__() 125 | self.conv = nn.Conv2d(inplanes_x1 + inplanes_x2, planes, kernel_size=1, bias=True) 126 | 127 | def forward(self, x1, x2): 128 | x1 = torch.cat([x2, x1], dim=1) 129 | x1 = self.conv(x1) 130 | return x1 131 | 132 | class ResnetUnetHybrid(nn.Module): 133 | def __init__(self, block, layers): 134 | self.inplanes = 64 135 | 136 | # resnet layers 137 | super(ResnetUnetHybrid, self).__init__() 138 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 139 | self.bn1 = nn.BatchNorm2d(64) 140 | self.relu = nn.ReLU(inplace=True) 141 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 142 | 143 | self.layer1 = self._make_layer(block, 64, layers[0]) 144 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 145 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 147 | 148 | # additional up projection layers parts 149 | self.conv2 = nn.Conv2d(2048, 1024, 1, bias=True) 150 | self.bn2 = nn.BatchNorm2d(1024) 151 | 152 | self.up_proj1 = UpProjection(1024, 512) 153 | self.up_proj2 = UpProjection(512, 256) 154 | self.up_proj3 = UpProjection(256, 128) 155 | self.up_proj4 = UpProjection(128, 64) 156 | 157 | self.drop = nn.Dropout(0.5, False) 158 | self.conv3 = nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1, bias=True) 159 | 160 | # padding + concat for unet stuff 161 | self.con_conv1 = ConConv(1024, 512, 512) 162 | self.con_conv2 = ConConv(512, 256, 256) 163 | self.con_conv3 = ConConv(256, 128, 128) 164 | self.con_conv4 = ConConv(64, 64, 64) 165 | 166 | for m in self.modules(): 167 | if isinstance(m, nn.Conv2d): 168 | nn.init.normal_(m.weight, 0, 0.01) 169 | 170 | elif isinstance(m, nn.BatchNorm2d): 171 | nn.init.constant_(m.weight, 1) 172 | nn.init.constant_(m.bias, 0) 173 | 174 | def _make_layer(self, block, planes, blocks, stride=1): 175 | downsample = None 176 | if stride != 1 or self.inplanes != planes * block.expansion: 177 | downsample = nn.Sequential( 178 | nn.Conv2d(self.inplanes, planes * block.expansion, 179 | kernel_size=1, stride=stride, bias=False), 180 | nn.BatchNorm2d(planes * block.expansion), 181 | ) 182 | 183 | layers = list() 184 | layers.append(block(self.inplanes, planes, stride, downsample)) 185 | self.inplanes = planes * block.expansion 186 | for i in range(1, blocks): 187 | layers.append(block(self.inplanes, planes)) 188 | 189 | return nn.Sequential(*layers) 190 | 191 | def forward(self, x): 192 | x = self.conv1(x) 193 | x = self.bn1(x) 194 | x_to_conv4 = self.relu(x) 195 | 196 | x = self.maxpool(x_to_conv4) 197 | x_to_conv3 = self.layer1(x) 198 | x_to_conv2 = 
self.layer2(x_to_conv3)
199 |         x_to_conv1 = self.layer3(x_to_conv2)
200 |         x = self.layer4(x_to_conv1)
201 |
202 |         # additional layers
203 |         x = self.conv2(x)
204 |         x = self.bn2(x)
205 |
206 |         # up project part
207 |         x = self.up_proj1(x)
208 |         x = self.con_conv1(x, x_to_conv1)
209 |
210 |         x = self.up_proj2(x)
211 |         x = self.con_conv2(x, x_to_conv2)
212 |
213 |         x = self.up_proj3(x)
214 |         x = self.con_conv3(x, x_to_conv3)
215 |
216 |         x = self.up_proj4(x)
217 |         x = self.con_conv4(x, x_to_conv4)
218 |
219 |         x = self.drop(x)
220 |         x = self.conv3(x)
221 |         x = self.relu(x)
222 |
223 |         return x
224 |
225 |     @classmethod
226 |     def load_pretrained(cls, device, load_path='hyb_net_weights.model'):
227 |         model = cls(Bottleneck, [3, 4, 6, 3])
228 |
229 |         # download the weights if they are not present
230 |         if not os.path.exists(load_path):
231 |             print('Downloading model weights...')
232 |             os.system('wget https://www.dropbox.com/s/amad4ko9opi4kts/hyb_net_weights.model')
233 |
234 |         model = model.to(device)
235 |         model.load_state_dict(torch.load(load_path, map_location=device))
236 |
237 |         return model
--------------------------------------------------------------------------------
/U-Net/compute_errors.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import sys
4 | import cv2
5 | import numpy as np
6 | import torch
7 | import torch.nn.functional as F
8 | from Unet import ResnetUnetHybrid
9 | import image_utils
10 |
11 | subfolders = range(0, 1000)
12 | test_dir = '../data/val'
13 |
14 | def show_test_files():
15 |     # build test files list
16 |     test_paths = [os.path.join(test_dir, str(f)) for f in subfolders]
17 |     test_img_paths = []
18 |     for img_path in test_paths:
19 |         current_image_path = os.path.join(img_path, 'photo')
20 |         for filename in glob.iglob(current_image_path + '/*', recursive=True):
21 |             test_img_paths.append(filename)
22 |
23 |     test_img_paths.sort()
24 |     # build labels list
25 |     test_label_paths = []
26 |     for img_path in test_paths:
27 |         current_image_path = os.path.join(img_path, 'depth')
28 |         for filename in glob.iglob(current_image_path + '/*', recursive=True):
29 |             test_label_paths.append(filename)
30 |
31 |     test_label_paths.sort()
32 |
33 |     return test_img_paths, test_label_paths
34 |
35 | if __name__ == '__main__':
36 |     # The error metrics themselves are not implemented yet; for now just
37 |     # build the test file lists and sanity-check that they line up.
38 |     test_img_paths, test_label_paths = show_test_files()
39 |     print('Found {0} test images and {1} depth labels'.format(len(test_img_paths), len(test_label_paths)))
--------------------------------------------------------------------------------
/U-Net/image_utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import matplotlib.pyplot as plt
5 | from torchvision import transforms
6 |
7 | HEIGHT = 240
8 | WIDTH = 320
9 |
10 | def show_img_and_pred(img, depth):
11 |     """Plot an image and a corresponding prediction next to each other."""
12 |     plt.figure()
13 |     plt.subplot(1, 2, 1)
14 |     plt.imshow(img)
15 |     plt.subplot(1, 2, 2)
16 |     pred = np.transpose(depth, (1, 2, 0))
17 |     plt.imshow(pred[:, :, 0])
18 |     plt.show()
19 |
20 | def scale_image(img, scale=None):
21 |     """Resize/scale an image.
    If no scale is given, the image is scaled so that both dimensions reach at least WIDTH x HEIGHT."""
22 |     # if scale is None, choose the factor that brings both dimensions up to at least WIDTH x HEIGHT
23 |     if scale is None:
24 |         scale = max(WIDTH / img.shape[1], HEIGHT / img.shape[0])
25 |
26 |     new_size = (math.ceil(img.shape[1] * scale), math.ceil(img.shape[0] * scale))
27 |     image = cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST)
28 |     return image
29 |
30 | def img_transform(img):
31 |     """Normalize an image."""
32 |     data_transform = transforms.Compose([
33 |         transforms.ToTensor(),
34 |         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
35 |     ])
36 |     img = data_transform(img)
37 |     return img
38 |
39 | def depth_to_grayscale(depth, max_dist=10.0):
40 |     """Transform a prediction into a grayscale 8-bit image."""
41 |     depth = np.transpose(depth, (1, 2, 0))
42 |     depth[depth > max_dist] = max_dist
43 |     depth = depth / max_dist
44 |
45 |     depth = np.array(depth * 255.0, dtype=np.uint8)
46 |     depth = cv2.resize(depth, (WIDTH, HEIGHT))
47 |
48 |     bgr_depth_img = cv2.cvtColor(depth, cv2.COLOR_GRAY2BGR)
49 |     bgr_depth_img = np.clip(bgr_depth_img, 0, 255)
50 |     return bgr_depth_img
--------------------------------------------------------------------------------
/preprocessing.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import multiprocessing as mp
3 | import numpy as np
4 | import os
5 | from PIL import Image
6 | import statistics
7 | import sys
8 |
9 |
10 | TRAINING_SET_PATH = 'data/train/'
11 | DATASET_SUBFOLDERS = range(0, 15)
12 | IMAGE_NEW_WIDTH = 256
13 | IMAGE_NEW_HEIGHT = 192
14 |
15 | def create_intensity_images_from_rgb_images_folder(path, subfolder):
16 |     print("create intensity is called")
17 |     for filename in glob.iglob(path + str(subfolder) + '/*/photo/*', recursive=True):
18 |         # don't duplicate intensity images
19 |         if not filename.endswith("_intensity.jpg"):
20 |             img = Image.open(filename).convert('L')
21 |             image_name_without_extension = filename.split('.')[0]
22 |             intensity_image_name = image_name_without_extension + '_intensity.jpg'
23 |             print(intensity_image_name)
24 |             img.save(intensity_image_name)
25 |
26 | def resize_intensity_images(path, new_width, new_height, subfolder):
27 |     for filename in glob.iglob(path + str(subfolder) + '/*/photo/*_intensity.jpg', recursive=True):
28 |         print(filename)
29 |         img = Image.open(filename)
30 |         resized_image = img.resize((new_width, new_height))
31 |         image_name_without_extension = filename.split('.')[0]
32 |         resized_intensity_image_name = image_name_without_extension + '_resized.jpg'
33 |         resized_image.save(resized_intensity_image_name, "JPEG", optimize=True)
34 |         # remove the original intensity image
35 |         os.remove(filename)
36 |
37 | def scale_depth(image, average):
38 |     for i in range(len(image)):
39 |         image[i] = average / (average + image[i])  # proximity parameterisation: d -> a / (a + d)
40 |
41 | def normalize_depth_values(path, subfolder):
42 |     for filename in glob.iglob(path + str(subfolder) + '/*/depth/*[0-9].png', recursive=True):
43 |         print(filename)
44 |         img = Image.open(filename)
45 |         size = img.size
46 |         image_values = list(img.getdata())
47 |         average_depth = statistics.mean(image_values)
48 |         if average_depth == 0:
49 |             average_depth = 0.000001
50 |         scale_depth(image_values, average_depth)
51 |         image_array = np.array(image_values, dtype=np.float32) * 255.0  # map proximities in (0, 1] to the 8-bit range
52 |         image = Image.new("L", size)
53 |         image.putdata(image_array)
54 |         normalized_image_name = filename.split('.')[0] + '_normalized.png'
55 |         image.save(normalized_image_name, "PNG", optimize=True)
56 |
57 | def remove_normalized_depth_images(path):
58 |     for filename in glob.iglob(path + '**/*/depth/*_normalized.png', recursive=True):
59 |         os.remove(filename)
60 |
61 | def remove_intensity_images(path):
62 |     for filename in glob.iglob(path + '**/*/photo/*_intensity.jpg', recursive=True):
63 |         os.remove(filename)
64 |     for filename in glob.iglob(path + '**/*/photo/*_resized.jpg', recursive=True):
65 |         os.remove(filename)
66 |
67 | if __name__ == '__main__':
68 |     pool = mp.Pool(mp.cpu_count())
69 |     # if no args are passed, don't alter images
70 |     if len(sys.argv) == 1:
71 |         print("You should specify converting to intensity (i), resizing (r) and/or depth normalization (n).")
72 |     elif len(sys.argv) == 2:
73 |         if str(sys.argv[1]) == "i":
74 |             [pool.apply(create_intensity_images_from_rgb_images_folder, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
75 |         elif str(sys.argv[1]) == "r":
76 |             [pool.apply(resize_intensity_images, args=(TRAINING_SET_PATH, IMAGE_NEW_WIDTH, IMAGE_NEW_HEIGHT, subfolder)) for subfolder in DATASET_SUBFOLDERS]
77 |         elif str(sys.argv[1]) == "n":
78 |             [pool.apply(normalize_depth_values, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
79 |         else:
80 |             print("Invalid argument: use 'i' to convert images to intensity images, 'r' to resize the intensity images, 'n' for depth normalization or any combination of them")
81 |     elif len(sys.argv) == 3:
82 |         if (str(sys.argv[1]) == "i" and str(sys.argv[2]) == "r") or (str(sys.argv[2]) == "i" and str(sys.argv[1]) == "r"):
83 |             [pool.apply(create_intensity_images_from_rgb_images_folder, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
84 |             [pool.apply(resize_intensity_images, args=(TRAINING_SET_PATH, IMAGE_NEW_WIDTH, IMAGE_NEW_HEIGHT, subfolder)) for subfolder in DATASET_SUBFOLDERS]
85 |         elif (str(sys.argv[1]) == "i" and str(sys.argv[2]) == "n") or (str(sys.argv[1]) == "n" and str(sys.argv[2]) == "i"):
86 |             [pool.apply(create_intensity_images_from_rgb_images_folder, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
87 |             [pool.apply(normalize_depth_values, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
88 |         elif (str(sys.argv[1]) == "r" and str(sys.argv[2]) == "n") or (str(sys.argv[1]) == "n" and str(sys.argv[2]) == "r"):
89 |             [pool.apply(resize_intensity_images, args=(TRAINING_SET_PATH, IMAGE_NEW_WIDTH, IMAGE_NEW_HEIGHT, subfolder)) for subfolder in DATASET_SUBFOLDERS]
90 |             [pool.apply(normalize_depth_values, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
91 |         else:
92 |             print("Invalid arguments: use 'i' to convert images to intensity images, 'r' to resize the intensity images, 'n' for depth normalization or any combination of them")
93 |     elif len(sys.argv) == 4:
94 |         if ((str(sys.argv[1]) == "i" and str(sys.argv[2]) == "r" and str(sys.argv[3]) == "n") or
95 |             (str(sys.argv[1]) == "i" and str(sys.argv[2]) == "n" and str(sys.argv[3]) == "r") or
96 |             (str(sys.argv[1]) == "r" and str(sys.argv[2]) == "i" and str(sys.argv[3]) == "n") or
97 |             (str(sys.argv[1]) == "r" and str(sys.argv[2]) == "n" and str(sys.argv[3]) == "i") or
98 |             (str(sys.argv[1]) == "n" and str(sys.argv[2]) == "i" and str(sys.argv[3]) == "r") or
99 |             (str(sys.argv[1]) == "n" and str(sys.argv[2]) == "r" and str(sys.argv[3]) == "i")):
100 |
101 |             [pool.apply(create_intensity_images_from_rgb_images_folder, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
102 |             [pool.apply(resize_intensity_images, args=(TRAINING_SET_PATH, IMAGE_NEW_WIDTH,
IMAGE_NEW_HEIGHT, subfolder)) for subfolder in DATASET_SUBFOLDERS] 103 | [pool.apply(normalize_depth_values, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS] 104 | else: 105 | print("Too many arguments: use 'i' for convert images to intensity images, 'r' to resize the intensity images, 'n' for depth normalization or any combination of them") 106 | 107 | pool.close() 108 | 109 | -------------------------------------------------------------------------------- /read_protobuf.py: -------------------------------------------------------------------------------- 1 | import scenenet_pb2 as sn 2 | import os 3 | 4 | DATA_ROOT_PATH = 'data/train_0/train/0' 5 | PROTOBUF_PATH = 'data/train_protobufs/scenenet_rgbd_train_0.pb' 6 | 7 | # These functions produce a file path (on Linux systems) to the image given 8 | # a view and render path from a trajectory. As long the DATA_ROOT_PATH to the 9 | # root of the dataset is given. I.e. to either val or train 10 | def photo_path_from_view(render_path,view): 11 | photo_path = os.path.join(render_path,'photo') 12 | image_path = os.path.join(photo_path,'{0}.jpg'.format(view.frame_num)) 13 | return os.path.join(DATA_ROOT_PATH,image_path) 14 | 15 | def instance_path_from_view(render_path,view): 16 | photo_path = os.path.join(render_path,'instance') 17 | image_path = os.path.join(photo_path,'{0}.png'.format(view.frame_num)) 18 | return os.path.join(DATA_ROOT_PATH,image_path) 19 | 20 | def depth_path_from_view(render_path,view): 21 | photo_path = os.path.join(render_path,'depth') 22 | image_path = os.path.join(photo_path,'{0}.png'.format(view.frame_num)) 23 | return os.path.join(DATA_ROOT_PATH,image_path) 24 | 25 | 26 | if __name__ == '__main__': 27 | trajectories = sn.Trajectories() 28 | try: 29 | with open(PROTOBUF_PATH,'rb') as f: 30 | trajectories.ParseFromString(f.read()) 31 | except IOError: 32 | print('Scenenet protobuf data not found at location:{0}'.format(DATA_ROOT_PATH)) 33 | print('Please ensure you have copied the pb file to the data directory') 34 | 35 | print('Number of trajectories:{0}'.format(len(trajectories.trajectories))) 36 | for traj in trajectories.trajectories: 37 | layout_type = sn.SceneLayout.LayoutType.Name(traj.layout.layout_type) 38 | layout_path = traj.layout.model 39 | print('='*20) 40 | print('Render path:{0}'.format(traj.render_path)) 41 | print('Layout type:{0} path:{1}'.format(layout_type,layout_path)) 42 | print('='*20) 43 | print('') 44 | print('Number of instances: {0}'.format(len(traj.instances))) 45 | ''' 46 | The instances attribute of trajectories contains all of the information 47 | about the different instances. The instance.instance_id attribute provides 48 | correspondences with the rendered instance.png files. I.e. for a given 49 | trajectory, if a pixel is of value 1, the information about that instance, 50 | such as its type, semantic class, and wordnet id, is stored here. 51 | For more information about the exact information available refer to the 52 | scenenet.proto file. 
53 | ''' 54 | for instance in traj.instances: 55 | instance_type = sn.Instance.InstanceType.Name(instance.instance_type) 56 | print('='*20) 57 | print('Instance id:{0}'.format(instance.instance_id)) 58 | print('Instance type:{0}'.format(instance_type)) 59 | if instance.instance_type != sn.Instance.BACKGROUND: 60 | print('Wordnet id:{0}'.format(instance.semantic_wordnet_id)) 61 | print('Plain english name:{0}'.format(instance.semantic_english)) 62 | if instance.instance_type == sn.Instance.LIGHT_OBJECT: 63 | light_type = sn.LightInfo.LightType.Name(instance.light_info.light_type) 64 | print('Light type:{0}'.format(light_type)) 65 | if instance.instance_type == sn.Instance.RANDOM_OBJECT: 66 | print('Object info:{0}'.format(instance.object_info)) 67 | print('-'*20) 68 | print('') 69 | print('Render path:{0}'.format(traj.render_path)) 70 | ''' 71 | The views attribute of trajectories contains all of the information 72 | about the rendered frames of a scene. This includes camera poses, 73 | frame numbers and timestamps. 74 | ''' 75 | for view in traj.views: 76 | print(photo_path_from_view(traj.render_path,view)) 77 | print(depth_path_from_view(traj.render_path,view)) 78 | print(instance_path_from_view(traj.render_path,view)) 79 | print(view) 80 | break -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.0 2 | Pillow==9.0.1 3 | protobuf==3.15.0 4 | six==1.14.0 5 | torch==1.4.0+cpu 6 | torchvision==0.5.0+cpu 7 | -------------------------------------------------------------------------------- /scenenet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package scenenet; 4 | 5 | message SceneLayout { 6 | enum LayoutType { 7 | BATHROOM = 1; 8 | BEDROOM = 2; 9 | KITCHEN = 3; 10 | LIVING_ROOM = 4; 11 | OFFICE = 5; 12 | } 13 | optional LayoutType layout_type = 1; 14 | // This is the name of the SceneNet model used for the layout 15 | optional string model = 2; 16 | } 17 | 18 | message LightInfo { 19 | enum LightType { 20 | SPHERE = 1; 21 | PARALLELOGRAM = 2; 22 | } 23 | optional LightType light_type = 1; 24 | // Light intensity 25 | optional Power light_output = 2; 26 | // This is the center for sphere type lights. 
And the corner for others.
27 |   optional Position position = 3;
28 |   // This is only for SPHERE lights
29 |   optional float radius = 4;
30 |   // This is only for PARALLELOGRAM lights
31 |   optional Position v1 = 5;
32 |   optional Position v2 = 6;
33 | }
34 |
35 | message RandomObjectInfo {
36 |   optional string shapenet_hash = 1;
37 |   optional float height_meters = 2;
38 |   message Transformation {
39 |     // The 3x4 matrix is as follows:
40 |     // rotation_mat11 rotation_mat12 rotation_mat13 translation_x
41 |     // rotation_mat21 rotation_mat22 rotation_mat23 translation_y
42 |     // rotation_mat31 rotation_mat32 rotation_mat33 translation_z
43 |     optional float translation_x = 1;
44 |     optional float translation_y = 2;
45 |     optional float translation_z = 3;
46 |     optional float rotation_mat11 = 4;
47 |     optional float rotation_mat12 = 5;
48 |     optional float rotation_mat13 = 6;
49 |     optional float rotation_mat21 = 7;
50 |     optional float rotation_mat22 = 8;
51 |     optional float rotation_mat23 = 9;
52 |     optional float rotation_mat31 = 10;
53 |     optional float rotation_mat32 = 11;
54 |     optional float rotation_mat33 = 12;
55 |   }
56 |   // This gives the transformation applied to an object, about the center of
57 |   // the base plane of its axis-aligned bounding box.
58 |   optional Transformation object_pose = 3;
59 | }
60 |
61 | message Instance {
62 |   optional int32 instance_id = 1;
63 |   optional string semantic_wordnet_id = 2;
64 |   optional string semantic_english = 3;
65 |   enum InstanceType {
66 |     // This is the instance type when no object is present, e.g. because of
67 |     // looking out a window into nothingness
68 |     BACKGROUND = 1;
69 |     // This is an object that is hard coded into the layout and does not
70 |     // move. This type does not have a transformation or shapenet hash
71 |     LAYOUT_OBJECT = 2;
72 |     // This is a randomly positioned light source
73 |     LIGHT_OBJECT = 3;
74 |     // This means the object is a randomly positioned shapenet object. The
75 |     // object has a transformation and scale parameter in the object_info
76 |     // variable.
77 |     RANDOM_OBJECT = 4;
78 |   }
79 |   optional InstanceType instance_type = 4;
80 |   // This information is only filled in for the respective type
81 |   optional LightInfo light_info = 5;
82 |   optional RandomObjectInfo object_info = 6;
83 | }
84 |
85 | message Power {
86 |   optional float r = 1;
87 |   optional float g = 2;
88 |   optional float b = 3;
89 | }
90 |
91 | message Position {
92 |   optional float x = 1;
93 |   optional float y = 2;
94 |   optional float z = 3;
95 | }
96 |
97 | message Pose {
98 |   // The positions of these two points define the camera view. The y vector is
99 |   // defined as [0,1,0]. For an example of how to calculate the camera view
100 |   // coordinate system, see the python codebase.
101 |   optional Position camera = 1;
102 |   optional Position lookat = 2;
103 |   optional float timestamp = 3;
104 | }
105 |
106 | message View {
107 |   // These increment by the number of skip frames, i.e. 0,25,50...7475.
108 | optional int32 frame_num = 1; 109 | // The photo is rendered by integrating uniformly sampled 110 | // exposures between the following two poses 111 | optional Pose shutter_open = 2; 112 | optional Pose shutter_close = 3; 113 | } 114 | 115 | message Trajectory { 116 | optional SceneLayout layout = 1; 117 | // The first instances[0] is always the 'background' and 118 | // undefined class when for example looking out windows 119 | repeated Instance instances = 2; 120 | // These are ordered sequentially for a trajectory 121 | repeated View views = 3; 122 | // This stores the path from the root data directory to the trajectory data 123 | // folder. If the trajectories are stored as: 124 | // /path/i/extracted/{val/train}/0/123/photo/0.jpg 125 | // then this path will be '0/123' designating the trajectories folder 126 | optional string render_path = 4; 127 | } 128 | 129 | message Trajectories { 130 | // This is the root list which stores all of the available trajectories 131 | repeated Trajectory trajectories = 1; 132 | } --------------------------------------------------------------------------------
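The `Pose` message above points to the Python codebase for how to build the camera view coordinate system from `camera`, `lookat` and the fixed up vector `[0, 1, 0]`. Below is a rough, hedged sketch of one such construction (a standard right-handed look-at; the exact axis conventions of the official SceneNet RGB-D utilities may differ), using a `Pose` parsed via the generated `scenenet_pb2` module:

```python
import numpy as np

def camera_to_world(pose):
    """Build a 4x4 camera-to-world matrix from a scenenet_pb2 Pose message."""
    cam = np.array([pose.camera.x, pose.camera.y, pose.camera.z])
    lookat = np.array([pose.lookat.x, pose.lookat.y, pose.lookat.z])
    world_up = np.array([0.0, 1.0, 0.0])   # as stated in the Pose comment
    forward = lookat - cam
    forward /= np.linalg.norm(forward)     # viewing direction
    right = np.cross(forward, world_up)
    right /= np.linalg.norm(right)         # camera x axis
    up = np.cross(right, forward)          # camera y axis (orthogonal by construction)
    T = np.eye(4)
    T[:3, 0], T[:3, 1], T[:3, 2] = right, up, forward
    T[:3, 3] = cam
    return T

# Example (after generating scenenet_pb2 with `make` / protoc):
# import scenenet_pb2 as sn
# trajectories = sn.Trajectories()
# with open('data/train_protobufs/scenenet_rgbd_train_0.pb', 'rb') as f:
#     trajectories.ParseFromString(f.read())
# print(camera_to_world(trajectories.trajectories[0].views[0].shutter_open))
```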