├── .gitignore
├── CodeSLAM.ipynb
├── Makefile
├── README.md
├── U-Net
│   ├── Unet.py
│   ├── compute_errors.py
│   └── image_utils.py
├── preprocessing.py
├── read_protobuf.py
├── requirements.txt
└── scenenet.proto
/.gitignore:
--------------------------------------------------------------------------------
1 | # datasets
2 | data/*
3 |
4 | # vscode changes
5 | .vscode
6 |
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 | scenenet_pb2.py
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *,cover
53 | .hypothesis/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 |
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 |
67 | # Scrapy stuff:
68 | .scrapy
69 |
70 | # Sphinx documentation
71 | docs/_build/
72 |
73 | # PyBuilder
74 | target/
75 |
76 | # IPython Notebook
77 | .ipynb_checkpoints
78 |
79 | # pyenv
80 | .python-version
81 |
82 | # celery beat schedule file
83 | celerybeat-schedule
84 |
85 | # dotenv
86 | .env
87 |
88 | # virtualenv
89 | venv/
90 | ENV/
91 |
92 | # Spyder project settings
93 | .spyderproject
94 |
95 | # Rope project settings
96 | .ropeproject
--------------------------------------------------------------------------------
/CodeSLAM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "CodeSLAM.ipynb",
7 | "provenance": [],
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "view-in-github",
20 | "colab_type": "text"
21 | },
22 | "source": [
23 |         ""
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "8_azmJjSK6Fu",
30 | "colab_type": "text"
31 | },
32 | "source": [
33 |         "# CodeSLAM\n",
34 | "\n",
35 | "## Abstract"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "gTygVZ5SIC4O",
42 | "colab_type": "code",
43 | "colab": {}
44 | },
45 | "source": [
46 |         "import torchvision\n",
47 |         "import matplotlib.pyplot as plt\n",
48 | "import torch"
49 | ],
50 | "execution_count": 0,
51 | "outputs": []
52 | }
53 | ]
54 | }
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | protoc --python_out=./ scenenet.proto
3 |
4 | clean:
5 | $(RM) scenenet_pb2.py
6 | $(RM) -r __pycache__
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CodeSLAM
2 |
3 | PyTorch implementation of [CodeSLAM - Learning a Compact, Optimisable Representation for Dense Visual SLAM](https://arxiv.org/pdf/1804.00874.pdf).
4 |
5 | ## Summary
6 |
7 | ### Problems it tries to tackle/solve
8 | - Representation of geometry in real 3D perception systems.
9 | - Dense representations, possibly augmented with semantic labels, are high-dimensional and unsuitable for probabilistic inference.
10 | - Sparse representations avoid these problems but capture only partial scene information.
11 |
12 | ### The new approach/solution
13 | - New compact but dense representation of scene geometry, conditioned on the intensity data from a single image and generated from a code consisting of a small number of parameters.
14 | - Each keyframe can produce a depth map, but the code can be optimised jointly with pose variables and with the codes of overlapping keyframes, for global consistency.
15 |
16 | ### Introduction
17 | - As uncertainty propagation quickly becomes intractable for a large number of degrees of freedom, SLAM approaches are split into two categories:
18 |   - sparse SLAM, which represents geometry by a sparse set of features
19 |   - dense SLAM, which attempts to retrieve a more complete description of the environment.
20 | - The geometry of natural scenes exhibits a high degree of order, so we may not need a large number of parameters to represent it.
21 | - Besides that, a scene could be decomposed into a set of semantic objects (e.g. a chair) together with some internal parameters (e.g. size of the chair, number of legs) and a pose. Other, more general scene elements that exhibit simple regularity can also be recognised and parametrised within SLAM systems.
22 | - A straightforward autoencoder might oversimplify the reconstruction of natural scenes; the **novelty** is to condition the training on intensity images.
23 | - A **scene map** consists of a set of selected and estimated historical camera poses together with the corresponding captured images and supplementary local information such as depth estimates. The intensity images are usually required for additional tasks.
24 | - The **depth map estimate** becomes a function of the corresponding intensity image and an unknown compact representation (referred to as the **code**).
25 | - We can think of the image as providing local details and the code as supplying more global shape parameters; this can be seen as a step towards enabling optimisation in a general semantic space (see the sketch after this list).
26 | - The **2 key contributions** of this paper are:
27 | - The derivation of a compact and optimisable representation of dense geometry by conditioning a depth autoencoder on intensity images.
28 | - The implementation of the first real-time targeted monocular system that achieves such a tight joint optimisation of motion and dense geometry.
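To make this concrete, here is a minimal, illustrative PyTorch sketch (not the network used in this repo or in the paper): a hypothetical `ConditionedDepthDecoder` fuses features of the intensity image with a broadcast code vector, so the predicted depth map becomes a differentiable function D(image, code) that can be optimised with respect to the code.

```python
import torch
import torch.nn as nn

class ConditionedDepthDecoder(nn.Module):
    """Toy sketch of depth = D(intensity image, code); names and sizes are illustrative only."""
    def __init__(self, code_size=32):
        super().__init__()
        # image branch: local detail features from the grayscale intensity image
        self.image_encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
        )
        # code branch: project the compact code, then broadcast it over the image plane
        self.code_proj = nn.Linear(code_size, 16)
        # fused head: one depth value per pixel
        self.head = nn.Conv2d(32, 1, kernel_size=3, padding=1)

    def forward(self, intensity, code):
        feat = self.image_encoder(intensity)                # (B, 16, H, W)
        c = self.code_proj(code)[:, :, None, None]          # (B, 16, 1, 1)
        c = c.expand(-1, -1, feat.shape[2], feat.shape[3])  # tile over H x W
        return self.head(torch.cat([feat, c], dim=1))       # (B, 1, H, W)

# Depth is differentiable w.r.t. the code, so the code can be optimised at
# runtime (e.g. jointly with camera poses) while the decoder weights stay fixed.
decoder = ConditionedDepthDecoder()
image = torch.rand(1, 1, 192, 256)
code = torch.zeros(1, 32, requires_grad=True)
depth = decoder(image, code)
```

In CodeSLAM itself the code is the latent of a variational depth autoencoder whose decoder is conditioned on U-Net features of the intensity image; the sketch above only illustrates the functional dependence of depth on the code.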
29 |
30 | ## Usage
31 | - Generate the Python module for the protobuf: `protoc --python_out=./ scenenet.proto` (or simply run `make`).
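With `scenenet_pb2.py` generated, the trajectory metadata can be parsed as done in `read_protobuf.py`; a minimal sketch, assuming the protobuf file has been copied to the path used there:

```python
import scenenet_pb2 as sn

trajectories = sn.Trajectories()
with open('data/train_protobufs/scenenet_rgbd_train_0.pb', 'rb') as f:
    trajectories.ParseFromString(f.read())

print('Number of trajectories: {0}'.format(len(trajectories.trajectories)))
```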
32 |
33 | ## Results
34 |
35 | ## Requirements
36 | - Python 3.4+
37 | - PyTorch 1.0+
38 | - Torchvision 0.4.0+
--------------------------------------------------------------------------------
/U-Net/Unet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import os
4 |
5 | def conv3x3(in_planes, out_planes, stride=1):
6 | """3x3 conv layer with padding."""
7 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
8 |
9 | class Bottleneck(nn.Module):
10 | expansion = 4
11 |
12 | def __init__(self, inplanes, planes, stride=1, downsample=None):
13 | super(Bottleneck, self).__init__()
14 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
15 | self.bn1 = nn.BatchNorm2d(planes)
16 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
17 | self.bn2 = nn.BatchNorm2d(planes)
18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
19 | self.bn3 = nn.BatchNorm2d(planes * 4)
20 | self.relu = nn.ReLU(inplace=True)
21 | self.downsample = downsample
22 | self.stride = stride
23 |
24 | def forward(self, x):
25 | residual = x
26 |
27 | out = self.conv1(x)
28 | out = self.bn1(out)
29 | out = self.relu(out)
30 |
31 | out = self.conv2(out)
32 | out = self.bn2(out)
33 | out = self.relu(out)
34 |
35 | out = self.conv3(out)
36 | out = self.bn3(out)
37 |
38 | if self.downsample is not None:
39 | residual = self.downsample(x)
40 |
41 | out += residual
42 | out = self.relu(out)
43 |
44 | return out
45 |
46 | def get_incoming_shape(incoming):
47 | size = incoming.size()
48 | # returns the incoming data shape as a list
49 | return [size[0], size[1], size[2], size[3]]
50 |
51 | def interleave(tensors, axis):
52 | # change the first element (batch_size to -1)
53 | old_shape = get_incoming_shape(tensors[0])[1:]
54 | new_shape = [-1] + old_shape
55 |
56 | # double 1 dimension
57 | new_shape[axis] *= len(tensors)
58 |
59 | # pack the tensors on top of each other
60 | stacked = torch.stack(tensors, axis+1)
61 |
62 | # reshape and return
63 | reshaped = stacked.view(new_shape)
64 | return reshaped
65 |
66 | class UnpoolingAsConvolution(nn.Module):
67 | def __init__(self, inplanes, planes):
68 | super(UnpoolingAsConvolution, self).__init__()
69 |
70 | # interleaving convolutions
71 | self.conv_A = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(3, 3), stride=1, padding=1)
72 | self.conv_B = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(2, 3), stride=1, padding=0)
73 | self.conv_C = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(3, 2), stride=1, padding=0)
74 | self.conv_D = nn.Conv2d(in_channels=inplanes, out_channels=planes, kernel_size=(2, 2), stride=1, padding=0)
75 |
76 | def forward(self, x):
77 | output_a = self.conv_A(x)
78 |
79 | padded_b = nn.functional.pad(x, (1, 1, 0, 1))
80 | output_b = self.conv_B(padded_b)
81 |
82 | padded_c = nn.functional.pad(x, (0, 1, 1, 1))
83 | output_c = self.conv_C(padded_c)
84 |
85 | padded_d = nn.functional.pad(x, (0, 1, 0, 1))
86 | output_d = self.conv_D(padded_d)
87 |
88 | left = interleave([output_a, output_b], axis=2)
89 | right = interleave([output_c, output_d], axis=2)
90 | y = interleave([left, right], axis=3)
91 | return y
92 |
93 | class UpProjection(nn.Module):
94 | def __init__(self, inplanes, planes):
95 | super(UpProjection, self).__init__()
96 |
97 | self.unpool_main = UnpoolingAsConvolution(inplanes, planes)
98 | self.unpool_res = UnpoolingAsConvolution(inplanes, planes)
99 |
100 | self.main_branch = nn.Sequential(
101 | self.unpool_main,
102 | nn.BatchNorm2d(planes),
103 | nn.ReLU(inplace=False),
104 | nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1),
105 | nn.BatchNorm2d(planes)
106 | )
107 |
108 | self.residual_branch = nn.Sequential(
109 | self.unpool_res,
110 | nn.BatchNorm2d(planes),
111 | )
112 |
113 | self.relu = nn.ReLU(inplace=False)
114 |
115 | def forward(self, input_data):
116 | x = self.main_branch(input_data)
117 | res = self.residual_branch(input_data)
118 | x += res
119 | x = self.relu(x)
120 | return x
121 |
122 | class ConConv(nn.Module):
123 | def __init__(self, inplanes_x1, inplanes_x2, planes):
124 | super(ConConv, self).__init__()
125 | self.conv = nn.Conv2d(inplanes_x1 + inplanes_x2, planes, kernel_size=1, bias=True)
126 |
127 | def forward(self, x1, x2):
128 | x1 = torch.cat([x2, x1], dim=1)
129 | x1 = self.conv(x1)
130 | return x1
131 |
132 | class ResnetUnetHybrid(nn.Module):
133 | def __init__(self, block, layers):
134 | self.inplanes = 64
135 |
136 | # resnet layers
137 | super(ResnetUnetHybrid, self).__init__()
138 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
139 | self.bn1 = nn.BatchNorm2d(64)
140 | self.relu = nn.ReLU(inplace=True)
141 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
142 |
143 | self.layer1 = self._make_layer(block, 64, layers[0])
144 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
145 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
147 |
148 | # additional up projection layers parts
149 | self.conv2 = nn.Conv2d(2048, 1024, 1, bias=True)
150 | self.bn2 = nn.BatchNorm2d(1024)
151 |
152 | self.up_proj1 = UpProjection(1024, 512)
153 | self.up_proj2 = UpProjection(512, 256)
154 | self.up_proj3 = UpProjection(256, 128)
155 | self.up_proj4 = UpProjection(128, 64)
156 |
157 | self.drop = nn.Dropout(0.5, False)
158 | self.conv3 = nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1, bias=True)
159 |
160 | # padding + concat for unet stuff
161 | self.con_conv1 = ConConv(1024, 512, 512)
162 | self.con_conv2 = ConConv(512, 256, 256)
163 | self.con_conv3 = ConConv(256, 128, 128)
164 | self.con_conv4 = ConConv(64, 64, 64)
165 |
166 | for m in self.modules():
167 | if isinstance(m, nn.Conv2d):
168 | nn.init.normal_(m.weight, 0, 0.01)
169 |
170 | elif isinstance(m, nn.BatchNorm2d):
171 | nn.init.constant_(m.weight, 1)
172 | nn.init.constant_(m.bias, 0)
173 |
174 | def _make_layer(self, block, planes, blocks, stride=1):
175 | downsample = None
176 | if stride != 1 or self.inplanes != planes * block.expansion:
177 | downsample = nn.Sequential(
178 | nn.Conv2d(self.inplanes, planes * block.expansion,
179 | kernel_size=1, stride=stride, bias=False),
180 | nn.BatchNorm2d(planes * block.expansion),
181 | )
182 |
183 | layers = list()
184 | layers.append(block(self.inplanes, planes, stride, downsample))
185 | self.inplanes = planes * block.expansion
186 | for i in range(1, blocks):
187 | layers.append(block(self.inplanes, planes))
188 |
189 | return nn.Sequential(*layers)
190 |
191 | def forward(self, x):
192 | x = self.conv1(x)
193 | x = self.bn1(x)
194 | x_to_conv4 = self.relu(x)
195 |
196 | x = self.maxpool(x_to_conv4)
197 | x_to_conv3 = self.layer1(x)
198 | x_to_conv2 = self.layer2(x_to_conv3)
199 | x_to_conv1 = self.layer3(x_to_conv2)
200 | x = self.layer4(x_to_conv1)
201 |
202 | # additional layers
203 | x = self.conv2(x)
204 | x = self.bn2(x)
205 |
206 | # up project part
207 | x = self.up_proj1(x)
208 | x = self.con_conv1(x, x_to_conv1)
209 |
210 | x = self.up_proj2(x)
211 | x = self.con_conv2(x, x_to_conv2)
212 |
213 | x = self.up_proj3(x)
214 | x = self.con_conv3(x, x_to_conv3)
215 |
216 | x = self.up_proj4(x)
217 | x = self.con_conv4(x, x_to_conv4)
218 |
219 | x = self.drop(x)
220 | x = self.conv3(x)
221 | x = self.relu(x)
222 |
223 | return x
224 |
225 | @classmethod
226 | def load_pretrained(cls, device, load_path='hyb_net_weights.model'):
227 | model = cls(Bottleneck, [3, 4, 6, 3])
228 |
229 | # download the weights if they are not present
230 | if not os.path.exists(load_path):
231 | print('Downloading model weights...')
232 | os.system('wget https://www.dropbox.com/s/amad4ko9opi4kts/hyb_net_weights.model')
233 |
234 | model = model.to(device)
235 | model.load_state_dict(torch.load(load_path, map_location=device))
236 |
237 | return model
--------------------------------------------------------------------------------
/U-Net/compute_errors.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import sys
4 | import cv2
5 | import numpy as np
6 | import torch
7 | import torch.nn.functional as F
8 | from Unet import ResnetUnetHybrid
9 | import image_utils
10 |
11 | subfolders = range(0, 1000)
12 | test_dir = '../data/val'
13 |
14 | def show_test_files():
15 | # build test files list
16 | test_paths = [os.path.join(os.path.join(test_dir, str(f))) for f in subfolders]
17 | test_img_paths = []
18 | for img_path in test_paths:
19 | current_image_path = os.path.join(img_path, 'photo')
20 | for filename in glob.iglob(current_image_path + '/*', recursive=True):
21 | test_img_paths.append(filename)
22 |
23 | test_img_paths.sort()
24 | # build labels list
25 | test_label_paths = []
26 | for img_path in test_paths:
27 | current_image_path = os.path.join(img_path, 'depth')
28 | for filename in glob.iglob(current_image_path + '/*', recursive=True):
29 | test_label_paths.append(filename)
30 |
31 | test_label_paths.sort()
32 |
33 | return test_img_paths, test_label_paths
34 |
35 | if __name__ == '__main__':
36 |     # minimal placeholder so the script runs: gather and report the test image/label paths
37 |     test_img_paths, test_label_paths = show_test_files()
38 |     print('Found {0} test images and {1} depth labels'.format(len(test_img_paths), len(test_label_paths)))
37 |
--------------------------------------------------------------------------------
/U-Net/image_utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import matplotlib.pyplot as plt
5 | from torchvision import transforms
6 |
7 | HEIGHT = 240
8 | WIDTH = 320
9 |
10 | def show_img_and_pred(img, depth):
11 | """Plot an image and a corresponding prediction next to each other."""
12 | plt.figure()
13 | plt.subplot(1, 2, 1)
14 | plt.imshow(img)
15 | plt.subplot(1, 2, 2)
16 | pred = np.transpose(depth, (1, 2, 0))
17 | plt.imshow(pred[:, :, 0])
18 | plt.show()
19 |
20 | def scale_image(img, scale=None):
21 | """Resize/scale an image. If a scale is not provided, scale it closer to HEIGHT x WIDTH."""
22 | # if scale is None, scale to the longer size
23 | if scale is None:
24 | scale = max(WIDTH / img.shape[1], HEIGHT / img.shape[0])
25 |
26 | new_size = (math.ceil(img.shape[1] * scale), math.ceil(img.shape[0] * scale))
27 | image = cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST)
28 | return image
29 |
30 | def img_transform(img):
31 | """Normalize an image."""
32 | data_transform = transforms.Compose([
33 | transforms.ToTensor(),
34 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
35 | ])
36 | img = data_transform(img)
37 | return img
38 |
39 | def depth_to_grayscale(depth, max_dist=10.0):
40 | """Transform a prediction into a grayscale 8-bit image."""
41 | depth = np.transpose(depth, (1, 2, 0))
42 | depth[depth > max_dist] = max_dist
43 | depth = depth / max_dist
44 |
45 | depth = np.array(depth * 255.0, dtype=np.uint8)
46 | depth = cv2.resize(depth, (WIDTH, HEIGHT))
47 |
48 | bgr_depth_img = cv2.cvtColor(depth, cv2.COLOR_GRAY2BGR)
49 | bgr_depth_img = np.clip(bgr_depth_img, 0, 255)
50 | return bgr_depth_img
--------------------------------------------------------------------------------
/preprocessing.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import multiprocessing as mp
3 | import numpy as np
4 | import os
5 | from PIL import Image
6 | import statistics
7 | import sys
8 |
9 |
10 | TRAINING_SET_PATH = 'data/train/'
11 | DATASET_SUBFOLDERS = range(0, 15)
12 | IMAGE_NEW_WIDTH = 256
13 | IMAGE_NEW_HEIGHT = 192
14 |
15 | def create_intensity_images_from_rgb_images_folder(path, subfolder):
16 | print("create intensity is called")
17 | for filename in glob.iglob(path + str(subfolder) + '/*/photo/*', recursive=True):
18 | # don't duplicate intensity images
19 | if not filename.endswith("_intensity.jpg"):
20 | img = Image.open(filename).convert('L')
21 | image_name_without_extension = filename.split('.')[0]
22 | intensity_image_name = image_name_without_extension + '_intensity.jpg'
23 | print(intensity_image_name)
24 | img.save(intensity_image_name)
25 |
26 | def resize_intensity_images(path, new_width, new_height, subfolder):
27 | for filename in glob.iglob(path + str(subfolder) + '/*/photo/*_intensity.jpg', recursive=True):
28 | print(filename)
29 | img = Image.open(filename)
30 | resized_image = img.resize((new_width, new_height))
31 | image_name_without_extension = filename.split('.')[0]
32 | resized_intensity_image_name = image_name_without_extension + '_resized.jpg'
33 | resized_image.save(resized_intensity_image_name, "JPEG", optimize=True)
34 | # remove the original intensity image
35 | os.remove(filename)
36 |
37 | def scale_depth(image, average):
38 |     for i in range(len(image)):  # map every depth value d to the proximity average / (average + d)
39 | image[i] = average / (average + image[i])
40 |
41 | def normalize_depth_values(path, subfolder):
42 | for filename in glob.iglob(path + str(subfolder) + '/*/depth/*[0-9].png', recursive=True):
43 | print(filename)
44 | img = Image.open(filename)
45 | size = img.size
46 |         image_values = list(img.getdata())  # use the pixel (depth) values, not the histogram
47 |         average_depth = statistics.mean(image_values)
48 |         if average_depth == 0:
49 |             average_depth = 0.000001
50 |         scale_depth(image_values, average_depth)
51 |         image_array = np.array(image_values, dtype=np.float32) * 255.0  # proximities lie in (0, 1]; rescale for an 8-bit image
52 | image = Image.new("L", size)
53 | image.putdata(image_array)
54 | normalized_image_name = filename.split('.')[0] + '_normalized.png'
55 | image.save(normalized_image_name, "PNG", optimize=True)
56 |
57 | def remove_normalized_depth_images(path):
58 | for filename in glob.iglob(path + '**/*/depth/*_normalized.png', recursive=True):
59 | os.remove(filename)
60 |
61 | def remove_intensity_images(path):
62 | for filename in glob.iglob(path + '**/*/photo/*_intensity.jpg', recursive=True):
63 | os.remove(filename)
64 | for filename in glob.iglob(path + '**/*/photo/*_resized.jpg', recursive=True):
65 | os.remove(filename)
66 |
67 | if __name__ == '__main__':
68 |     pool = mp.Pool(mp.cpu_count())
69 |     # valid operations: 'i' convert to intensity, 'r' resize intensity images, 'n' normalize depth values
70 |     requested = set(sys.argv[1:])
71 |     # if no args are passed, don't alter images
72 |     if not requested:
73 |         print("You should specify converting to intensity (i), resizing (r) or depth normalization (n).")
74 |     elif not requested.issubset({"i", "r", "n"}):
75 |         print("Invalid arguments: use 'i' to convert images to intensity images, 'r' to resize the intensity images, 'n' for depth normalization or any combination of them")
76 |     else:
77 |         # run the requested steps in a fixed order: intensity -> resize -> normalize
78 |         if "i" in requested:
79 |             [pool.apply(create_intensity_images_from_rgb_images_folder, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
80 |         if "r" in requested:
81 |             [pool.apply(resize_intensity_images, args=(TRAINING_SET_PATH, IMAGE_NEW_WIDTH, IMAGE_NEW_HEIGHT, subfolder)) for subfolder in DATASET_SUBFOLDERS]
82 |         if "n" in requested:
83 |             [pool.apply(normalize_depth_values, args=(TRAINING_SET_PATH, subfolder)) for subfolder in DATASET_SUBFOLDERS]
84 |
85 |     pool.close()
108 |
109 |
--------------------------------------------------------------------------------
/read_protobuf.py:
--------------------------------------------------------------------------------
1 | import scenenet_pb2 as sn
2 | import os
3 |
4 | DATA_ROOT_PATH = 'data/train_0/train/0'
5 | PROTOBUF_PATH = 'data/train_protobufs/scenenet_rgbd_train_0.pb'
6 |
7 | # These functions produce a file path (on Linux systems) to the image given
8 | # a view and render path from a trajectory, as long as DATA_ROOT_PATH points to
9 | # the root of the dataset, i.e. to either val or train.
10 | def photo_path_from_view(render_path,view):
11 | photo_path = os.path.join(render_path,'photo')
12 | image_path = os.path.join(photo_path,'{0}.jpg'.format(view.frame_num))
13 | return os.path.join(DATA_ROOT_PATH,image_path)
14 |
15 | def instance_path_from_view(render_path,view):
16 | photo_path = os.path.join(render_path,'instance')
17 | image_path = os.path.join(photo_path,'{0}.png'.format(view.frame_num))
18 | return os.path.join(DATA_ROOT_PATH,image_path)
19 |
20 | def depth_path_from_view(render_path,view):
21 | photo_path = os.path.join(render_path,'depth')
22 | image_path = os.path.join(photo_path,'{0}.png'.format(view.frame_num))
23 | return os.path.join(DATA_ROOT_PATH,image_path)
24 |
25 |
26 | if __name__ == '__main__':
27 | trajectories = sn.Trajectories()
28 | try:
29 | with open(PROTOBUF_PATH,'rb') as f:
30 | trajectories.ParseFromString(f.read())
31 | except IOError:
32 |         print('Scenenet protobuf data not found at location:{0}'.format(PROTOBUF_PATH))
33 | print('Please ensure you have copied the pb file to the data directory')
34 |
35 | print('Number of trajectories:{0}'.format(len(trajectories.trajectories)))
36 | for traj in trajectories.trajectories:
37 | layout_type = sn.SceneLayout.LayoutType.Name(traj.layout.layout_type)
38 | layout_path = traj.layout.model
39 | print('='*20)
40 | print('Render path:{0}'.format(traj.render_path))
41 | print('Layout type:{0} path:{1}'.format(layout_type,layout_path))
42 | print('='*20)
43 | print('')
44 | print('Number of instances: {0}'.format(len(traj.instances)))
45 | '''
46 | The instances attribute of trajectories contains all of the information
47 | about the different instances. The instance.instance_id attribute provides
48 | correspondences with the rendered instance.png files. I.e. for a given
49 | trajectory, if a pixel is of value 1, the information about that instance,
50 | such as its type, semantic class, and wordnet id, is stored here.
51 | For more information about the exact information available refer to the
52 | scenenet.proto file.
53 | '''
54 | for instance in traj.instances:
55 | instance_type = sn.Instance.InstanceType.Name(instance.instance_type)
56 | print('='*20)
57 | print('Instance id:{0}'.format(instance.instance_id))
58 | print('Instance type:{0}'.format(instance_type))
59 | if instance.instance_type != sn.Instance.BACKGROUND:
60 | print('Wordnet id:{0}'.format(instance.semantic_wordnet_id))
61 | print('Plain english name:{0}'.format(instance.semantic_english))
62 | if instance.instance_type == sn.Instance.LIGHT_OBJECT:
63 | light_type = sn.LightInfo.LightType.Name(instance.light_info.light_type)
64 | print('Light type:{0}'.format(light_type))
65 | if instance.instance_type == sn.Instance.RANDOM_OBJECT:
66 | print('Object info:{0}'.format(instance.object_info))
67 | print('-'*20)
68 | print('')
69 | print('Render path:{0}'.format(traj.render_path))
70 | '''
71 | The views attribute of trajectories contains all of the information
72 | about the rendered frames of a scene. This includes camera poses,
73 | frame numbers and timestamps.
74 | '''
75 | for view in traj.views:
76 | print(photo_path_from_view(traj.render_path,view))
77 | print(depth_path_from_view(traj.render_path,view))
78 | print(instance_path_from_view(traj.render_path,view))
79 | print(view)
80 | break
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.0
2 | Pillow==9.0.1
3 | protobuf==3.15.0
4 | six==1.14.0
5 | torch==1.4.0+cpu
6 | torchvision==0.5.0+cpu
7 |
--------------------------------------------------------------------------------
/scenenet.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package scenenet;
4 |
5 | message SceneLayout {
6 | enum LayoutType {
7 | BATHROOM = 1;
8 | BEDROOM = 2;
9 | KITCHEN = 3;
10 | LIVING_ROOM = 4;
11 | OFFICE = 5;
12 | }
13 | optional LayoutType layout_type = 1;
14 | // This is the name of the SceneNet model used for the layout
15 | optional string model = 2;
16 | }
17 |
18 | message LightInfo {
19 | enum LightType {
20 | SPHERE = 1;
21 | PARALLELOGRAM = 2;
22 | }
23 | optional LightType light_type = 1;
24 | // Light intensity
25 | optional Power light_output = 2;
26 | // This is the center for sphere type lights. And corner for others
27 | optional Position position = 3;
28 | // This is only for SPHERE lights
29 | optional float radius = 4;
30 | // This is only for PARALLELOGRAM lights
31 | optional Position v1 = 5;
32 | optional Position v2 = 6;
33 | }
34 |
35 | message RandomObjectInfo {
36 | optional string shapenet_hash = 1;
37 | optional float height_meters = 2;
38 | message Transformation {
39 | // The 3x4 matrix is as follows:
40 | // rotation_mat11 rotation_mat12 rotation_mat13 translation_x
41 | // rotation_mat21 rotation_mat22 rotation_mat23 translation_y
42 |     //  rotation_mat31 rotation_mat32 rotation_mat33 translation_z
43 | optional float translation_x = 1;
44 | optional float translation_y = 2;
45 | optional float translation_z = 3;
46 | optional float rotation_mat11 = 4;
47 | optional float rotation_mat12 = 5;
48 | optional float rotation_mat13 = 6;
49 | optional float rotation_mat21 = 7;
50 | optional float rotation_mat22 = 8;
51 | optional float rotation_mat23 = 9;
52 | optional float rotation_mat31 = 10;
53 | optional float rotation_mat32 = 11;
54 | optional float rotation_mat33 = 12;
55 | }
56 |   // The transformation gives the transformation applied to an object, about
57 | // the center of the base plane of its axis-aligned bounding box.
58 | optional Transformation object_pose = 3;
59 | }
60 |
61 | message Instance {
62 | optional int32 instance_id = 1;
63 | optional string semantic_wordnet_id = 2;
64 | optional string semantic_english = 3;
65 | enum InstanceType {
66 | // This is the instance type when no object is present, e.g. because of
67 | // looking out a window into nothingness
68 | BACKGROUND = 1;
69 | // This is an object that is hard coded into the layout and does not
70 | // move. This type does not have a transformation or shapenet hash
71 | LAYOUT_OBJECT = 2;
72 | // This is a randomly positioned light source
73 | LIGHT_OBJECT = 3;
74 | // This means the object is a randomly positioned shapenet object. The
75 | // object has a transformation and scale parameter in the object_info
76 | // variable.
77 | RANDOM_OBJECT = 4;
78 | }
79 | optional InstanceType instance_type = 4;
80 | // This information is only filled in for the respective type
81 | optional LightInfo light_info = 5;
82 | optional RandomObjectInfo object_info = 6;
83 | }
84 |
85 | message Power {
86 | optional float r = 1;
87 | optional float g = 2;
88 | optional float b = 3;
89 | }
90 |
91 | message Position {
92 | optional float x = 1;
93 | optional float y = 2;
94 | optional float z = 3;
95 | }
96 |
97 | message Pose {
98 | // The position of these two points define the camera view. The y vector is
99 | // defined as [0,1,0]. For an example of how to calculate the camera view
100 | // coordinate system, see the python codebase.
101 | optional Position camera = 1;
102 | optional Position lookat = 2;
103 | optional float timestamp = 3;
104 | }
105 |
106 | message View {
107 | // These increment by the number of skip frames, i.e. 0,25,50...7475.
108 | optional int32 frame_num = 1;
109 | // The photo is rendered by integrating uniformly sampled
110 | // exposures between the following two poses
111 | optional Pose shutter_open = 2;
112 | optional Pose shutter_close = 3;
113 | }
114 |
115 | message Trajectory {
116 | optional SceneLayout layout = 1;
117 |   // The first element, instances[0], is always the 'background'/undefined
118 |   // class, e.g. when looking out of windows
119 | repeated Instance instances = 2;
120 | // These are ordered sequentially for a trajectory
121 | repeated View views = 3;
122 | // This stores the path from the root data directory to the trajectory data
123 | // folder. If the trajectories are stored as:
124 | // /path/i/extracted/{val/train}/0/123/photo/0.jpg
125 | // then this path will be '0/123' designating the trajectories folder
126 | optional string render_path = 4;
127 | }
128 |
129 | message Trajectories {
130 | // This is the root list which stores all of the available trajectories
131 | repeated Trajectory trajectories = 1;
132 | }
--------------------------------------------------------------------------------