├── .ipynb_checkpoints ├── test_img-checkpoint.ipynb └── train_mobilenetv2-checkpoint.ipynb ├── CombineGIF.gif ├── Densenet_depth_model ├── Densenet.ipynb ├── DepthData.py ├── DepthImageVisualize.ipynb ├── Depthestimation.ipynb ├── UtilityTest.py ├── model_dense.py ├── test_img.ipynb ├── test_video.ipynb └── video.avi ├── DepthData_mob.py ├── Example generated images ├── 10_depth.jpg ├── 10_image.jpg ├── 11_depth.jpg ├── 11_image.jpg ├── 12_depth.jpg ├── 12_image.jpg ├── 13_depth.jpg ├── 13_image.jpg ├── 14_depth.jpg ├── 14_image.jpg ├── 15_depth.jpg ├── 15_image.jpg ├── 16_depth.jpg ├── 16_image.jpg ├── 17_depth.jpg ├── 17_image.jpg ├── 18_depth.jpg ├── 18_image.jpg ├── 2_depth.jpg ├── 2_image.jpg ├── 3_depth.jpg ├── 3_image.jpg ├── 4_depth.jpg ├── 4_image.jpg ├── 6_depth.jpg ├── 6_image.jpg ├── 7_depth.jpg ├── 7_image.jpg ├── 8_depth.jpg ├── 8_image.jpg ├── 9_depth.jpg └── 9_image.jpg ├── LICENSE ├── Mobile_model.py ├── README.md ├── UtilityTest.py ├── movie_depth.gif ├── movie_real.gif ├── test_img.ipynb ├── test_video.ipynb └── train_mobilenetv2.ipynb /CombineGIF.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/CombineGIF.gif -------------------------------------------------------------------------------- /Densenet_depth_model/Densenet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Densenet.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "code", 17 | "metadata": { 18 | "id": "JIivNnSwyODH", 19 | "colab_type": "code", 20 | "colab": { 21 | "base_uri": "https://localhost:8080/", 22 | "height": 461 23 | }, 24 | "outputId": "514e38dd-86d7-49dc-c238-aeb814b574f8" 25 | }, 26 | 
"source": [ 27 | "import torch\n", 28 | "import torchvision.models as models\n", 29 | "import torch.nn as nn\n", 30 | "from torch.autograd import Variable\n", 31 | "import torch.nn.functional as F\n", 32 | "model_ft = models.densenet161(pretrained=True)\n", 33 | "x = torch.randn([1,3,640 , 480])\n", 34 | "\n", 35 | "model_ft.features._modules.items()\n", 36 | "features = [x]\n", 37 | "i=-2;\n", 38 | "for k, v in (model_ft.features._modules.items()): \n", 39 | " print(i)\n", 40 | " i+=1\n", 41 | " features.append( v(features[-1]) )\n", 42 | " # print(k)\n", 43 | " # print(v)\n", 44 | " # print(features[-1].shape)\n", 45 | "\n", 46 | "for i,x in enumerate(features):\n", 47 | " print(i,x.shape)" 48 | ], 49 | "execution_count": 14, 50 | "outputs": [ 51 | { 52 | "output_type": "stream", 53 | "text": [ 54 | "-2\n", 55 | "-1\n", 56 | "0\n", 57 | "1\n", 58 | "2\n", 59 | "3\n", 60 | "4\n", 61 | "5\n", 62 | "6\n", 63 | "7\n", 64 | "8\n", 65 | "9\n", 66 | "0 torch.Size([1, 3, 640, 480])\n", 67 | "1 torch.Size([1, 96, 320, 240])\n", 68 | "2 torch.Size([1, 96, 320, 240])\n", 69 | "3 torch.Size([1, 96, 320, 240])\n", 70 | "4 torch.Size([1, 96, 160, 120])\n", 71 | "5 torch.Size([1, 384, 160, 120])\n", 72 | "6 torch.Size([1, 192, 80, 60])\n", 73 | "7 torch.Size([1, 768, 80, 60])\n", 74 | "8 torch.Size([1, 384, 40, 30])\n", 75 | "9 torch.Size([1, 2112, 40, 30])\n", 76 | "10 torch.Size([1, 1056, 20, 15])\n", 77 | "11 torch.Size([1, 2208, 20, 15])\n", 78 | "12 torch.Size([1, 2208, 20, 15])\n" 79 | ], 80 | "name": "stdout" 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "eoGzvGeB2mgp", 88 | "colab_type": "code", 89 | "colab": {} 90 | }, 91 | "source": [ 92 | "import torch\n", 93 | "import torch.nn as nn\n", 94 | "import torch.nn.functional as F\n", 95 | "\n", 96 | "class UpSample(nn.Sequential):\n", 97 | " def __init__(self, skip_input, output_features):\n", 98 | " super(UpSample, self).__init__() \n", 99 | " self.convA = nn.Conv2d(skip_input, 
output_features, kernel_size=3, stride=1, padding=1)\n", 100 | " self.leakyreluA = nn.LeakyReLU(0.2)\n", 101 | " self.convB = nn.Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1)\n", 102 | " self.leakyreluB = nn.LeakyReLU(0.2)\n", 103 | "\n", 104 | " def forward(self, x, concat_with):\n", 105 | " up_x = F.interpolate(x, size=[concat_with.size(2), concat_with.size(3)], mode='bilinear', align_corners=True)\n", 106 | " return self.leakyreluB( self.convB( self.leakyreluA(self.convA( torch.cat([up_x, concat_with], dim=1) ) ) ) )\n", 107 | "\n", 108 | "class Decoder(nn.Module):\n", 109 | " def __init__(self, num_features=2208, decoder_width = 0.5):\n", 110 | " super(Decoder, self).__init__()\n", 111 | " features = int(num_features * decoder_width)\n", 112 | "\n", 113 | " self.conv2 = nn.Conv2d(num_features, features, kernel_size=1, stride=1, padding=1)\n", 114 | "\n", 115 | " self.up1 = UpSample(skip_input=features//1 + 384, output_features=features//2)\n", 116 | " self.up2 = UpSample(skip_input=features//2 + 192, output_features=features//4)\n", 117 | " self.up3 = UpSample(skip_input=features//4 + 96, output_features=features//8)\n", 118 | " self.up4 = UpSample(skip_input=features//8 + 96, output_features=features//16)\n", 119 | "\n", 120 | " self.conv3 = nn.Conv2d(features//16, 1, kernel_size=3, stride=1, padding=1)\n", 121 | "\n", 122 | " def forward(self, features):\n", 123 | " x_block0, x_block1, x_block2, x_block3, x_block4 = features[3], features[4], features[6], features[8], features[11]\n", 124 | " x_d0 = self.conv2(x_block4)\n", 125 | " x_d1 = self.up1(x_d0, x_block3)\n", 126 | " x_d2 = self.up2(x_d1, x_block2)\n", 127 | " x_d3 = self.up3(x_d2, x_block1)\n", 128 | " x_d4 = self.up4(x_d3, x_block0)\n", 129 | " return self.conv3(x_d4)\n", 130 | "\n", 131 | "class Encoder(nn.Module):\n", 132 | " def __init__(self):\n", 133 | " super(Encoder, self).__init__() \n", 134 | " import torchvision.models as models\n", 135 | " self.original_model 
= models.densenet161( pretrained=True )\n", 136 | "\n", 137 | " def forward(self, x):\n", 138 | " features = [x]\n", 139 | " for k, v in self.original_model.features._modules.items(): features.append( v(features[-1]) )\n", 140 | " return features\n", 141 | "\n", 142 | "class Model(nn.Module):\n", 143 | " def __init__(self):\n", 144 | " super(Model, self).__init__()\n", 145 | " self.encoder = Encoder()\n", 146 | " self.decoder = Decoder()\n", 147 | "\n", 148 | " def forward(self, x):\n", 149 | " return self.decoder( self.encoder(x) )" 150 | ], 151 | "execution_count": 0, 152 | "outputs": [] 153 | } 154 | ] 155 | } -------------------------------------------------------------------------------- /Densenet_depth_model/DepthData.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sun Dec 29 23:17:26 2019 3 | 4 | @author: alin 5 | """ 6 | 7 | # from torch.utils.data import Dataset, DataLoader 8 | from torch.utils.data import Dataset, DataLoader 9 | import os 10 | from PIL import Image 11 | import random 12 | import numpy as np 13 | import torch 14 | 15 | #Depth Datasetclass 16 | 17 | def _is_pil_image(img): 18 | return isinstance(img, Image.Image) 19 | 20 | def _is_numpy_image(img): 21 | return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) 22 | 23 | 24 | class DepthDataset(Dataset): 25 | os = __import__('os') 26 | def __init__(self, traincsv, root_dir, transform=None): 27 | self.traincsv = traincsv 28 | self.root_dir = root_dir 29 | self.transform = transform 30 | 31 | def __len__(self): 32 | return len(self.traincsv) 33 | 34 | def __getitem__(self, idx): 35 | 36 | sample = self.traincsv[idx] 37 | img_name = os.path.join(self.root_dir,sample[0]) 38 | image = (Image.open(img_name)) 39 | depth_name = os.path.join(self.root_dir,sample[1]) 40 | depth =(Image.open(depth_name)) 41 | # depth = depth[..., np.newaxis] 42 | sample1={'image': image, 'depth': depth} 43 | 44 | if self.transform: sample1 = 
class Augmentation(object):
    """Random horizontal flip plus random colour-channel permutation.

    The flip is applied to image and depth together with probability 0.5;
    the channel permutation is applied to the image only, with the
    probability given at construction.
    """

    def __init__(self, probability):
        """Store the channel-shuffle probability and the six channel orders."""
        import itertools

        self.probability = probability
        # All orderings of channel indices 0, 1, 2:
        # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
        self.indices = list(itertools.permutations((0, 1, 2)))

    def __call__(self, sample):
        """Augment ``sample`` (dict with PIL 'image' and 'depth') and return a new dict.

        Raises:
            TypeError: if either entry is not a PIL image.
        """
        image = sample['image']
        depth = sample['depth']

        # Validate image first, then depth, so the error names the first offender.
        for candidate in (image, depth):
            if not _is_pil_image(candidate):
                raise TypeError(
                    'img should be PIL Image. Got {}'.format(type(candidate)))

        # Mirror both inputs together so the depth map stays aligned.
        mirror = random.random() < 0.5
        if mirror:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            depth = depth.transpose(Image.FLIP_LEFT_RIGHT)

        # Shuffle colour channels of the RGB image only.
        shuffle_channels = random.random() < self.probability
        if shuffle_channels:
            pixels = np.asarray(image)
            pick = random.randint(0, len(self.indices) - 1)
            channel_order = list(self.indices[pick])
            image = Image.fromarray(pixels[..., channel_order])

        return {'image': image, 'depth': depth}
be PIL Image or ndarray. Got {}'.format(type(pic))) 108 | 109 | if isinstance(pic, np.ndarray): 110 | if pic.ndim==2: 111 | pic=pic[..., np.newaxis] 112 | 113 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 114 | 115 | return img.float().div(255) -------------------------------------------------------------------------------- /Densenet_depth_model/DepthImageVisualize.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 19, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import os\n", 11 | "from sklearn.utils import shuffle\n", 12 | "from skimage import io, transform\n", 13 | "import matplotlib.pyplot as plt" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 20, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "0.5843137254901961\n", 26 | "0.14901960784313725\n" 27 | ] 28 | }, 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "" 33 | ] 34 | }, 35 | "execution_count": 20, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | }, 39 | { 40 | "data": { 41 | "image/png": "\n", 42 | "text/plain": [ 43 | "
" 44 | ] 45 | }, 46 | "metadata": { 47 | "needs_background": "light" 48 | }, 49 | "output_type": "display_data" 50 | }, 51 | { 52 | "data": { 53 | "image/png": "\n", 54 | "text/plain": [ 55 | "
" 56 | ] 57 | }, 58 | "metadata": { 59 | "needs_background": "light" 60 | }, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "#for testing image\n", 66 | "traincsv=pd.read_csv('/workspace/data/nyu2_train.csv')\n", 67 | "traincsv = traincsv.rename(columns={'data/nyu2_train/living_room_0038_out/37.jpg': 'image', 'data/nyu2_train/living_room_0038_out/37.png': 'depth'})\n", 68 | "traincsv = shuffle(traincsv, random_state=1)\n", 69 | "root_dir='/workspace/'\n", 70 | "img_name = os.path.join(root_dir,traincsv.iloc[2,0])\n", 71 | "image = io.imread(img_name)\n", 72 | "depth_name = os.path.join(root_dir,traincsv.iloc[2,1])\n", 73 | "depth_image = io.imread((depth_name))\n", 74 | "from skimage import img_as_float\n", 75 | "depth_float = img_as_float(depth_image)\n", 76 | "print(depth_float.max())\n", 77 | "print(depth_float.min())\n", 78 | "\n", 79 | "plt.imshow(image)\n", 80 | "plt.figure()\n", 81 | "plt.imshow(depth_image)" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.9" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /Densenet_depth_model/UtilityTest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | from PIL import Image 5 | import numpy as np 6 | import PIL 7 | import random 8 | import torch 9 | import matplotlib.pyplot as plt 10 | from torch.utils.data import Dataset, DataLoader 11 | from torchvision import transforms, utils 12 | 13 | def _is_pil_image(img): 14 | return 
class DepthDataset(Dataset):
    """Dataset of test images read from a single flat directory.

    Each item is a dict ``{'image': <PIL image>}``, optionally passed
    through ``transform``.

    Args:
        root_dir: directory whose entries are treated as image files.
        transform: optional callable taking and returning a sample dict.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # List the directory once and sort it: os.listdir returns entries in
        # arbitrary order, so re-listing per access made the index -> file
        # mapping non-reproducible and cost a full directory scan per item.
        # The listing is a snapshot taken at construction time.
        self.filenames = sorted(os.listdir(root_dir))

    def __len__(self):
        """Return the number of directory entries found at construction."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Open the ``idx``-th file (in sorted name order) and return the sample dict."""
        img_name = os.path.join(self.root_dir, self.filenames[idx])
        image = Image.open(img_name)

        sample = {'image': image}
        if self.transform:
            sample = self.transform(sample)
        return sample
class UpSample(nn.Sequential):
    """Decoder stage: resize, concatenate a skip tensor, then two conv + LeakyReLU layers.

    Args:
        skip_input: channel count of the concatenated (upsampled + skip) tensor.
        output_features: channel count produced by both convolutions.
    """

    def __init__(self, skip_input, output_features):
        super().__init__()
        # Both convolutions are 3x3, stride 1, padding 1 (spatial size preserved).
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
        self.convA = nn.Conv2d(skip_input, output_features, **conv_kwargs)
        self.leakyreluA = nn.LeakyReLU(0.2)
        self.convB = nn.Conv2d(output_features, output_features, **conv_kwargs)
        self.leakyreluB = nn.LeakyReLU(0.2)

    def forward(self, x, concat_with):
        """Resize ``x`` to ``concat_with``'s last two dims, concatenate on dim 1, and refine."""
        target_hw = [concat_with.size(2), concat_with.size(3)]
        upsampled = F.interpolate(x, size=target_hw, mode='bilinear', align_corners=True)
        merged = torch.cat([upsampled, concat_with], dim=1)
        hidden = self.leakyreluA(self.convA(merged))
        return self.leakyreluB(self.convB(hidden))
class Encoder(nn.Module):
    """Feature extractor built on torchvision's pretrained DenseNet-161.

    ``forward`` returns the input followed by the output of every module in
    ``original_model.features``, so a decoder can pick skip connections by
    index.
    """

    def __init__(self):
        super().__init__()
        from torchvision import models
        self.original_model = models.densenet161(pretrained=True)

    def forward(self, x):
        """Return ``[x, f1(x), f2(f1(x)), ...]`` for each feature module in order."""
        features = [x]
        for layer in self.original_model.features._modules.values():
            # Each module consumes the previous module's output.
            features.append(layer(features[-1]))
        return features
class DepthDataset(Dataset):
    """Paired RGB / depth dataset driven by an indexable list of rows.

    Each row of ``traincsv`` is indexed as ``row[0]`` (image path) and
    ``row[1]`` (depth path), both joined onto ``root_dir``.
    """

    # Kept for backward compatibility: exposes the os module as a class attribute.
    os = __import__('os')

    def __init__(self, traincsv, root_dir, transform=None):
        self.traincsv, self.root_dir, self.transform = traincsv, root_dir, transform

    def __len__(self):
        """Return the number of rows in ``traincsv``."""
        return len(self.traincsv)

    def __getitem__(self, idx):
        """Open the image and depth files of row ``idx``; apply ``transform`` if set."""
        row = self.traincsv[idx]
        image = Image.open(os.path.join(self.root_dir, row[0]))
        depth = Image.open(os.path.join(self.root_dir, row[1]))

        pair = {'image': image, 'depth': depth}
        if not self.transform:
            return pair
        return self.transform(pair)
class ToTensor(object):
    """Convert an ``{'image', 'depth'}`` sample into float tensors.

    The depth input is resized with ``resize((320, 240))``, scaled, and
    clamped to the range [10, 1000].
    """

    def __init__(self, is_test=False):
        self.is_test = is_test

    def __call__(self, sample):
        """Return a dict with the converted 'image' and the scaled, clamped 'depth'."""
        image, depth = sample['image'], sample['depth']

        image = self.to_tensor(image)
        depth = self.to_tensor(depth.resize((320, 240))).float()

        # to_tensor already divided by 255; test data is further divided by
        # 1000 while training data is multiplied by 1000.
        depth = depth / 1000 if self.is_test else depth * 1000

        # put in expected range
        return {'image': image, 'depth': torch.clamp(depth, 10, 1000)}

    def to_tensor(self, pic):
        """Convert ``pic`` to a channel-first float tensor divided by 255.

        Raises:
            TypeError: if ``pic`` does not convert to a 2-D or 3-D array.
        """
        pic = np.array(pic)
        if not _is_numpy_image(pic) and not _is_pil_image(pic):
            raise TypeError( 'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            if pic.ndim == 2:
                # Single-channel input: add a trailing channel axis.
                pic = pic[:, :, np.newaxis]
            chw = pic.transpose(2, 0, 1)
            return torch.from_numpy(chw).float().div(255)
-------------------------------------------------------------------------------- /Example generated images/12_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/12_image.jpg -------------------------------------------------------------------------------- /Example generated images/13_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/13_depth.jpg -------------------------------------------------------------------------------- /Example generated images/13_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/13_image.jpg -------------------------------------------------------------------------------- /Example generated images/14_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/14_depth.jpg -------------------------------------------------------------------------------- /Example generated images/14_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/14_image.jpg -------------------------------------------------------------------------------- /Example generated images/15_depth.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/15_depth.jpg -------------------------------------------------------------------------------- /Example generated images/15_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/15_image.jpg -------------------------------------------------------------------------------- /Example generated images/16_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/16_depth.jpg -------------------------------------------------------------------------------- /Example generated images/16_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/16_image.jpg -------------------------------------------------------------------------------- /Example generated images/17_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/17_depth.jpg -------------------------------------------------------------------------------- /Example generated images/17_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/17_image.jpg -------------------------------------------------------------------------------- /Example generated images/18_depth.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/18_depth.jpg -------------------------------------------------------------------------------- /Example generated images/18_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/18_image.jpg -------------------------------------------------------------------------------- /Example generated images/2_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/2_depth.jpg -------------------------------------------------------------------------------- /Example generated images/2_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/2_image.jpg -------------------------------------------------------------------------------- /Example generated images/3_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/3_depth.jpg -------------------------------------------------------------------------------- /Example generated images/3_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/3_image.jpg 
-------------------------------------------------------------------------------- /Example generated images/4_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/4_depth.jpg -------------------------------------------------------------------------------- /Example generated images/4_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/4_image.jpg -------------------------------------------------------------------------------- /Example generated images/6_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/6_depth.jpg -------------------------------------------------------------------------------- /Example generated images/6_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/6_image.jpg -------------------------------------------------------------------------------- /Example generated images/7_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/7_depth.jpg -------------------------------------------------------------------------------- /Example generated images/7_image.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/7_image.jpg -------------------------------------------------------------------------------- /Example generated images/8_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/8_depth.jpg -------------------------------------------------------------------------------- /Example generated images/8_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/8_image.jpg -------------------------------------------------------------------------------- /Example generated images/9_depth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/9_depth.jpg -------------------------------------------------------------------------------- /Example generated images/9_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/Example generated images/9_image.jpg -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Alinstein Jose 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Mobile_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class UpSample(nn.Sequential): 6 | def __init__(self, skip_input, output_features): 7 | super(UpSample, self).__init__() 8 | self.convA = nn.Conv2d(skip_input, output_features, kernel_size=3, stride=1, padding=1) 9 | self.leakyreluA = nn.LeakyReLU(0.2) 10 | self.convB = nn.Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1) 11 | self.leakyreluB = nn.LeakyReLU(0.2) 12 | 13 | def forward(self, x, concat_with): 14 | up_x = F.interpolate(x, size=[concat_with.size(2), concat_with.size(3)], mode='bilinear', align_corners=True) 15 | return self.leakyreluB( self.convB( self.leakyreluA(self.convA( torch.cat([up_x, concat_with], dim=1) ) ) ) ) 16 | 17 | class Decoder(nn.Module): 18 | def __init__(self, num_features=1280, decoder_width = .6): 19 | super(Decoder, self).__init__() 20 | features = int(num_features * decoder_width) 21 | 22 | self.conv2 = 
nn.Conv2d(num_features, features, kernel_size=1, stride=1, padding=1) 23 | 24 | self.up0 = UpSample(skip_input=features//1 + 320, output_features=features//2) 25 | self.up1 = UpSample(skip_input=features//2 + 160, output_features=features//2) 26 | self.up2 = UpSample(skip_input=features//2 + 64, output_features=features//4) 27 | self.up3 = UpSample(skip_input=features//4 + 32, output_features=features//8) 28 | self.up4 = UpSample(skip_input=features//8 + 24, output_features=features//8) 29 | self.up5 = UpSample(skip_input=features//8 + 16, output_features=features//16) 30 | 31 | self.conv3 = nn.Conv2d(features//16, 1, kernel_size=3, stride=1, padding=1) 32 | 33 | def forward(self, features): 34 | x_block0, x_block1, x_block2, x_block3, x_block4,x_block5,x_block6 = features[2], features[4], features[6], features[9], features[15],features[18],features[19] 35 | x_d0 = self.conv2(x_block6) 36 | x_d1 = self.up0(x_d0, x_block5) 37 | x_d2 = self.up1(x_d1, x_block4) 38 | x_d3 = self.up2(x_d2, x_block3) 39 | x_d4 = self.up3(x_d3, x_block2) 40 | x_d5 = self.up4(x_d4, x_block1) 41 | x_d6 = self.up5(x_d5, x_block0) 42 | return self.conv3(x_d6) 43 | 44 | class Encoder(nn.Module): 45 | def __init__(self): 46 | super(Encoder, self).__init__() 47 | import torchvision.models as models 48 | self.original_model = models.mobilenet_v2( pretrained=True ) 49 | 50 | def forward(self, x): 51 | features = [x] 52 | for k, v in self.original_model.features._modules.items(): features.append( v(features[-1]) ) 53 | return features 54 | 55 | class Model(nn.Module): 56 | def __init__(self): 57 | super(Model, self).__init__() 58 | self.encoder = Encoder() 59 | self.decoder = Decoder() 60 | 61 | def forward(self, x): 62 | return self.decoder( self.encoder(x) ) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Monocular Depth Estimation with Transfer Learning pretrained 
MobileNetV2 2 | 3 | 4 | This project implements a deep neural network model that generates the depth image of a given image. 5 | The model is a U-Net with MobileNetV2 as the encoder and uses skip connections from the encoder to the decoder. 6 | The model generates a depth image of resolution 480x640 for an input image of the same size. 7 | 8 | ![Results](https://github.com/alinstein/Depth_estimation/blob/master/CombineGIF.gif) 9 | 10 | 11 | This project was implemented with reference to the following paper: 12 | 13 | [High Quality Monocular Depth Estimation via Transfer Learning (arXiv 2018)](https://arxiv.org/abs/1812.11941) 14 | **[Ibraheem Alhashim]** and **Peter Wonka** 15 | 16 | ## Getting Started 17 | 18 | ##### The model is trained using the IPython notebook "train_mobilenetv2.ipynb". 19 | 20 | * Download the dataset and provide the location of the dataset. 21 | * Change the following according to your needs: batch_size, epochs, lr (learning rate). 22 | Load the pretrained model if needed. 23 | 24 | ##### The IPython notebook "test_img.ipynb" can be used to generate depth images with the pretrained model. 25 | 26 | * Provide the location of the directory of images to be converted and load the pretrained model. 27 | 28 | ##### The IPython notebook "test_video.ipynb" can be used to generate a depth video with the pretrained model. 29 | 30 | * Provide the location of the directory of images to be converted and load the pretrained model. 31 | 32 | #### The implementation of depth estimation using the Densenet model is in the folder "Densenet_depth_model". 33 | 34 | 35 | ## Dataset 36 | * [NYU Depth V2 (50K)](https://s3-eu-west-1.amazonaws.com/densedepth/nyu_data.zip) (4.1 GB): The file is extracted while running "train_mobilenetv2.ipynb". 37 | 38 | ## Download the pretrained model 39 | * [Mobilenet](https://drive.google.com/drive/folders/1rDvtiwUgYbhzk8ZPdQ176abv-u6SaZzI?usp=sharing) (55 MB). The pretrained model was trained on 2 NVIDIA GeForce GTX 1080 GPUs for 6 hours (6 epochs).
40 | 41 | ## Author 42 | 43 | Written by Alinstein Jose, University of Victoria. 44 | 45 | -------------------------------------------------------------------------------- /UtilityTest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | from PIL import Image 5 | import numpy as np 6 | import PIL 7 | import random 8 | import torch 9 | import matplotlib.pyplot as plt 10 | from torch.utils.data import Dataset, DataLoader 11 | from torchvision import transforms, utils 12 | 13 | def _is_pil_image(img): 14 | return isinstance(img, Image.Image) 15 | 16 | def _is_numpy_image(img): 17 | return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) 18 | 19 | class DepthDataset(Dataset): 20 | def __init__(self, root_dir, transform=None): 21 | 22 | self.root_dir = root_dir 23 | self.transform = transform 24 | 25 | def __len__(self): 26 | return len(os.listdir(self.root_dir)) 27 | 28 | def __getitem__(self, idx): 29 | 30 | img_name = os.path.join(self.root_dir,os.listdir(self.root_dir)[idx]) 31 | image = (Image.open(img_name)) 32 | 33 | sample1={'image': image} 34 | 35 | if self.transform: sample1 = self.transform({'image': image}) 36 | return sample1 37 | 38 | 39 | 40 | class ToTensor(object): 41 | def __init__(self,is_test=False): 42 | self.is_test = is_test 43 | 44 | def __call__(self, sample): 45 | image= sample['image'] 46 | 47 | image = image.resize((640, 480)) 48 | image = self.to_tensor(image) 49 | 50 | return {'image': image} 51 | 52 | def to_tensor(self, pic): 53 | pic = np.array(pic) 54 | if not (_is_numpy_image(pic) or _is_pil_image(pic)): 55 | raise TypeError( 'pic should be PIL Image or ndarray. 
Got {}'.format(type(pic))) 56 | 57 | if isinstance(pic, np.ndarray): 58 | if pic.ndim==2: 59 | pic=pic[..., np.newaxis] 60 | 61 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 62 | 63 | return img.float().div(255) -------------------------------------------------------------------------------- /movie_depth.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/movie_depth.gif -------------------------------------------------------------------------------- /movie_real.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alinstein/Depth_estimation/49f1d5e6f6f141c697efa3edc02264e2b5b4e1a4/movie_real.gif -------------------------------------------------------------------------------- /test_video.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import os\n", 11 | "import glob\n", 12 | "import time\n", 13 | "from PIL import Image\n", 14 | "import numpy as np\n", 15 | "import PIL\n", 16 | "import imageio\n", 17 | "import random\n", 18 | "import torch\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from torch.utils.data import Dataset, DataLoader\n", 21 | "from torchvision import transforms, utils\n", 22 | "\n", 23 | "from UtilityTest import DepthDataset\n", 24 | "from UtilityTest import ToTensor" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "Location_video='/workspace/test_vid/VID_20200102_142929.mp4'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "#Extract the video from zipfile\n", 
43 | "from zipfile import ZipFile\n", 44 | "zf = ZipFile('/workspace/test_vid.zip', 'r')\n", 45 | "zf.extractall('/workspace/')\n", 46 | "zf.close()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "#converting video into frames\n", 56 | "vidcap = cv2.VideoCapture(Location_video)\n", 57 | "success,image = vidcap.read()\n", 58 | "count = 0\n", 59 | "while success:\n", 60 | " cv2.imwrite(\"/workspace/test_vid/Frames/frame%d.jpg\" % count, image) # save frame as JPEG file \n", 61 | " success,image = vidcap.read()\n", 62 | " print('Read a new frame: ', success)\n", 63 | " count += 1" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "#location of video frames\n", 73 | "loc_img='/workspace/test_vid/Frames/'\n", 74 | "\n", 75 | "#ploting few sample frames\n", 76 | "depth_dataset = DepthDataset(root_dir=loc_img)\n", 77 | "fig = plt.figure()\n", 78 | "len(depth_dataset)\n", 79 | "for i in range(len(depth_dataset)):\n", 80 | " sample = depth_dataset[i]\n", 81 | "\n", 82 | " print(i, sample['image'].size)\n", 83 | "\n", 84 | "\n", 85 | " plt.imshow(sample['image'])\n", 86 | " plt.figure()\n", 87 | "\n", 88 | "\n", 89 | " if i == 2:\n", 90 | " plt.show()\n", 91 | " break" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "depth_dataset = DepthDataset(root_dir=loc_img,transform=transforms.Compose([ToTensor()]))\n", 101 | "# depth_dataset\n", 102 | "batch_size=1\n", 103 | "train_loader=torch.utils.data.DataLoader(depth_dataset, batch_size)\n", 104 | "# train_loader\n", 105 | "dataiter = iter(train_loader)\n", 106 | "images = dataiter.next()\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "import 
torch\n", 116 | "import torch.nn as nn\n", 117 | "import torch.nn.utils as utils\n", 118 | "import torchvision.utils as vutils \n", 119 | "import torchvision.models as models\n", 120 | "from Mobile_model import Model\n", 121 | "model = Model().cuda()\n", 122 | "model = nn.DataParallel(model)\n", 123 | "#loading the the trained model\n", 124 | "model.load_state_dict(torch.load('/workspace/17.pth'))\n", 125 | "model.eval()\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "os.mkdir('/workspace/test_vid/depth_frames/')\n", 135 | "\n", 136 | "#generating the depth image of frames\n", 137 | "\n", 138 | "import matplotlib.cm as cm\n", 139 | "for i,sample_batched1 in enumerate (train_loader):\n", 140 | " image1 = torch.autograd.Variable(sample_batched1['image'].cuda())\n", 141 | " \n", 142 | " outtt=model(image1 )\n", 143 | " x=outtt.detach().cpu().numpy()\n", 144 | " x.shape\n", 145 | " x=x.reshape(240,320)\n", 146 | " img=x\n", 147 | " scale_percent = 200 # percent of original size\n", 148 | " width = int(img.shape[1] * scale_percent / 100)\n", 149 | " height = int(img.shape[0] * scale_percent / 100)\n", 150 | " dim = (width, height)\n", 151 | " # resize image\n", 152 | " resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)\n", 153 | " plt.imsave('/workspace/test_vid/depth_frames/geeks%d.jpg' %i, resized, cmap='inferno')\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "from os.path import isfile, join\n", 163 | " \n", 164 | "def convert_frames_to_video(pathIn,pathOut,fps):\n", 165 | " frame_array = []\n", 166 | " files = [f for f in os.listdir(pathIn) if isfile(join(pathIn, f))]\n", 167 | " \n", 168 | " #for sorting the file names properly\n", 169 | " files.sort(key = lambda x: int(x[5:-4]))\n", 170 | " \n", 171 | " for i in range(len(files)):\n", 172 | " 
filename=pathIn + files[i]\n", 173 | " #reading each files\n", 174 | " img = cv2.imread(filename)\n", 175 | " height, width, layers = img.shape\n", 176 | " size = (width,height)\n", 177 | " print(filename)\n", 178 | " #inserting the frames into an image array\n", 179 | " frame_array.append(img)\n", 180 | " \n", 181 | " out = cv2.VideoWriter(pathOut,cv2.VideoWriter_fourcc(*'MP4V'), fps, size)\n", 182 | " \n", 183 | " for i in range(len(frame_array)):\n", 184 | " # writing to a image array\n", 185 | " out.write(frame_array[i])\n", 186 | " out.release()\n", 187 | " " 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | " #convert frames to videos\n", 197 | " pathIn= '/workspace/test_vid/depth_frames/'\n", 198 | " pathOut = '/workspace/test_vid/video1.mp4'\n", 199 | " #enter the correct frame rates\n", 200 | " fps = 30.01\n", 201 | " convert_frames_to_video(pathIn, pathOut, fps)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "#converting the frames to gif\n", 211 | "\n", 212 | "filenames='/workspace/test_vid/depth_frames/'\n", 213 | "files = [f for f in os.listdir(filenames) if isfile(join(filenames, f))]\n", 214 | "\n", 215 | "files.sort(key = lambda x: int(float(x[5:-4])))\n", 216 | "files\n", 217 | "images = []\n", 218 | "for i,filename in enumerate(files):\n", 219 | " if i%10==0:\n", 220 | " images.append(imageio.imread(os.path.join('/workspace/test_vid/depth_frames/',filename)))\n", 221 | " print(filename)\n", 222 | "imageio.mimsave('/workspace/test_vid/movie_depth.gif', images)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "from skimage import transform,io\n", 232 | "filenames='/workspace/test_vid/Frames/'\n", 233 | "files = [f for f in os.listdir(filenames) if 
isfile(join(filenames, f))]\n", 234 | "\n", 235 | "files.sort(key = lambda x: int(float(x[5:-4])))\n", 236 | "files\n", 237 | "images = []\n", 238 | "for i,filename in enumerate(files):\n", 239 | " if i%10==0:\n", 240 | " \n", 241 | "\n", 242 | " # resize to 28x28\n", 243 | " grey=(imageio.imread(os.path.join('/workspace/test_vid/Frames/',filename)))\n", 244 | " image = transform.resize(grey, (480,640), mode='symmetric', preserve_range=True)\n", 245 | " images.append(image)\n", 246 | " print(filename)\n", 247 | "imageio.mimsave('/workspace/test_vid/movie_real.gif', images)" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.6.9" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | --------------------------------------------------------------------------------