├── +helper ├── camvidPixelLabelIDs.m ├── downloadPretrainedDeepLabv3Plus.m ├── partitionCamVidData.m └── pascal-voc-classes.txt ├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── README.md ├── SECURITY.md ├── codegenDeepLabv3Plus.m ├── configureDeepLabv3PlusTransferLearn.m ├── deepLabv3PlusSemanticSegmentationExample.m ├── deepLabv3Plus_predict.m ├── images └── result.png ├── model └── .gitkeep └── test ├── tPretrainedDeeplebV3Plus.m ├── tdownloadPretrainedDeeplebV3Plus.m ├── tload.m └── tools ├── DownloadDeeplabV3PlusFixture.m └── getRepoRoot.m /+helper/camvidPixelLabelIDs.m: -------------------------------------------------------------------------------- 1 | function labelIDs = camvidPixelLabelIDs() 2 | % Return the label IDs corresponding to each class. 3 | % 4 | % The CamVid dataset has 32 classes. Group them into 11 classes. 5 | % 6 | % The 11 classes are: 7 | % "Sky", "Building", "Pole", "Road", "Pavement", "Tree", "SignSymbol", 8 | % "Fence", "Car", "Pedestrian", and "Bicyclist". 9 | % 10 | % CamVid pixel label IDs are provided as RGB color values. Group them into 11 | % 11 classes and return them as a cell array of M-by-3 matrices. The 12 | % original CamVid class names are listed alongside each RGB value. Note 13 | % that the Other/Void class is excluded below. 14 | % 15 | % Copyright 2021 The MathWorks, Inc. 16 | 17 | labelIDs = { ... 18 | 19 | % "Sky" 20 | [ 21 | 128 128 128; ... % "Sky" 22 | ] 23 | 24 | % "Building" 25 | [ 26 | 000 128 064; ... % "Bridge" 27 | 128 000 000; ... % "Building" 28 | 064 192 000; ... % "Wall" 29 | 064 000 064; ... % "Tunnel" 30 | 192 000 128; ... % "Archway" 31 | ] 32 | 33 | % "Pole" 34 | [ 35 | 192 192 128; ... % "Column_Pole" 36 | 000 000 064; ... % "TrafficCone" 37 | ] 38 | 39 | % "Road" 40 | [ 41 | 128 064 128; ... % "Road" 42 | 128 000 192; ... % "LaneMkgsDriv" 43 | 192 000 064; ... % "LaneMkgsNonDriv" 44 | ] 45 | 46 | % "Pavement" 47 | [ 48 | 000 000 192; ... % "Sidewalk" 49 | 064 192 128; ... 
% "ParkingBlock" 50 | 128 128 192; ... % "RoadShoulder" 51 | ] 52 | 53 | % "Tree" 54 | [ 55 | 128 128 000; ... % "Tree" 56 | 192 192 000; ... % "VegetationMisc" 57 | ] 58 | 59 | % "SignSymbol" 60 | [ 61 | 192 128 128; ... % "SignSymbol" 62 | 128 128 064; ... % "Misc_Text" 63 | 000 064 064; ... % "TrafficLight" 64 | ] 65 | 66 | % "Fence" 67 | [ 68 | 064 064 128; ... % "Fence" 69 | ] 70 | 71 | % "Car" 72 | [ 73 | 064 000 128; ... % "Car" 74 | 064 128 192; ... % "SUVPickupTruck" 75 | 192 128 192; ... % "Truck_Bus" 76 | 192 064 128; ... % "Train" 77 | 128 064 064; ... % "OtherMoving" 78 | ] 79 | 80 | % "Pedestrian" 81 | [ 82 | 064 064 000; ... % "Pedestrian" 83 | 192 128 064; ... % "Child" 84 | 064 000 192; ... % "CartLuggagePram" 85 | 064 128 064; ... % "Animal" 86 | ] 87 | 88 | % "Bicyclist" 89 | [ 90 | 000 128 192; ... % "Bicyclist" 91 | 192 000 192; ... % "MotorcycleScooter" 92 | ] 93 | 94 | }; 95 | end -------------------------------------------------------------------------------- /+helper/downloadPretrainedDeepLabv3Plus.m: -------------------------------------------------------------------------------- 1 | function model = downloadPretrainedDeepLabv3Plus() 2 | % The downloadPretrainedDeepLabv3Plus function downloads (if needed) and 3 | % loads a pretrained DeepLabv3Plus network. 4 | % 5 | % The archive is stored in and extracted to the 'model' folder, which is 6 | % resolved relative to the current folder. The output is the struct 7 | % returned by load for the extracted MAT-file. 8 | % 9 | % Copyright 2021 The MathWorks, Inc. 10 | 11 | dataPath = 'model'; 12 | modelName = 'deepLabV3Plus-voc'; 13 | netFileFullPath = fullfile(dataPath, modelName); 14 | 15 | % Full names of the downloaded archive and of the MAT-file it contains. 
16 | netZipFile = [netFileFullPath,'.zip']; 17 | netMatFile = [netFileFullPath,'.mat']; 18 | 19 | if ~exist(netZipFile,'file') && ~exist(netMatFile,'file') 20 | % Pass the name as an argument so it is not parsed as a format string. 21 | fprintf('Downloading pretrained %s network.\n', modelName); 22 | fprintf('This can take several minutes to download...\n'); 23 | url = 'https://ssd.mathworks.com/supportfiles/vision/deeplearning/models/deepLabV3Plus/deepLabV3Plus-voc.zip'; 24 | % Save under the explicit .zip name that the existence check looks for. 25 | websave(netZipFile,url); 26 | else 27 | fprintf('Pretrained DeepLabv3Plus network already exists.\n\n'); 28 | end 29 | 30 | % Extract only when the MAT-file is missing; this avoids re-extracting the 31 | % archive on every call. 32 | if ~exist(netMatFile,'file') 33 | unzip(netZipFile, dataPath); 34 | end 35 | model = load(netMatFile); 36 | end -------------------------------------------------------------------------------- /+helper/partitionCamVidData.m: -------------------------------------------------------------------------------- 1 | function [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = partitionCamVidData(imds,pxds) 2 | % Partition CamVid data by randomly selecting 60% of the data for training, 3 | % 20% for validation, and the rest for testing. 4 | % 5 | % Copyright 2021 The MathWorks, Inc. 6 | 7 | % Set initial random state for example reproducibility. 8 | rng(0); 9 | numFiles = numel(imds.Files); 10 | shuffledIndices = randperm(numFiles); 11 | 12 | % Use 60% of the images for training. 13 | numTrain = round(0.60 * numFiles); 14 | trainingIdx = shuffledIndices(1:numTrain); 15 | 16 | % Use 20% of the images for validation 17 | numVal = round(0.20 * numFiles); 18 | valIdx = shuffledIndices(numTrain+1:numTrain+numVal); 19 | 20 | % Use the rest for testing. 21 | testIdx = shuffledIndices(numTrain+numVal+1:end); 22 | 23 | % Create image datastores for training, validation, and test. 
24 | trainingImages = imds.Files(trainingIdx); 25 | valImages = imds.Files(valIdx); 26 | testImages = imds.Files(testIdx); 27 | 28 | imdsTrain = imageDatastore(trainingImages); 29 | imdsVal = imageDatastore(valImages); 30 | imdsTest = imageDatastore(testImages); 31 | 32 | % Extract class and label IDs info. 33 | classes = pxds.ClassNames; 34 | labelIDs = helper.camvidPixelLabelIDs; 35 | 36 | % Create pixel label datastores for training and test. 37 | trainingLabels = pxds.Files(trainingIdx); 38 | valLabels = pxds.Files(valIdx); 39 | testLabels = pxds.Files(testIdx); 40 | 41 | pxdsTrain = pixelLabelDatastore(trainingLabels, classes, labelIDs); 42 | pxdsVal = pixelLabelDatastore(valLabels, classes, labelIDs); 43 | pxdsTest = pixelLabelDatastore(testLabels, classes, labelIDs); 44 | end -------------------------------------------------------------------------------- /+helper/pascal-voc-classes.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | matlab: mathworks/matlab@0.4.0 4 | 5 | jobs: 6 | build: 7 | machine: 8 | image: ubuntu-1604:201903-01 9 | steps: 10 | - checkout 11 | - matlab/install 12 | - matlab/run-tests: 13 | test-results-junit: artifacts/test_results/matlab/results.xml 14 | # Have to add test/tools to the path for certain tests. 
15 | source-folder: .;test/tools 16 | - store_test_results: 17 | path: artifacts/test_results 18 | - store_artifacts: 19 | path: artifacts/ 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, The MathWorks, Inc. 2 | All rights reserved. 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 5 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 6 | 3. In all cases, the software is, and all modifications and derivatives of the software shall be, licensed to you solely for use in conjunction with MathWorks products and service offerings. 7 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pretrained DeepLabv3+ Network for Semantic Segmentation 2 | 3 | This repository provides a pretrained DeepLabv3+[1] semantic segmentation model for MATLAB®. 4 | 5 | Requirements 6 | ------------ 7 | 8 | - MATLAB® R2020a or later. 9 | - Deep Learning Toolbox™. 10 | - Computer Vision Toolbox™. 11 | 12 | Overview 13 | -------- 14 | 15 | Semantic segmentation is a computer vision technique for segmenting different classes of objects in images or videos. This pretrained network is trained on the PASCAL VOC dataset[2], which has 20 different classes including airplane, bus, car, train, person, and horse. 16 | 17 | For more information about semantic segmentation, see [Getting Started with Semantic Segmentation Using Deep Learning](https://mathworks.com/help/vision/ug/getting-started-with-semantic-segmentation-using-deep-learning.html). 18 | 19 | 20 | Getting Started 21 | --------------- 22 | Download or clone this repository to your machine and open it in MATLAB®. 23 | 24 | ### Download the pretrained network 25 | Use the helper below to download the pretrained network. 
26 | 27 | ``` 28 | model = helper.downloadPretrainedDeepLabv3Plus; 29 | net = model.net; 30 | ``` 31 | 32 | Semantic Segmentation Using Pretrained DeepLabv3+ 33 | ------------------------------------------------- 34 | 35 | ``` 36 | % Read the test image. 37 | image = imread('visionteam.jpg'); 38 | 39 | % Resize the image to the size used to train the network. 40 | % The image is resized such that its smaller dimension is 513. 41 | sz = size(image); 42 | [~,k] = min(sz(1:2)); 43 | scale = 513/sz(k); 44 | img = imresize(image, scale, "bilinear"); 45 | 46 | % Use semanticseg function to generate segmentation map. 47 | result = semanticseg(img, net); 48 | 49 | % Generate the overlaid result using generated map. 50 | overlay = labeloverlay(img , result, 'Transparency', 0.4); 51 | 52 | % Visualize the input and the result. 53 | overlay = imresize(overlay, sz(1:2), 'bilinear'); 54 | montage({image, overlay}); 55 | ``` 56 | The left-side image is the input and the right-side image is the corresponding segmentation output. 57 | 58 | ![alt text](images/result.png?raw=true) 59 | 60 | 61 | Train Custom DeepLabv3+ Using Transfer Learning 62 | ----------------------------------------------- 63 | Transfer learning enables you to adapt a pretrained DeepLabv3+ network to your dataset. Create a custom DeepLabv3+ network for transfer learning with a new set of classes using the `configureDeepLabv3PlusTransferLearn.m` script. For more information about training a DeepLabv3+ network, see [Semantic Segmentation Using Deep Learning](https://www.mathworks.com/help/vision/ug/semantic-segmentation-using-deep-learning.html) 64 | 65 | 66 | Code Generation for DeepLabV3+ 67 | ------------------------------ 68 | Code generation enables you to generate code and deploy DeepLabv3+ on multiple embedded platforms. 69 | 70 | Run `codegenDeepLabv3Plus.m`. This script calls the `deepLabv3Plus_predict.m` entry point function and generates CUDA code for it. 
It then runs the generated MEX and displays the result. 71 | 72 | | Model | Inference Speed (FPS) | 73 | | ------ | ------ | 74 | | DeepLabv3Plus w/o codegen | 3.5265 | 75 | | DeepLabv3Plus with codegen | 21.5526 | 76 | 77 | - Performance (in FPS) is measured on a TITAN-RTX GPU using a 513x513 image. 78 | 79 | For more information about codegen, see [Deep Learning with GPU Coder](https://www.mathworks.com/help/gpucoder/gpucoder-deep-learning.html) 80 | 81 | 82 | Accuracy 83 | -------- 84 | Metrics are mIoU, global accuracy and mean accuracy computed over 2012 PASCAL VOC val data. 85 | 86 | | Model | mIoU | Global Accuracy | Mean Accuracy | Size (MB) | Classes | 87 | | ------ | ------ | ------ | ------ | ------ | ------ | 88 | | DeepLabv3Plus-VOC | 0.77299 | 0.94146 | 0.87279 | 209 | [voc class names](+helper/pascal-voc-classes.txt) | 89 | 90 | - During computation of these metrics, val images are first resized so that their smaller dimension is 513, which matches the training preprocessing, and then a center crop of size 513x513 is used for evaluation. 91 | 92 | 93 | References 94 | ----------- 95 | [1] Chen, Liang-Chieh, et al. "Encoder-decoder with atrous separable convolution for semantic image segmentation." Proceedings of the European conference on computer vision (ECCV). 2018. 96 | 97 | [2] The PASCAL Visual Object Classes Challenge: A Retrospective Everingham, M., Eslami, S. M. A., Van Gool, L., Williams, C. K. I., Winn, J. and Zisserman, A. International Journal of Computer Vision, 111(1), 98-136, 2015. 98 | 99 | 100 | Copyright 2021 The MathWorks, Inc. 101 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Vulnerabilities 2 | 3 | If you believe you have discovered a security vulnerability, please report it to 4 | [security@mathworks.com](mailto:security@mathworks.com). 
Please see 5 | [MathWorks Vulnerability Disclosure Policy for Security Researchers](https://www.mathworks.com/company/aboutus/policies_statements/vulnerability-disclosure-policy.html) 6 | for additional information. 7 | -------------------------------------------------------------------------------- /codegenDeepLabv3Plus.m: -------------------------------------------------------------------------------- 1 | %% Code generation For DeepLabv3+ Network 2 | % The following script demonstrates how to perform code generation for a pretrained 3 | % DeepLabv3+ semantic segmentation network, trained on PASCAL VOC dataset. 4 | 5 | %% Download the Pre-trained Network 6 | helper.downloadPretrainedDeepLabv3Plus; 7 | 8 | %% Preprocess the input image 9 | % Read test image. 10 | image = imread('visionteam.jpg'); 11 | 12 | % Resize the image such that its smaller dimension is scaled to 513. 13 | sz = size(image); 14 | [~,k] = min(sz(1:2)); 15 | scale = 513/sz(k); 16 | img = imresize(image, scale, "bilinear"); 17 | newSz = size(img); 18 | 19 | %% Run MEX code generation 20 | % The deepLabv3Plus_predict.m is entry-point function that takes an input image 21 | % and gives output. The function uses a persistent object deepLabv3PlusObj to 22 | % load the DAG network object and reuses the persistent object for prediction 23 | % on subsequent calls. 24 | % 25 | % To generate CUDA code for the deepLabv3Plus_predict entry-point function, 26 | % create a GPU code configuration object for a MEX target and set the 27 | % target language to C++. 28 | % 29 | % Use the coder.DeepLearningConfig (GPU Coder) function to create a CuDNN 30 | % deep learning configuration object and assign it to the DeepLearningConfig 31 | % property of the GPU code configuration object. 32 | % 33 | % Run the codegen command and specify the input size. 
34 | cfg = coder.gpuConfig('mex'); 35 | cfg.TargetLang = 'C++'; 36 | cfg.DeepLearningConfig = coder.DeepLearningConfig('cudnn'); 37 | codegen -config cfg deepLabv3Plus_predict -args {ones(newSz(1),newSz(2),3,'uint8')} -report 38 | 39 | 40 | %% Perform Semantic Segmentation Using Generated MEX 41 | % Call deepLabv3Plus_predict_mex on the input image. 42 | predict_scores = deepLabv3Plus_predict_mex(img); 43 | 44 | % The predict_scores variable is a three-dimensional matrix that has 21 channels 45 | % corresponding to the pixel-wise prediction scores for every class. 46 | % Compute the channel by using the maximum prediction score to get pixel-wise labels. 47 | [~,argmax] = max(predict_scores,[],3); 48 | 49 | % Overlay the segmented labels. 50 | overlay = labeloverlay(img , argmax, 'Transparency', 0.4); 51 | 52 | % Visualize the input and the result. 53 | overlay = imresize(overlay, sz(1:2), 'bilinear'); 54 | montage({image, overlay}); 55 | 56 | 57 | % Copyright 2021 The MathWorks, Inc. -------------------------------------------------------------------------------- /configureDeepLabv3PlusTransferLearn.m: -------------------------------------------------------------------------------- 1 | %% Configure Pretrained DeepLabv3+ Network for Transfer Learning 2 | % The following code demonstrates configuring a pretrained 3 | % DeepLabv3+[1] network on the custom dataset. 4 | 5 | %% Download Pretrained Model 6 | model = helper.downloadPretrainedDeepLabv3Plus; 7 | net = model.net; 8 | 9 | %% Download CamVid Dataset 10 | % This example uses the CamVid dataset[2] from the University of Cambridge for training. 11 | % This dataset is a collection of images containing street-level views obtained while 12 | % driving. The dataset provides pixel-level labels for 32 semantic classes including car, 13 | % pedestrian, and road. 14 | % 15 | % Download the CamVid dataset from the following URLs. 
16 | imageURL = 'http://web4.cs.ucl.ac.uk/staff/g.brostow/MotionSegRecData/files/701_StillsRaw_full.zip'; 17 | labelURL = 'http://web4.cs.ucl.ac.uk/staff/g.brostow/MotionSegRecData/data/LabeledApproved_full.zip'; 18 | 19 | outputFolder = fullfile(tempdir,'CamVid'); 20 | labelsZip = fullfile(outputFolder,'labels.zip'); 21 | imagesZip = fullfile(outputFolder,'images.zip'); 22 | 23 | if ~exist(labelsZip, 'file') || ~exist(imagesZip,'file') 24 | mkdir(outputFolder) 25 | 26 | disp('Downloading 16 MB CamVid dataset labels...'); 27 | websave(labelsZip, labelURL); 28 | unzip(labelsZip, fullfile(outputFolder,'labels')); 29 | 30 | disp('Downloading 557 MB CamVid dataset images...'); 31 | websave(imagesZip, imageURL); 32 | unzip(imagesZip, fullfile(outputFolder,'images')); 33 | end 34 | 35 | % Note: Download time of the data depends on your Internet connection. The commands 36 | % used above block MATLAB until the download is complete. Alternatively, you can 37 | % use your web browser to first download the dataset to your local disk. To use 38 | % the file you downloaded from the web, change the 'outputFolder' variable above 39 | % to the location of the downloaded file. 40 | 41 | %% Load CamVid Images 42 | imgDir = fullfile(outputFolder,'images','701_StillsRaw_full'); 43 | imds = imageDatastore(imgDir); 44 | 45 | %% Load CamVid Pixel-Labeled Images 46 | % To make training easier, the 32 original classes in CamVid are grouped into 47 | % 11 classes as follows. To reduce 32 classes into 11, multiple classes from the 48 | % original dataset are grouped together. For example, "Car" is a combination of 49 | % "Car", "SUVPickupTruck", "Truck_Bus", "Train", and "OtherMoving". 50 | classes = [ 51 | "Sky" 52 | "Building" 53 | "Pole" 54 | "Road" 55 | "Pavement" 56 | "Tree" 57 | "SignSymbol" 58 | "Fence" 59 | "Car" 60 | "Pedestrian" 61 | "Bicyclist" 62 | ]; 63 | 64 | % Return the grouped label IDs by using the helper function 'camvidPixelLabelIDs'. 
65 | labelIDs = helper.camvidPixelLabelIDs; 66 | 67 | % Use the classes and label IDs to create the pixelLabelDatastore. 68 | labelDir = fullfile(outputFolder,'labels'); 69 | pxds = pixelLabelDatastore(labelDir,classes,labelIDs); 70 | 71 | %% Analyze Dataset Statistics 72 | % To see the distribution of class labels in the CamVid dataset, use 'countEachLabel'. 73 | % This function counts the number of pixels by class label. 74 | tbl = countEachLabel(pxds); 75 | 76 | % Ideally, all classes would have an equal number of observations. However, 77 | % the classes in CamVid are imbalanced, which is a common issue in automotive 78 | % data-sets of street scenes. Such scenes have more sky, building, and road pixels 79 | % than pedestrian and bicyclist pixels because sky, buildings and roads cover 80 | % more area in the image. If not handled correctly, this imbalance can be detrimental 81 | % to the learning process because the learning is biased in favor of the dominant 82 | % classes. To handle this issue, class weighting has been used. 83 | 84 | %% Prepare Training, Validation, and Test Sets 85 | % Deeplabv3+ is trained using 60% of the images from the dataset. The rest 86 | % of the images are split evenly in 20% and 20% for validation and testing 87 | % respectively. The following code randomly splits the image and pixel label 88 | % data into a training, validation and test set. 89 | [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = helper.partitionCamVidData(imds,pxds); 90 | 91 | %% Configure Pretrained Network 92 | % To configure the DeepLabv3+ network for transfer learning, you should replace 93 | % the last convolutional layer and pixelClassificationLayer in the layergraph 94 | % obtained from the pretrained model. 95 | 96 | % Specify the number of classes. 97 | numClasses = numel(classes); 98 | 99 | % Extract the layergraph from the pretrained network to perform custom 100 | % modification. 
101 | lgraph = layerGraph(net); 102 | 103 | % Replace the last convolution layer in the pretrained network with the new 104 | % convolution layer. 105 | convLayer = convolution2dLayer([1 1], numClasses,'Name', 'node_398'); 106 | lgraph = replaceLayer(lgraph,"node_398",convLayer); 107 | 108 | % Balance classes using class weighting. 109 | imageFreq = tbl.PixelCount ./ tbl.ImagePixelCount; 110 | classWeights = median(imageFreq) ./ imageFreq; 111 | 112 | % Replace the pixel classification layer in the pretrained network with the classweights 113 | % and new pixel classification layer. 114 | pxLayer = pixelClassificationLayer('Name','labels','Classes',tbl.Name,'ClassWeights',classWeights); 115 | lgraph = replaceLayer(lgraph,"labels",pxLayer); 116 | 117 | % Use analyzeNetwork to visualize the new network. 118 | analyzeNetwork(lgraph); 119 | 120 | %% Data Augmentation 121 | % Data augmentation is used to improve network accuracy by randomly transforming 122 | % the original data during training. By using data augmentation, you can add 123 | % more variety to the training data without increasing the number of labeled 124 | % training samples. 125 | % 126 | % This pretrained model has input size of [513,513,3] and the CamVid images 127 | % are of size [720,960,3]. Hence, it would be better to use random patches 128 | % of size [513,513,3] from the given input images for training. 129 | % 130 | % In this case, 'randomPatchExtractionDatastore' is useful for creating 131 | % such training and validation datastores. 132 | % 133 | % To apply the same random transformation to both image and pixel label data 134 | % use 'imageDataAugmenter' object in 'DataAugmentation' NVP during creating 135 | % 'randomPatchExtractionDatastore' object. Here, random left/right reflection 136 | % and random X/Y translation of +/- 10 pixels is used for data augmentation. 
137 | xTrans = [-10 10]; 138 | yTrans = [-10 10]; 139 | 140 | augmenter = imageDataAugmenter('RandXReflection',true, 'RandXTranslation',xTrans, 'RandYTranslation',yTrans); 141 | dsTrain = randomPatchExtractionDatastore(imdsTrain,pxdsTrain,[513 513],'PatchesPerImage',8, 'DataAugmentation', augmenter); 142 | 143 | % Note that data augmentation is not applied to the test and validation data. 144 | % Ideally, test and validation data should be representative of the original 145 | % data and are left unmodified for unbiased evaluation. 146 | 147 | %% Select Training Options 148 | % The optimization algorithm used for training is stochastic gradient descent 149 | % with momentum (SGDM). Use trainingOptions to specify the hyper-parameters 150 | % used for SGDM. 151 | 152 | % Define validation datastore. 153 | dsVal = randomPatchExtractionDatastore(imdsVal,pxdsVal,[513 513],'PatchesPerImage',8); 154 | 155 | % Define training options. 156 | options = trainingOptions('sgdm', ... 157 | 'LearnRateSchedule','piecewise',... 158 | 'LearnRateDropPeriod',10,... 159 | 'LearnRateDropFactor',0.3,... 160 | 'Momentum',0.9, ... 161 | 'InitialLearnRate',1e-3, ... 162 | 'L2Regularization',0.005, ... 163 | 'ValidationData',dsVal,... 164 | 'MaxEpochs',6, ... 165 | 'MiniBatchSize',16, ... 166 | 'Shuffle','every-epoch', ... 167 | 'CheckpointPath', tempdir, ... 168 | 'VerboseFrequency',2,... 169 | 'Plots','training-progress',... 170 | 'ValidationPatience', 4); 171 | 172 | % The learning rate uses a piecewise schedule. The learning rate is reduced 173 | % by a factor of 0.3 every 10 epochs. This allows the network to learn quickly 174 | % with a higher initial learning rate, while being able to find a solution 175 | % close to the local optimum once the learning rate drops. 
NOTE(review): 'MaxEpochs' is 6 above, so training ends before the first drop at 10 epochs; raise 'MaxEpochs' or lower 'LearnRateDropPeriod' if the schedule is meant to take effect. 176 | % 177 | % The network is tested against the validation data periodically during training by setting 178 | % the 'ValidationData' parameter; 'ValidationFrequency' is not set above, so the trainingOptions default interval applies. 
The 'ValidationPatience' is set to 4 to 179 | % stop training early when the validation accuracy converges. This prevents 180 | % the network from overfitting on the training dataset. 181 | % 182 | % A mini-batch size of 16 is used for training. You can increase or decrease 183 | % this value based on the amount of GPU memory you have on your system. 184 | % 185 | % In addition, 'CheckpointPath' is set to a temporary location. This name-value 186 | % pair enables the saving of network checkpoints at the end of every training 187 | % epoch. If training is interrupted due to a system failure or power outage, 188 | % you can resume training from the saved checkpoint. Make sure that the location 189 | % specified by 'CheckpointPath' has enough space to store the network checkpoints. 190 | 191 | 192 | % Now, you can pass the 'dsTrain', 'lgraph' and 'options' to trainNetwork 193 | % as shown in 'Start Training' section of the example 'Semantic Segmentation 194 | % Using Deep Learning' to obtain deepLabv3+ model trained on the custom dataset. 195 | % 196 | % You can follow the sections 'Test Network on One Image' for inference using 197 | % the trained model and 'Evaluate Trained Network' for evaluating metrics. 198 | 199 | 200 | %% References 201 | 202 | % [1] Chen, Liang-Chieh et al. “Encoder-Decoder with Atrous Separable Convolution 203 | % for Semantic Image Segmentation.” ECCV (2018). 204 | % 205 | % [2] Brostow, G. J., J. Fauqueur, and R. Cipolla. "Semantic object classes 206 | % in video: A high-definition ground truth database." Pattern Recognition Letters. 207 | % Vol. 30, Issue 2, 2009, pp 88-97. 208 | % 209 | % Copyright 2021 The MathWorks, Inc. 
-------------------------------------------------------------------------------- /deepLabv3PlusSemanticSegmentationExample.m: -------------------------------------------------------------------------------- 1 | %% Semantic Segmentation Using DeepLabv3+ Network 2 | % The following code demonstrates running semantic segmentation on a pre-trained 3 | % DeepLabv3+ network, trained on PASCAL VOC dataset. 4 | 5 | %% Prerequisites 6 | % To run this example you need the following prerequisites - 7 | % # MATLAB (R2020a or later) with Computer Vision and Deep Learning Toolbox. 8 | % # Pretrained DeepLabv3+ network (download instructions below). 9 | 10 | %% Download the Pre-trained Network 11 | model = helper.downloadPretrainedDeepLabv3Plus; 12 | net = model.net; 13 | 14 | %% Perform Semantic Segmentation Using DeepLabv3+ Network 15 | % Read test image. 16 | image = imread('visionteam.jpg'); 17 | 18 | % Resize the image such that its smaller dimension is scaled to 513. 19 | sz = size(image); 20 | [~,k] = min(sz(1:2)); 21 | scale = 513/sz(k); 22 | img = imresize(image, scale, "bilinear"); 23 | 24 | % Use semanticseg function to generate segmentation map. 25 | result = semanticseg(img, net); 26 | 27 | % Generate the overlaid result using generated map. 28 | overlay = labeloverlay(img , result, 'Transparency', 0.4); 29 | 30 | % Visualize the input and the result. 31 | overlay = imresize(overlay, sz(1:2), 'bilinear'); 32 | montage({image, overlay}); 33 | 34 | 35 | % Copyright 2021 The MathWorks, Inc. -------------------------------------------------------------------------------- /deepLabv3Plus_predict.m: -------------------------------------------------------------------------------- 1 | function out = deepLabv3Plus_predict(in) 2 | %#codegen 3 | % Copyright 2021 The MathWorks, Inc. 4 | 5 | persistent deepLabv3PlusObj; 6 | 7 | if isempty(deepLabv3PlusObj) 8 | deepLabv3PlusObj = coder.loadDeepLearningNetwork('model/deepLabV3Plus-voc.mat'); 9 | end 10 | 11 | % Pass input. 
12 | netInputSize = [513,513,3]; 13 | 14 | if isequal(size(in), netInputSize) 15 | out = predict(deepLabv3PlusObj,in); 16 | else 17 | out = activations(deepLabv3PlusObj,in,'labels'); 18 | end -------------------------------------------------------------------------------- /images/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matlab-deep-learning/pretrained-deeplabv3plus/f0aa293f9abe12768d0ffba84582653a431767c0/images/result.png -------------------------------------------------------------------------------- /model/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /test/tPretrainedDeeplebV3Plus.m: -------------------------------------------------------------------------------- 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tPretrainedDeeplebV3Plus < matlab.unittest.TestCase 2 | % Test for tPretrainedDeeplebV3Plus 3 | 4 | % Copyright 2021 The MathWorks, Inc. 5 | 6 | % The shared test fixture downloads the model. Here we check the 7 | % inference on the pretrained model. 8 | properties 9 | RepoRoot = getRepoRoot; 10 | ModelName = 'deepLabV3Plus-voc.mat'; 11 | end 12 | 13 | methods(Test) 14 | function exerciseDetection(test) 15 | model = load(fullfile(test.RepoRoot,'model',test.ModelName)); 16 | image = imread('visionteam.jpg'); 17 | sz = size(image); 18 | [~,k] = min(sz(1:2)); 19 | scale = 513/sz(k); 20 | img = imresize(image, scale, "bilinear"); 21 | imSize = size(img); 22 | imSize = imSize(:,1:2); 23 | actualLabel1Count = 252888; 24 | actualLabel2Count = 257034; 25 | 26 | result = semanticseg(img, model.net); 27 | labelsCountTbl = countlabels(result(:)); 28 | labelCount = labelsCountTbl.Count(find(labelsCountTbl.Count)); 29 | 30 | % verifying size of output from semanticseg. 
31 | test.verifyEqual(size(result),imSize); 32 | % verifying that all the pixels are labelled. 33 | test.verifyEqual(sum(labelCount),prod(imSize)); 34 | % verifying the count of each label in the result. 35 | test.verifyEqual(labelCount(1),actualLabel1Count); 36 | test.verifyEqual(labelCount(2),actualLabel2Count); 37 | end 38 | end 39 | end -------------------------------------------------------------------------------- /test/tdownloadPretrainedDeeplebV3Plus.m: -------------------------------------------------------------------------------- 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tdownloadPretrainedDeeplebV3Plus < matlab.unittest.TestCase 2 | % Test for helper.downloadPretrainedDeepLabv3Plus 3 | 4 | % Copyright 2021 The MathWorks, Inc. 5 | 6 | % The shared test fixture DownloadDeeplabV3PlusFixture calls 7 | % helper.downloadPretrainedDeepLabv3Plus. Here we check that the downloaded file 8 | % exists in the appropriate location. 9 | 10 | properties 11 | DataDir = fullfile(getRepoRoot(),'model'); 12 | end 13 | 14 | methods(Test) 15 | function verifyDownloadedFilesExist(test) 16 | dataFileName = 'deepLabV3Plus-voc.mat'; 17 | test.verifyTrue(isequal(exist(fullfile(test.DataDir,dataFileName),'file'),2)); 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /test/tload.m: -------------------------------------------------------------------------------- 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tload < matlab.unittest.TestCase 2 | % Test for loading the downloaded models. 3 | 4 | % Copyright 2021 The MathWorks, Inc. 5 | 6 | % The shared test fixture DownloadDeeplabV3PlusFixture calls 7 | % helper.downloadPretrainedDeepLabv3Plus. Here we check the properties of 8 | % the downloaded model. 
9 | 10 | properties 11 | DataDir = fullfile(getRepoRoot(),'model'); 12 | end 13 | 14 | methods(Test) 15 | function verifyModelAndFields(test) 16 | % Test point to verify the fields of the downloaded models are 17 | % as expected. 18 | 19 | loadedModel = load(fullfile(test.DataDir,'deepLabV3Plus-voc.mat')); 20 | 21 | test.verifyClass(loadedModel.net,'DAGNetwork'); 22 | test.verifyEqual(numel(loadedModel.net.Layers),376); 23 | test.verifyEqual(size(loadedModel.net.Connections),[416 2]) 24 | test.verifyEqual(loadedModel.net.InputNames,{'Input'}); 25 | test.verifyEqual(loadedModel.net.OutputNames,{'labels'}); 26 | end 27 | end 28 | end -------------------------------------------------------------------------------- /test/tools/DownloadDeeplabV3PlusFixture.m: -------------------------------------------------------------------------------- 1 | classdef DownloadDeeplabV3PlusFixture < matlab.unittest.fixtures.Fixture 2 | % DownloadDeeplabV3PlusFixture A fixture for calling helper.downloadPretrainedDeepLabv3Plus if 3 | % necessary. This is to ensure that this function is only called once 4 | % and only when tests need it. It also provides a teardown to return 5 | % the test environment to the expected state before testing. 6 | 7 | % Copyright 2021 The MathWorks, Inc 8 | 9 | properties(Constant) 10 | % Folder that holds the pretrained model file. 11 | DeeplabV3DataDir = fullfile(getRepoRoot(),'model') 12 | end 13 | 14 | properties 15 | % True when the model MAT-file was already present before setup ran. 16 | DeeplabV3Exist (1,1) logical 17 | end 18 | 19 | methods 20 | function setup(this) 21 | % Record whether the model MAT-file is already present and download it if not. 22 | this.DeeplabV3Exist = exist(fullfile(this.DeeplabV3DataDir,'deepLabV3Plus-voc.mat'),'file')==2; 23 | 24 | % Call this in evalc to capture and drop any standard output 25 | % that we don't want polluting the test logs. 
26 | if ~this.DeeplabV3Exist 27 | evalc('helper.downloadPretrainedDeepLabv3Plus();'); 28 | end 29 | end 30 | 31 | function teardown(this) 32 | % Delete the model MAT-file only when this fixture downloaded it, so that a 33 | % model that was present before testing is left in place. Only the MAT-file 34 | % is removed here; the downloaded .zip archive is not touched. 35 | if ~this.DeeplabV3Exist 36 | delete(fullfile(this.DeeplabV3DataDir,'deepLabV3Plus-voc.mat')); 37 | end 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /test/tools/getRepoRoot.m: -------------------------------------------------------------------------------- 1 | function path = getRepoRoot() 2 | % getRepoRoot Return a path to the repository's root directory. 3 | 4 | % Copyright 2020 The MathWorks, Inc. 5 | 6 | thisFile = mfilename('fullpath'); 7 | thisDir = fileparts(thisFile); 8 | 9 | % the root is up two directories (/test/tools/getRepoRoot.m) 10 | path = fullfile(thisDir,'..','..'); 11 | end --------------------------------------------------------------------------------