├── +helper
    ├── camvidPixelLabelIDs.m
    ├── downloadPretrainedDeepLabv3Plus.m
    ├── partitionCamVidData.m
    └── pascal-voc-classes.txt
├── .circleci
    └── config.yml
├── .gitignore
├── LICENSE
├── README.md
├── SECURITY.md
├── codegenDeepLabv3Plus.m
├── configureDeepLabv3PlusTransferLearn.m
├── deepLabv3PlusSemanticSegmentationExample.m
├── deepLabv3Plus_predict.m
├── images
    └── result.png
├── model
    └── .gitkeep
└── test
    ├── tPretrainedDeeplebV3Plus.m
    ├── tdownloadPretrainedDeeplebV3Plus.m
    ├── tload.m
    └── tools
        ├── DownloadDeeplabV3PlusFixture.m
        └── getRepoRoot.m


/+helper/camvidPixelLabelIDs.m:
--------------------------------------------------------------------------------
 1 | function labelIDs = camvidPixelLabelIDs()
 2 | % camvidPixelLabelIDs Return the label IDs corresponding to each class.
 3 | %
 4 | % The CamVid dataset has 32 classes. Group them into 11 classes.
 5 | %
 6 | % The 11 classes, in the order their ID groups appear below, are:
 7 | %   "Sky", "Building", "Pole", "Road", "Pavement", "Tree", "SignSymbol",
 8 | %   "Fence", "Car", "Pedestrian", and "Bicyclist".
 9 | %
10 | % CamVid pixel label IDs are provided as RGB color values. Group them into
11 | % 11 classes and return them as a cell array of M-by-3 matrices. The
12 | % original CamVid class names are listed alongside each RGB value. Note
13 | % that the Other/Void classes are excluded below.
14 | %
15 | % Copyright 2021 The MathWorks, Inc.
16 | 
17 | labelIDs = { ...
18 |     
19 |     % "Sky"
20 |     [
21 |     128 128 128; ... % "Sky"
22 |     ]
23 |     
24 |     % "Building" 
25 |     [
26 |     000 128 064; ... % "Bridge"
27 |     128 000 000; ... % "Building"
28 |     064 192 000; ... % "Wall"
29 |     064 000 064; ... % "Tunnel"
30 |     192 000 128; ... % "Archway"
31 |     ]
32 |     
33 |     % "Pole"
34 |     [
35 |     192 192 128; ... % "Column_Pole"
36 |     000 000 064; ... % "TrafficCone"
37 |     ]
38 |     
39 |     % "Road"
40 |     [
41 |     128 064 128; ... % "Road"
42 |     128 000 192; ... % "LaneMkgsDriv"
43 |     192 000 064; ... % "LaneMkgsNonDriv"
44 |     ]
45 |     
46 |     % "Pavement"
47 |     [
48 |     000 000 192; ... % "Sidewalk" 
49 |     064 192 128; ... % "ParkingBlock"
50 |     128 128 192; ... % "RoadShoulder"
51 |     ]
52 |         
53 |     % "Tree"
54 |     [
55 |     128 128 000; ... % "Tree"
56 |     192 192 000; ... % "VegetationMisc"
57 |     ]
58 |     
59 |     % "SignSymbol"
60 |     [
61 |     192 128 128; ... % "SignSymbol"
62 |     128 128 064; ... % "Misc_Text"
63 |     000 064 064; ... % "TrafficLight"
64 |     ]
65 |     
66 |     % "Fence"
67 |     [
68 |     064 064 128; ... % "Fence"
69 |     ]
70 |     
71 |     % "Car"
72 |     [
73 |     064 000 128; ... % "Car"
74 |     064 128 192; ... % "SUVPickupTruck"
75 |     192 128 192; ... % "Truck_Bus"
76 |     192 064 128; ... % "Train"
77 |     128 064 064; ... % "OtherMoving"
78 |     ]
79 |     
80 |     % "Pedestrian"
81 |     [
82 |     064 064 000; ... % "Pedestrian"
83 |     192 128 064; ... % "Child"
84 |     064 000 192; ... % "CartLuggagePram"
85 |     064 128 064; ... % "Animal"
86 |     ]
87 |     
88 |     % "Bicyclist"
89 |     [
90 |     000 128 192; ... % "Bicyclist"
91 |     192 000 192; ... % "MotorcycleScooter"
92 |     ]
93 |     
94 |     };
95 | end 


--------------------------------------------------------------------------------
/+helper/downloadPretrainedDeepLabv3Plus.m:
--------------------------------------------------------------------------------
 1 | function model = downloadPretrainedDeepLabv3Plus()
 2 | % downloadPretrainedDeepLabv3Plus Download (if needed) and load the
 3 | % pretrained DeepLabv3Plus network.
 4 | %
 5 | %   model = downloadPretrainedDeepLabv3Plus() makes sure the archive
 6 | %   deepLabV3Plus-voc.zip is present in the 'model' folder (relative to the
 7 | %   current folder), extracts it when the MAT-file is missing, and returns
 8 | %   the result of LOAD on model/deepLabV3Plus-voc.mat.
 9 | %
10 | % Copyright 2021 The MathWorks, Inc.
11 | 
12 | dataPath = 'model';
13 | modelName = 'deepLabV3Plus-voc';
14 | 
15 | % Paths of the downloaded archive and of the MAT-file extracted from it.
16 | zipFile = fullfile(dataPath, [modelName, '.zip']);
17 | matFile = fullfile(dataPath, [modelName, '.mat']);
18 | 
19 | if ~exist(zipFile,'file')
20 |     % Make sure the destination folder exists before saving into it.
21 |     if ~exist(dataPath,'dir')
22 |         mkdir(dataPath);
23 |     end
24 | 
25 |     % Use a format specifier so the model name is printed with surrounding
26 |     % spaces and is never interpreted as part of the format string.
27 |     fprintf('Downloading pretrained %s network.\n', modelName);
28 |     fprintf('This can take several minutes to download...\n');
29 |     url = 'https://ssd.mathworks.com/supportfiles/vision/deeplearning/models/deepLabV3Plus/deepLabV3Plus-voc.zip';
30 | 
31 |     % Save under the explicit .zip name so that the existence check above
32 |     % and the unzip call below all refer to the same file.
33 |     websave(zipFile, url);
34 |     unzip(zipFile, dataPath);
35 | else
36 |     fprintf('Pretrained DeepLabv3Plus network already exists.\n\n');
37 | 
38 |     % Extract again only when the MAT-file is not there yet.
39 |     if ~exist(matFile,'file')
40 |         unzip(zipFile, dataPath);
41 |     end
42 | end
43 | 
44 | model = load(matFile);
45 | end


--------------------------------------------------------------------------------
/+helper/partitionCamVidData.m:
--------------------------------------------------------------------------------
 1 | function [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = partitionCamVidData(imds,pxds)
 2 | % partitionCamVidData Randomly split CamVid data into three subsets.
 3 | %
 4 | % 60% of the files are selected for training and 20% for validation; the
 5 | % remaining files are used for testing. The same shuffled indices are
 6 | % applied to the image files and to the pixel label files.
 7 | %
 8 | % Copyright 2021 The MathWorks, Inc.
 9 | 
10 | % Seed the random number generator for example reproducibility.
11 | rng(0);
12 | fileCount = numel(imds.Files);
13 | order = randperm(fileCount);
14 | 
15 | % Sizes of the training (60%) and validation (20%) subsets.
16 | trainCount = round(0.60 * fileCount);
17 | valCount = round(0.20 * fileCount);
18 | valEnd = trainCount + valCount;
19 | 
20 | % Index sets; everything after the validation range goes to the test set.
21 | idxTrain = order(1:trainCount);
22 | idxVal = order(trainCount+1:valEnd);
23 | idxTest = order(valEnd+1:end);
24 | 
25 | % Image datastores for training, validation and test.
26 | imdsTrain = imageDatastore(imds.Files(idxTrain));
27 | imdsVal = imageDatastore(imds.Files(idxVal));
28 | imdsTest = imageDatastore(imds.Files(idxTest));
29 | 
30 | % Class names come from the input datastore, label IDs from the helper.
31 | classNames = pxds.ClassNames;
32 | ids = helper.camvidPixelLabelIDs;
33 | 
34 | % Pixel label datastores for training, validation and test.
35 | pxdsTrain = pixelLabelDatastore(pxds.Files(idxTrain), classNames, ids);
36 | pxdsVal = pixelLabelDatastore(pxds.Files(idxVal), classNames, ids);
37 | pxdsTest = pixelLabelDatastore(pxds.Files(idxTest), classNames, ids);
38 | end


--------------------------------------------------------------------------------
/+helper/pascal-voc-classes.txt:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # CircleCI configuration: install MATLAB and run the repository's tests.
 2 | version: 2.1
 3 | orbs:
 4 |   # MATLAB orb; the matlab/install and matlab/run-tests steps below come from it.
 5 |   matlab: mathworks/matlab@0.4.0
 6 | 
 7 | jobs:
 8 |   build:
 9 |     machine:
10 |       image: ubuntu-1604:201903-01
11 |     steps:
12 |       - checkout
13 |       - matlab/install
14 |       - matlab/run-tests:
15 |           # Write the JUnit-style results under artifacts/ so the steps below can pick them up.
16 |           test-results-junit: artifacts/test_results/matlab/results.xml
17 |           # Have to add test/tools to the path for certain tests.
18 |           source-folder: .;test/tools
19 |       # Publish the test results and keep the whole artifacts folder.
20 |       - store_test_results:
21 |           path: artifacts/test_results
22 |       - store_artifacts:
23 |           path: artifacts/
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | model/
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, The MathWorks, Inc.
2 | All rights reserved.
3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
5 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
6 | 3. In all cases, the software is, and all modifications and derivatives of the software shall be, licensed to you solely for use in conjunction with MathWorks products and service offerings. 
7 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Pretrained DeepLabv3+ Network for Semantic Segmentation
  2 | 
  3 | This repository provides a pretrained DeepLabv3+[1] semantic segmentation model for MATLAB&reg;.
  4 | 
  5 | Requirements
  6 | ------------
  7 | 
  8 | - MATLAB&reg; R2020a or later.
  9 | - Deep Learning Toolbox&trade;.
 10 | - Computer Vision Toolbox&trade;.
 11 | 
 12 | Overview
 13 | --------
 14 | 
 15 | Semantic segmentation is a computer vision technique for segmenting different classes of objects in images or videos. This pretrained network is trained on the PASCAL VOC dataset [2], which has 20 object classes, including airplane, bus, car, train, person, and horse.
 16 | 
 17 | For more information about semantic segmentation, see [Getting Started with Semantic Segmentation Using Deep Learning](https://mathworks.com/help/vision/ug/getting-started-with-semantic-segmentation-using-deep-learning.html).
 18 | 
 19 |  
 20 | Getting Started
 21 | ---------------
 22 | Download or clone this repository to your machine and open it in MATLAB&reg;.
 23 | 
 24 | ### Download the pretrained network
 25 | Use the below helper to download the pretrained network.
 26 | 
 27 | ```
 28 | model = helper.downloadPretrainedDeepLabv3Plus;
 29 | net = model.net;
 30 | ```
 31 | 
 32 | Semantic Segmentation Using Pretrained DeepLabv3+
 33 | -------------------------------------------------
 34 | 
 35 | ```
 36 | % Read the test image.
 37 | image = imread('visionteam.jpg');
 38 | 
 39 | % Resize the image to the size used to train the network. 
 40 | % The image is resized such that smallest dimension is 513.
 41 | sz = size(image);
 42 | [~,k] = min(sz(1:2));
 43 | scale = 513/sz(k);
 44 | img  = imresize(image, scale, "bilinear");
 45 | 
 46 | % Use semanticseg function to generate segmentation map.
 47 | result = semanticseg(img, net);
 48 | 
 49 | % Generate the overlaid result using generated map.
 50 | overlay = labeloverlay(img , result, 'Transparency', 0.4);
 51 | 
 52 | % Visualize the input and the result.
 53 | overlay = imresize(overlay, sz(1:2), 'bilinear');
 54 | montage({image, overlay});
 55 | ```
 56 | Left-side image is the input and right-side image is the corresponding segmentation output.
 57 | 
 58 | ![alt text](images/result.png?raw=true)
 59 | 
 60 | 
 61 | Train Custom DeepLabv3+ Using Transfer Learning
 62 | -----------------------------------------------
 63 | Transfer learning enables you to adapt a pretrained DeepLabv3+ network to your dataset. Create a custom DeepLabv3+ network for transfer learning with a new set of classes using the `configureDeepLabv3PlusTransferLearn.m` script. For more information about training a DeepLabv3+ network, see [Semantic Segmentation Using Deep Learning](https://www.mathworks.com/help/vision/ug/semantic-segmentation-using-deep-learning.html)
 64 | 
 65 | 
 66 | Code Generation for DeepLabv3+
 67 | ------------------------------
 68 | Code generation enables you to generate code and deploy DeepLabv3+ on multiple embedded platforms.
 69 | 
 70 | Run `codegenDeepLabv3Plus.m`. This script generates CUDA MEX code for the `deepLabv3Plus_predict.m` entry-point function, then runs the generated MEX on a test image and displays the segmentation result.
 71 | 
 72 | | Model | Inference Speed (FPS) | 
 73 | | ------ | ------ | 
 74 | | DeepLabv3Plus w/o codegen | 3.5265 |
 75 | | DeepLabv3Plus with codegen | 21.5526 |
 76 | 
 77 | - Performance (in FPS) is measured on a TITAN-RTX GPU using 513x513 image.
 78 | 
 79 | For more information about codegen, see [Deep Learning with GPU Coder](https://www.mathworks.com/help/gpucoder/gpucoder-deep-learning.html)
 80 | 
 81 | 
 82 | Accuracy
 83 | --------
 84 | Metrics are mIoU, global accuracy and mean accuracy computed over 2012 PASCAL VOC val data. 
 85 | 
 86 | | Model | mIoU | Global Accuracy | Mean Accuracy | Size (MB) | Classes |
 87 | | ------ | ------ | ------ | ------ | ------ | ------ |
 88 | | DeepLabv3Plus-VOC | 0.77299 | 0.94146 | 0.87279 | 209 | [voc class names](+helper/pascal-voc-classes.txt) |
 89 | 
 90 | - To compute these metrics, each validation image is first resized so that its smaller dimension is 513, which matches the training preprocessing, and then a center crop of size 513x513 is used for evaluation.
 91 | 
 92 | 
 93 | References
 94 | -----------
 95 | [1] Chen, Liang-Chieh, et al. "Encoder-decoder with atrous separable convolution for semantic image segmentation." Proceedings of the European conference on computer vision (ECCV). 2018.
 96 | 
 97 | [2] The PASCAL Visual Object Classes Challenge: A Retrospective Everingham, M., Eslami, S. M. A., Van Gool, L., Williams, C. K. I., Winn, J. and Zisserman, A. International Journal of Computer Vision, 111(1), 98-136, 2015.
 98 | 
 99 | 
100 | Copyright 2021 The MathWorks, Inc.
101 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Reporting Security Vulnerabilities 
2 | 
3 | If you believe you have discovered a security vulnerability, please report it to 
4 | [security@mathworks.com](mailto:security@mathworks.com). Please see 
5 | [MathWorks Vulnerability Disclosure Policy for Security Researchers](https://www.mathworks.com/company/aboutus/policies_statements/vulnerability-disclosure-policy.html) 
6 | for additional information.  
7 | 


--------------------------------------------------------------------------------
/codegenDeepLabv3Plus.m:
--------------------------------------------------------------------------------
 1 | %% Code Generation for DeepLabv3+ Network
 2 | % The following script demonstrates how to perform code generation for a pretrained 
 3 | % DeepLabv3+ semantic segmentation network, trained on the PASCAL VOC dataset.
 4 | 
 5 | %% Download the Pretrained Network
 6 | helper.downloadPretrainedDeepLabv3Plus;
 7 | 
 8 | %% Preprocess the input image
 9 | % Read the test image.
10 | image = imread('visionteam.jpg');
11 | 
12 | % Resize the image such that its smaller dimension is scaled to 513.
13 | sz = size(image);
14 | [~,k] = min(sz(1:2));
15 | scale = 513/sz(k);
16 | img  = imresize(image, scale, "bilinear");
17 | newSz = size(img);
18 | 
19 | %% Run MEX code generation
20 | % deepLabv3Plus_predict.m is the entry-point function that takes an input image
21 | % and returns the network output. The function uses a persistent object, 
22 | % deepLabv3PlusObj, to load the network object once and reuses it for 
23 | % prediction on subsequent calls.
24 | %
25 | % To generate CUDA code for the deepLabv3Plus_predict entry-point function, 
26 | % create a GPU code configuration object for a MEX target and set the 
27 | % target language to C++. 
28 | % 
29 | % Use the coder.DeepLearningConfig (GPU Coder) function to create a CuDNN 
30 | % deep learning configuration object and assign it to the DeepLearningConfig 
31 | % property of the GPU code configuration object. 
32 | % 
33 | % Run the codegen command and specify the input size (taken from newSz above). 
34 | cfg = coder.gpuConfig('mex');
35 | cfg.TargetLang = 'C++';
36 | cfg.DeepLearningConfig = coder.DeepLearningConfig('cudnn');
37 | codegen -config cfg deepLabv3Plus_predict -args {ones(newSz(1),newSz(2),3,'uint8')} -report
38 | 
39 | 
40 | %% Perform Semantic Segmentation Using Generated MEX 
41 | % Call deepLabv3Plus_predict_mex on the input image.
42 | predict_scores = deepLabv3Plus_predict_mex(img);
43 | 
44 | % The predict_scores variable is a three-dimensional matrix that has 21 channels 
45 | % corresponding to the pixel-wise prediction scores for every class. 
46 | % Take the index of the channel with the maximum score at each pixel to get pixel-wise labels.
47 | [~,argmax] = max(predict_scores,[],3);
48 | 
49 | % Overlay the segmented labels on the resized image.
50 | overlay = labeloverlay(img , argmax, 'Transparency', 0.4);
51 | 
52 | % Resize the overlay back to the original image size and show it next to the input.
53 | overlay = imresize(overlay, sz(1:2), 'bilinear');
54 | montage({image, overlay});
55 | 
56 | 
57 | % Copyright 2021 The MathWorks, Inc.


--------------------------------------------------------------------------------
/configureDeepLabv3PlusTransferLearn.m:
--------------------------------------------------------------------------------
  1 | %% Configure Pretrained DeepLabv3+ Network for Transfer Learning
  2 | % The following code demonstrates how to configure a pretrained 
  3 | % DeepLabv3+[1] network for transfer learning on a custom dataset.
  4 | 
  5 | %% Download Pretrained Model
  6 | model = helper.downloadPretrainedDeepLabv3Plus;
  7 | net = model.net;
  8 | 
  9 | %% Download CamVid Dataset
 10 | % This example uses the CamVid dataset[2] from the University of Cambridge for training. 
 11 | % This dataset is a collection of images containing street-level views obtained while 
 12 | % driving. The dataset provides pixel-level labels for 32 semantic classes including car, 
 13 | % pedestrian, and road.
 14 | %
 15 | % Download the CamVid dataset from the following URLs.
 16 | imageURL = 'http://web4.cs.ucl.ac.uk/staff/g.brostow/MotionSegRecData/files/701_StillsRaw_full.zip';
 17 | labelURL = 'http://web4.cs.ucl.ac.uk/staff/g.brostow/MotionSegRecData/data/LabeledApproved_full.zip';
 18 |  
 19 | outputFolder = fullfile(tempdir,'CamVid'); 
 20 | labelsZip = fullfile(outputFolder,'labels.zip');
 21 | imagesZip = fullfile(outputFolder,'images.zip');
 22 | 
 23 | if ~exist(labelsZip, 'file') || ~exist(imagesZip,'file')   
 24 |     % Create the folder only when it is missing; mkdir warns if it already exists.
 25 |     if ~exist(outputFolder,'dir'), mkdir(outputFolder); end
 26 |     disp('Downloading 16 MB CamVid dataset labels...'); 
 27 |     websave(labelsZip, labelURL);
 28 |     unzip(labelsZip, fullfile(outputFolder,'labels'));
 29 |     
 30 |     disp('Downloading 557 MB CamVid dataset images...');  
 31 |     websave(imagesZip, imageURL);       
 32 |     unzip(imagesZip, fullfile(outputFolder,'images'));    
 33 | end
 34 | 
 35 | % Note: Download time of the data depends on your Internet connection. The commands 
 36 | % used above block MATLAB until the download is complete. Alternatively, you can 
 37 | % use your web browser to first download the dataset to your local disk. To use 
 38 | % the file you downloaded from the web, change the 'outputFolder' variable above 
 39 | % to the location of the downloaded file.
 40 | 
 41 | %% Load CamVid Images
 42 | imgDir = fullfile(outputFolder,'images','701_StillsRaw_full');
 43 | imds = imageDatastore(imgDir);
 44 | 
 45 | %% Load CamVid Pixel-Labeled Images
 46 | % To make training easier, the 32 original classes in CamVid are grouped into 
 47 | % the 11 classes listed below by combining multiple classes from the 
 48 | % original dataset. For example, "Car" is a combination of 
 49 | % "Car", "SUVPickupTruck", "Truck_Bus", "Train", and "OtherMoving".
 50 | classes = [
 51 |     "Sky"
 52 |     "Building"
 53 |     "Pole"
 54 |     "Road"
 55 |     "Pavement"
 56 |     "Tree"
 57 |     "SignSymbol"
 58 |     "Fence"
 59 |     "Car"
 60 |     "Pedestrian"
 61 |     "Bicyclist"
 62 |     ];
 63 | 
 64 | % Return the grouped label IDs by using the helper function 'camvidPixelLabelIDs'.
 65 | labelIDs = helper.camvidPixelLabelIDs;
 66 | 
 67 | % Use the classes and label IDs to create the pixelLabelDatastore.
 68 | labelDir = fullfile(outputFolder,'labels');
 69 | pxds = pixelLabelDatastore(labelDir,classes,labelIDs);
 70 | 
 71 | %% Analyze Dataset Statistics
 72 | % To see the distribution of class labels in the CamVid dataset, use 'countEachLabel'. 
 73 | % This function counts the number of pixels by class label.
 74 | tbl = countEachLabel(pxds);
 75 | 
 76 | % Ideally, all classes would have an equal number of observations. However, 
 77 | % the classes in CamVid are imbalanced, which is a common issue in automotive 
 78 | % datasets of street scenes. Such scenes have more sky, building, and road pixels 
 79 | % than pedestrian and bicyclist pixels because sky, buildings and roads cover 
 80 | % more area in the image. If not handled correctly, this imbalance can be detrimental 
 81 | % to the learning process because the learning is biased in favor of the dominant 
 82 | % classes. To handle this issue, class weights are computed from 'tbl' further below.
 83 | 
 84 | %% Prepare Training, Validation, and Test Sets
 85 | % DeepLabv3+ is trained using 60% of the images from the dataset. The rest 
 86 | % of the images are split evenly in 20% and 20% for validation and testing 
 87 | % respectively. The following code randomly splits the image and pixel label 
 88 | % data into a training, validation and test set.
 89 | [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = helper.partitionCamVidData(imds,pxds);
 90 | 
 91 | %% Configure Pretrained Network
 92 | % To configure the DeepLabv3+ network for transfer learning, replace the 
 93 | % last convolutional layer and the pixelClassificationLayer in the layer graph 
 94 | % obtained from the pretrained model.
 95 | 
 96 | % Specify the number of classes.
 97 | numClasses = numel(classes);
 98 | 
 99 | % Extract the layer graph from the pretrained network to perform custom
100 | % modification.
101 | lgraph = layerGraph(net);
102 | 
103 | % Replace the layer named 'node_398' in the pretrained network with a new 
104 | % 1-by-1 convolution layer that has one filter per class.
105 | convLayer = convolution2dLayer([1 1], numClasses,'Name', 'node_398');
106 | lgraph = replaceLayer(lgraph,"node_398",convLayer);
107 | 
108 | % Balance classes using class weighting (median frequency over class frequency).
109 | imageFreq = tbl.PixelCount ./ tbl.ImagePixelCount;
110 | classWeights = median(imageFreq) ./ imageFreq;
111 | 
112 | % Replace the layer named 'labels' in the pretrained network with a new pixel
113 | % classification layer that uses the new class names and the class weights.
114 | pxLayer = pixelClassificationLayer('Name','labels','Classes',tbl.Name,'ClassWeights',classWeights);
115 | lgraph = replaceLayer(lgraph,"labels",pxLayer);
116 | 
117 | % Use analyzeNetwork to visualize the new network.
118 | analyzeNetwork(lgraph);
119 | 
120 | %% Data Augmentation
121 | % Data augmentation is used to improve network accuracy by randomly transforming 
122 | % the original data during training. By using data augmentation, you can add 
123 | % more variety to the training data without increasing the number of labeled 
124 | % training samples. 
125 | %
126 | % This pretrained model has an input size of [513,513,3] and the CamVid images
127 | % are of size [720,960,3]. Hence, random patches of size [513,513,3] are 
128 | % extracted from the input images for training.
129 | %
130 | % In this case, 'randomPatchExtractionDatastore' is useful for creating 
131 | % such training and validation datastores. 
132 | % 
133 | % To apply the same random transformation to both image and pixel label data, 
134 | % pass an 'imageDataAugmenter' object as the 'DataAugmentation' name-value 
135 | % argument when creating the 'randomPatchExtractionDatastore' object. Here, random 
136 | % left/right reflection and random X/Y translation of +/- 10 pixels are used.
137 | xTrans = [-10 10];
138 | yTrans = [-10 10];
139 | 
140 | augmenter = imageDataAugmenter('RandXReflection',true, 'RandXTranslation',xTrans, 'RandYTranslation',yTrans);
141 | dsTrain = randomPatchExtractionDatastore(imdsTrain,pxdsTrain,[513 513],'PatchesPerImage',8, 'DataAugmentation', augmenter);
142 | 
143 | % Note that data augmentation is not applied to the test and validation data. 
144 | % Ideally, test and validation data should be representative of the original 
145 | % data and are left unmodified for unbiased evaluation.
146 | 
147 | %% Select Training Options
148 | % The optimization algorithm used for training is stochastic gradient descent 
149 | % with momentum (SGDM). Use trainingOptions to specify the hyper-parameters 
150 | % used for SGDM.
151 | 
152 | % Define validation datastore (random patches, without augmentation).
153 | dsVal = randomPatchExtractionDatastore(imdsVal,pxdsVal,[513 513],'PatchesPerImage',8);
154 | 
155 | % Define training options. 
156 | options = trainingOptions('sgdm', ...
157 |     'LearnRateSchedule','piecewise',...
158 |     'LearnRateDropPeriod',10,...
159 |     'LearnRateDropFactor',0.3,...
160 |     'Momentum',0.9, ...
161 |     'InitialLearnRate',1e-3, ...
162 |     'L2Regularization',0.005, ...
163 |     'ValidationData',dsVal,...
164 |     'MaxEpochs',6, ...  
165 |     'MiniBatchSize',16, ...
166 |     'Shuffle','every-epoch', ...
167 |     'CheckpointPath', tempdir, ...
168 |     'VerboseFrequency',2,...
169 |     'Plots','training-progress',...
170 |     'ValidationPatience', 4);
171 | 
172 | % The learning rate uses a piecewise schedule: it is multiplied by 0.3 
173 | % every 10 epochs. Note that 'MaxEpochs' is 6 here, so the drop period of 
174 | % 10 epochs is not reached and the learning rate stays at 1e-3 unless you 
175 | % increase 'MaxEpochs' or reduce 'LearnRateDropPeriod'.
176 | %
177 | % Setting 'ValidationData' makes training evaluate the network on 'dsVal' 
178 | % periodically ('ValidationFrequency' is not set, so its default applies). 
179 | % 'ValidationPatience' is set to 4 to stop training early when the validation 
180 | % loss stops improving, which helps prevent overfitting on the training dataset.
181 | %
182 | % A mini-batch size of 16 is used for training. You can increase or decrease 
183 | % this value based on the amount of GPU memory you have on your system.
184 | %
185 | % In addition, 'CheckpointPath' is set to a temporary location. This name-value 
186 | % pair enables the saving of network checkpoints at the end of every training 
187 | % epoch. If training is interrupted due to a system failure or power outage, 
188 | % you can resume training from the saved checkpoint. Make sure that the location 
189 | % specified by 'CheckpointPath' has enough space to store the network checkpoints.
190 | 
191 | 
192 | % Now, you can pass 'dsTrain', 'lgraph' and 'options' to trainNetwork 
193 | % as shown in the 'Start Training' section of the example 'Semantic Segmentation 
194 | % Using Deep Learning' to obtain a DeepLabv3+ model trained on the custom dataset.
195 | %
196 | % You can follow the sections 'Test Network on One Image' for inference using 
197 | % the trained model and 'Evaluate Trained Network' for evaluating metrics.
198 | 
199 | 
200 | %% References
201 | 
202 | % [1] Chen, Liang-Chieh et al. “Encoder-Decoder with Atrous Separable Convolution 
203 | % for Semantic Image Segmentation.” ECCV (2018).
204 | % 
205 | % [2] Brostow, G. J., J. Fauqueur, and R. Cipolla. "Semantic object classes 
206 | % in video: A high-definition ground truth database." Pattern Recognition Letters. 
207 | % Vol. 30, Issue 2, 2009, pp 88-97.
208 | % 
209 | % Copyright 2021 The MathWorks, Inc.


--------------------------------------------------------------------------------
/deepLabv3PlusSemanticSegmentationExample.m:
--------------------------------------------------------------------------------
 1 | %% Semantic Segmentation Using DeepLabv3+ Network
 2 | % The following code demonstrates running semantic segmentation with a pretrained 
 3 | % DeepLabv3+ network, trained on the PASCAL VOC dataset.
 4 | 
 5 | %% Prerequisites
 6 | % To run this example you need the following prerequisites:
 7 | % # MATLAB (R2020a or later) with Computer Vision and Deep Learning Toolbox.
 8 | % # Pretrained DeepLabv3+ network (download instructions below).
 9 | 
10 | %% Download the Pretrained Network
11 | model = helper.downloadPretrainedDeepLabv3Plus;
12 | net = model.net;
13 | 
14 | %% Perform Semantic Segmentation Using DeepLabv3+ Network
15 | % Read the test image.
16 | image = imread('visionteam.jpg');
17 | 
18 | % Resize the image such that its smaller dimension is scaled to 513.
19 | sz = size(image);
20 | [~,k] = min(sz(1:2));
21 | scale = 513/sz(k);
22 | img  = imresize(image, scale, "bilinear");
23 | 
24 | % Use the semanticseg function to generate the segmentation map.
25 | result = semanticseg(img, net);
26 | 
27 | % Overlay the segmentation map on the resized image.
28 | overlay = labeloverlay(img , result, 'Transparency', 0.4);
29 | 
30 | % Resize the overlay back to the original image size and show it next to the input.
31 | overlay = imresize(overlay, sz(1:2), 'bilinear');
32 | montage({image, overlay});
33 | 
34 | 
35 | % Copyright 2021 The MathWorks, Inc.


--------------------------------------------------------------------------------
/deepLabv3Plus_predict.m:
--------------------------------------------------------------------------------
 1 | function out = deepLabv3Plus_predict(in)
 2 | %#codegen
 3 | % deepLabv3Plus_predict Entry-point function that runs the DeepLabv3+
 4 | % network stored in model/deepLabV3Plus-voc.mat on the input IN.
 5 | %
 6 | % The network is loaded on the first call and kept in a persistent
 7 | % variable, so later calls reuse the loaded object.
 8 | %
 9 | % Copyright 2021 The MathWorks, Inc.
10 | 
11 | persistent deepLabv3PlusObj;
12 | 
13 | % Load the network only once.
14 | if isempty(deepLabv3PlusObj)
15 |     deepLabv3PlusObj = coder.loadDeepLearningNetwork('model/deepLabV3Plus-voc.mat');
16 | end
17 | 
18 | % Size for which predict is called directly.
19 | netInputSize = [513,513,3];
20 | 
21 | % For any other input size, take the activations of the layer named
22 | % 'labels' instead of calling predict.
23 | if isequal(size(in), netInputSize)
24 |     out = predict(deepLabv3PlusObj,in);
25 | else
26 |     out = activations(deepLabv3PlusObj,in,'labels');
27 | end


--------------------------------------------------------------------------------
/images/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matlab-deep-learning/pretrained-deeplabv3plus/f0aa293f9abe12768d0ffba84582653a431767c0/images/result.png


--------------------------------------------------------------------------------
/model/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/test/tPretrainedDeeplebV3Plus.m:
--------------------------------------------------------------------------------
 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tPretrainedDeeplebV3Plus < matlab.unittest.TestCase
 2 |     % Test inference with the pretrained DeepLabv3+ network.
 3 |     
 4 |     % Copyright 2021 The MathWorks, Inc.
 5 |     
 6 |     % The shared test fixture downloads the model. Here we check the
 7 |     % inference on the pretrained model.
 8 |     properties        
 9 |         RepoRoot = getRepoRoot;
10 |         ModelName = 'deepLabV3Plus-voc.mat';
11 |     end
12 |     
13 |     methods(Test)
14 |         function exerciseDetection(test)
15 |             % Run semanticseg on the resized test image and check the
16 |             % output size and the per-label pixel counts.
17 |             model = load(fullfile(test.RepoRoot,'model',test.ModelName));
18 |             image = imread('visionteam.jpg');
19 |             
20 |             % Resize so that the smaller image dimension becomes 513.
21 |             sz = size(image);
22 |             [~,k] = min(sz(1:2));
23 |             scale = 513/sz(k);
24 |             img  = imresize(image, scale, "bilinear");
25 |             imSize = size(img);
26 |             imSize = imSize(1:2);
27 |             
28 |             % Expected pixel counts of the first two labels present in the result.
29 |             expectedLabel1Count = 252888;
30 |             expectedLabel2Count = 257034;
31 |             
32 |             result = semanticseg(img, model.net);
33 |             labelsCountTbl = countlabels(result(:));
34 |             % Keep only the labels that actually occur (logical indexing
35 |             % instead of find).
36 |             labelCount = labelsCountTbl.Count(labelsCountTbl.Count ~= 0);
37 |             
38 |             % verifying size of output from semanticseg.
39 |             test.verifyEqual(size(result),imSize);
40 |             % verifying that all the pixels are labelled.
41 |             test.verifyEqual(sum(labelCount),prod(imSize));
42 |             % The checks below index two labels; fail with a clear
43 |             % qualification instead of an indexing error if fewer are present.
44 |             test.assertGreaterThanOrEqual(numel(labelCount),2);
45 |             % verifying the count of each label in the result.
46 |             test.verifyEqual(labelCount(1),expectedLabel1Count);
47 |             test.verifyEqual(labelCount(2),expectedLabel2Count);
48 |         end
49 |     end
50 | end


--------------------------------------------------------------------------------
/test/tdownloadPretrainedDeeplebV3Plus.m:
--------------------------------------------------------------------------------
 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tdownloadPretrainedDeeplebV3Plus < matlab.unittest.TestCase
 2 |     % tdownloadPretrainedDeeplebV3Plus   Checks that the model file is present.
 3 |     %
 4 |     % The shared fixture DownloadDeeplabV3PlusFixture downloads the model when
 5 |     % it is missing; this class verifies that the file exists in the model folder.
 6 | 
 7 |     % Copyright 2021 The MathWorks, Inc.
 8 | 
 9 |     properties
10 |         DataDir = fullfile(getRepoRoot(),'model');
11 |     end
12 | 
13 |     methods(Test)
14 |         function verifyDownloadedFilesExist(test)
15 |             % exist(...,'file') returns 2 when the name resolves to a file.
16 |             modelFile = fullfile(test.DataDir,'deepLabV3Plus-voc.mat');
17 |             test.verifyTrue(exist(modelFile,'file') == 2);
18 |         end
19 |     end
20 | end
21 | 


--------------------------------------------------------------------------------
/test/tload.m:
--------------------------------------------------------------------------------
 1 | classdef(SharedTestFixtures = {DownloadDeeplabV3PlusFixture}) tload < matlab.unittest.TestCase
 2 |     % tload   Checks the contents of the downloaded model file.
 3 |     %
 4 |     % The shared fixture DownloadDeeplabV3PlusFixture makes the model file
 5 |     % available; this class loads it and verifies properties of the network.
 6 | 
 7 |     % Copyright 2021 The MathWorks, Inc.
 8 | 
 9 |     properties
10 |         DataDir = fullfile(getRepoRoot(),'model');
11 |     end
12 | 
13 |     methods(Test)
14 |         function verifyModelAndFields(test)
15 |             % Load the MAT-file and compare the stored network against the
16 |             % expected class, layer and connection counts, and port names.
17 |             modelFile = fullfile(test.DataDir,'deepLabV3Plus-voc.mat');
18 |             loadedModel = load(modelFile);
19 |             net = loadedModel.net;
20 | 
21 |             test.verifyClass(net,'DAGNetwork');
22 |             test.verifyEqual(numel(net.Layers),376);
23 |             test.verifyEqual(size(net.Connections),[416 2]);
24 |             test.verifyEqual(net.InputNames,{'Input'});
25 |             test.verifyEqual(net.OutputNames,{'labels'});
26 |         end
27 |     end
28 | end


--------------------------------------------------------------------------------
/test/tools/DownloadDeeplabV3PlusFixture.m:
--------------------------------------------------------------------------------
 1 | classdef DownloadDeeplabV3PlusFixture < matlab.unittest.fixtures.Fixture
 2 |     % DownloadDeeplabV3PlusFixture   A fixture for calling
 3 |     % helper.downloadPretrainedDeepLabv3Plus if necessary. This is to ensure
 4 |     % that this function is only called once and only when tests need it. It
 5 |     % also provides a teardown that returns the test environment to the state
 6 |     % it was in before testing: a model file downloaded by the fixture is
 7 |     % removed again, a model file that was already present is left alone.
 8 | 
 9 |     % Copyright 2021 The MathWorks, Inc
10 | 
11 |     properties(Constant)
12 |         % Folder in which the model file is looked up.
13 |         DeeplabV3DataDir = fullfile(getRepoRoot(),'model')
14 |     end
15 | 
16 |     properties
17 |         % True when the model file was already present before setup ran.
18 |         DeeplabV3Exist (1,1) logical
19 |     end
20 | 
21 |     methods
22 |         function setup(this)
23 |             % Record whether the model is already there; download it if not.
24 |             modelFile = fullfile(this.DeeplabV3DataDir,'deepLabV3Plus-voc.mat');
25 |             this.DeeplabV3Exist = exist(modelFile,'file')==2;
26 | 
27 |             % Call this in evalc to capture and drop any standard output
28 |             % that we don't want polluting the test logs.
29 |             if ~this.DeeplabV3Exist
30 |                 evalc('helper.downloadPretrainedDeepLabv3Plus();');
31 |             end
32 |         end
33 | 
34 |         function teardown(this)
35 |             % Delete the model only when this fixture downloaded it. A file
36 |             % that existed before setup belongs to the user and must stay.
37 |             % The existence check avoids a warning from delete when the
38 |             % download did not produce the file.
39 |             modelFile = fullfile(this.DeeplabV3DataDir,'deepLabV3Plus-voc.mat');
40 |             if ~this.DeeplabV3Exist && exist(modelFile,'file')==2
41 |                 delete(modelFile);
42 |             end
43 |         end
44 |     end
45 | end


--------------------------------------------------------------------------------
/test/tools/getRepoRoot.m:
--------------------------------------------------------------------------------
 1 | function repoRoot = getRepoRoot()
 2 | % getRepoRoot   Return a path to the repository's root directory.
 3 | %
 4 | % The result is the folder that holds this file followed by two '..'
 5 | % segments joined with fullfile.
 6 | 
 7 | % Copyright 2020 The MathWorks, Inc.
 8 | 
 9 | % This file is <root>/test/tools/getRepoRoot.m, so the root is two levels up.
10 | toolsDir = fileparts(mfilename('fullpath'));
11 | repoRoot = fullfile(toolsDir,'..','..');
12 | end


--------------------------------------------------------------------------------