├── dataset ├── camvid │ ├── test │ │ └── .gitkeep │ ├── val │ │ └── .gitkeep │ ├── testannot │ │ └── .gitkeep │ ├── train │ │ └── .gitkeep │ ├── valannot │ │ └── .gitkeep │ ├── trainannot │ │ └── .gitkeep │ └── camvid_val_list.txt ├── cityscape_scripts │ ├── __init__.py │ ├── download_cityscapes.sh │ ├── print_utils.py │ └── generate_mappings.py ├── cityscapes │ ├── gtFine │ │ ├── val │ │ │ └── .gitkeep │ │ ├── test │ │ │ └── .gitkeep │ │ └── train │ │ │ └── .gitkeep │ ├── gtCoarse │ │ ├── train │ │ │ └── .gitkeep │ │ ├── val │ │ │ └── .gitkeep │ │ └── train_extra │ │ │ └── .gitkeep │ └── leftImg8bit │ │ ├── val │ │ └── .gitkeep │ │ ├── test │ │ └── .gitkeep │ │ └── train │ │ └── .gitkeep ├── __init__.py ├── inform │ ├── camvid_inform.pkl │ └── cityscapes_inform.pkl ├── create_dataset_list.py └── README.md ├── utils ├── losses │ ├── __init__.py │ ├── lovasz_losses.py │ └── loss.py ├── metric │ ├── __init__.py │ └── metric.py ├── scheduler │ ├── __init__.py │ └── lr_scheduler.py ├── optim │ ├── __init__.py │ ├── Lookahead.py │ ├── RAdam.py │ ├── AdamW.py │ └── Ranger.py ├── convert_state.py ├── debug.py ├── utils.py ├── activations.py └── colorize_mask.py ├── tools ├── flops_counter │ ├── ptflops │ │ └── __init__.py │ ├── CHANGELOG.md │ ├── setup.py │ ├── LICENSE │ ├── sample.py │ ├── .gitignore │ ├── ENet_Flops_test.py │ └── README.md ├── trainID2labelID.py └── fps_test │ └── eval_forward_time.py ├── docs ├── image-1.png ├── requirements.yml └── README.md ├── .gitignore ├── requirements.txt ├── LICENSE ├── model ├── ESPNet_v2 │ ├── SegmentationModel.py │ └── cnn_utils.py ├── UNet.py ├── ERFNet.py ├── EDANet.py ├── DABNet.py ├── ESNet.py ├── SegNet.py ├── SQNet.py ├── ContextNet.py ├── LinkNet.py ├── FastSCNN.py ├── FPENet.py └── LEDNet.py ├── predict.py ├── test.py └── README.md /dataset/camvid/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/testannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/valannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/trainannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/train/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/train_extra/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .loss import * 2 | -------------------------------------------------------------------------------- /utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | from .metric import * 2 | -------------------------------------------------------------------------------- /utils/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .lr_scheduler import * 2 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .camvid import * 2 | from .cityscapes import * 3 | -------------------------------------------------------------------------------- /tools/flops_counter/ptflops/__init__.py: -------------------------------------------------------------------------------- 1 | from .flops_counter import get_model_complexity_info 2 | -------------------------------------------------------------------------------- /docs/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/docs/image-1.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *.pyc 4 | *.pyo 5 | *.pth 6 | .idea/ 7 | result/ 8 | server/ 9 | checkpoint/ -------------------------------------------------------------------------------- /utils/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .RAdam import * 2 | from .AdamW import * 3 | from .Lookahead import * 4 | from .Ranger import * 
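# A minimal usage sketch, assuming a `model` nn.Module is defined elsewhere (it is a placeholder,
# not something this package provides): Lookahead wraps a base optimizer such as RAdam, while
# Ranger bundles the same RAdam + Lookahead combination in a single class.
#
#   from utils.optim import RAdam, Lookahead
#   base_optimizer = RAdam(model.parameters(), lr=5e-4, weight_decay=1e-4)
#   optimizer = Lookahead(base_optimizer, k=5, alpha=0.5)  # slow weights synced every k steps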
-------------------------------------------------------------------------------- /dataset/inform/camvid_inform.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/dataset/inform/camvid_inform.pkl -------------------------------------------------------------------------------- /dataset/inform/cityscapes_inform.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/dataset/inform/cityscapes_inform.pkl -------------------------------------------------------------------------------- /docs/requirements.yml: -------------------------------------------------------------------------------- 1 | name: seg_requirements 2 | dependencies: 3 | - python3 4 | - cuda 9.0 5 | - pip: 6 | - Image 7 | - tqdm 8 | - torch==1.1.0 9 | - torchvision==0.3.0 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | kiwisolver==1.1.0 3 | matplotlib==3.1.1 4 | numpy==1.15.0 5 | Pillow>=6.2.2 6 | pyparsing==2.4.2 7 | python-dateutil==2.8.1 8 | pytz==2018.4 9 | six==1.12.0 10 | torch==1.1.0 11 | torchvision==0.3.0 12 | torchsummary==1.5.1 13 | -------------------------------------------------------------------------------- /tools/flops_counter/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # ptflops versions log 2 | 3 | ## v 0.3 4 | - Add 1d operators: batch norm, poolings, convolution. 5 | - Add ability to output extended report to any output stream. 6 | 7 | ## v 0.2 8 | - Add new operations: Conv3d, BatchNorm3d, MaxPool3d, AvgPool3d, ConvTranspose2d. 9 | - Add some results on widespread models to the README. 10 | - Minor bugfixes. 11 | 12 | ## v 0.1 13 | - Initial release with basic functionality. 14 | -------------------------------------------------------------------------------- /utils/convert_state.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import os 3 | import numpy as np 4 | 5 | 6 | def convert_state_dict(state_dict): 7 | """ 8 | Converts a state dict saved from a DataParallel module to a normal module state_dict 9 | Args: 10 | state_dict is the loaded DataParallel model_state 11 | """ 12 | state_dict_new = OrderedDict() 13 | # print(type(state_dict)) 14 | for k, v in state_dict.items(): 15 | # print(k) 16 | name = k[7:] # remove the 'module.' prefix added by DataParallel 17 | # PyTorch does not strip this prefix on its own, so it has to be removed by hand. 18 | state_dict_new[name] = v 19 | return state_dict_new 20 | -------------------------------------------------------------------------------- /utils/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | import pdb 4 | 5 | """ 6 | This module is for debugging without modifying scripts. 7 | 8 | By just adding `import debug` to a script you want to debug, 9 | the pdb debugger starts automatically at the point where an exception is raised. 10 | After the debugger launches, `from IPython import embed; embed()` can be used to drop into IPython.
11 | """ 12 | 13 | 14 | def info(exctype, value, tb): 15 | # we are in interactive mode or we don't have a tty-like 16 | # device, so we call the default hook 17 | if hasattr(sys, 'ps1') or not sys.stderr.isatty(): 18 | sys.__excepthook__(exctype, value, tb) 19 | else: 20 | traceback.print_exception(exctype, value, tb) 21 | pdb.post_mortem(tb) 22 | 23 | 24 | sys.excepthook = info 25 | -------------------------------------------------------------------------------- /tools/flops_counter/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | from setuptools import setup, find_packages 5 | 6 | readme = open('README.md').read() 7 | 8 | VERSION = '0.3' 9 | 10 | requirements = [ 11 | 'torch', 12 | ] 13 | 14 | setup( 15 | # Metadata 16 | name='ptflops', 17 | version=VERSION, 18 | author='Vladislav Sovrasov', 19 | author_email='sovrasov.vlad@gmail.com', 20 | url='https://github.com/sovrasov/flops-counter.pytorch', 21 | description='Flops counter for convolutional networks in pytorch framework', 22 | long_description=readme, 23 | long_description_content_type='text/markdown', 24 | license='MIT', 25 | 26 | # Package info 27 | packages=find_packages(exclude=('*test*',)), 28 | 29 | # 30 | zip_safe=True, 31 | install_requires=requirements, 32 | 33 | # Classifiers 34 | classifiers=[ 35 | 'Programming Language :: Python :: 3', 36 | ], 37 | ) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yu Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tools/flops_counter/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Vladislav Sovrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/download_cityscapes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | global_path='../../../vision_datasets' 3 | data_dir=$global_path'/cityscapes' 4 | 5 | mkdir -p $data_dir 6 | cd $data_dir 7 | 8 | # enter user details 9 | uname='' # 10 | pass='' 11 | 12 | wget --keep-session-cookies --save-cookies=cookies.txt --post-data 'username='$uname'&password='$pass'&submit=Login' https://www.cityscapes-dataset.com/login/ 13 | wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=1 14 | wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=3 15 | # Uncomment if you want to download coarse 16 | #wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=4 17 | #wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=2 18 | 19 | 20 | #unzip -q -o gtCoarse.zip 21 | unzip -q -o gtFine_trainvaltest.zip 22 | #unzip -q -o leftImg8bit_trainextra.zip 23 | unzip -q -o leftImg8bit_trainvaltest.zip 24 | 25 | #rm -rf gtCoarse.zip 26 | rm -rf gtFine_trainvaltest.zip 27 | #rm -rf leftImg8bit_trainextra.zip 28 | rm -rf leftImg8bit_trainvaltest.zip -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ### Model & Backbone 2 | 3 | | Model | Scratch | ResNet18 | ResNet50 | ResNet101 | 4 | | :--------: | :-----: | :------: | :------: | :-------: | 5 | | SQNet | ✓ | | | | 6 | | LinkNet | ✓ | | | | 7 | | SegNet | ✓ | | | | 8 | | UNet | ✓ | | | | 9 | | ENet | ✓ | | | | 10 | | ERFNet | ✓ | | | | 11 | | CGNet | ✓ | | | | 12 | | EDANet | ✓ | | | | 13 | | ESPNet | ✓ | | | | 14 | | ESNet | ✓ | | | | 15 | | ESPNetv2 | ✓ | | | | 16 | | LEDNet | ✓ | | | | 17 | | ContextNet | ✓ | | | | 18 | | Fast-SCNN | ✓ | | | | 19 | | DABNet | ✓ | | | | 20 | | FSSNet | ✓ | | | 
| 21 | | FPENet | ✓ | | | | 22 | | | | | | | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /dataset/create_dataset_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import glob 4 | root_path=os.path.expanduser('./cityscapes') 5 | image_path='leftImg8bit' 6 | annotation_path='gtFine' 7 | splits=['train','val','test'] 8 | 9 | #train glob images 2975 10 | #train glob annotations 2975 11 | #val glob images 500 12 | #val glob annotations 500 13 | #test glob images 1525 14 | #test glob annotations 1525 15 | 16 | for split in splits: 17 | glob_images=glob.glob(os.path.join(root_path,image_path,split,'*','*leftImg8bit.png')) 18 | glob_annotations=glob.glob(os.path.join(root_path,annotation_path,split,'*','*labelTrainIds.png')) 19 | print('%s glob images'%split,len(glob_images)) 20 | print('%s glob annotations'%split,len(glob_annotations)) 21 | 22 | write_file=open('./cityscapes/cityscapes_'+split+'_list.txt','w') 23 | for g_img in glob_images: 24 | #img_p: eg leftImg8bit/val/frankfurt/frankfurt_000001_083852_leftImg8bit.png 25 | #ann_p: eg gtFine/val/frankfurt/frankfurt_000001_083852_gtFine_labelTrainIds.png 26 | img_p=g_img.replace(root_path+'/','') 27 | #replace will not change img_p 28 | ann_p=img_p.replace('leftImg8bit/','gtFine/').replace('leftImg8bit.png','gtFine_labelTrainIds.png') 29 | assert os.path.join(root_path,img_p) in glob_images,'%s not exist'%img_p 30 | assert os.path.join(root_path,ann_p) in glob_annotations,'%s not exist'%ann_p 31 | write_file.write(img_p+' '+ann_p+'\n') 32 | write_file.close() 33 | -------------------------------------------------------------------------------- /tools/flops_counter/sample.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | 4 | import torchvision.models as models 5 | import torch 6 | 7 | from ptflops import get_model_complexity_info 8 | 9 | pt_models = {'resnet18': models.resnet18, 'resnet50': models.resnet50, 10 | 'alexnet': models.alexnet, 11 | 'vgg16': models.vgg16, 12 | 'squeezenet': models.squeezenet1_0, 13 | 'densenet': models.densenet161, 14 | 'inception': models.inception_v3} 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser(description='ptflops sample script') 18 | parser.add_argument('--device', type=int, default=0, 19 | help='Device to store the model.') 20 | parser.add_argument('--model', choices=list(pt_models.keys()), 21 | type=str, default='resnet18') 22 | parser.add_argument('--result', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | if args.result is None: 26 | ost = sys.stdout 27 | else: 28 | ost = open(args.result, 'w') 29 | 30 | with torch.cuda.device(args.device): 31 | net = pt_models[args.model]().cuda() 32 | 33 | flops, params = get_model_complexity_info(net, (3, 224, 224), 34 | as_strings=True, 35 | print_per_layer_stat=True, 36 | ost=ost) 37 | print('Flops: ' + flops) 38 | print('Params: ' + params) 39 | -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- 1 | # Supported datasets 2 | 3 | - CamVid 4 | - CityScapes 5 | 6 | Note: When referring to the number of classes, the void/unlabeled class is excluded. 
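A hedged example of how the Cityscapes file lists used elsewhere in this repository can be generated with the script in this folder (it assumes the `*_labelTrainIds.png` annotations have already been created, e.g. with the official cityscapesScripts, and that the command is run from the `dataset/` directory):

```
cd dataset
python create_dataset_list.py
```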
7 | 8 | ## CamVid Dataset 9 | 10 | The Cambridge-driving Labeled Video Database (CamVid) is a collection of over ten minutes of high-quality 30Hz footage with object class semantic labels at 1Hz and in part, 15Hz. Each pixel is associated with one of 32 classes. 11 | 12 | The CamVid dataset supported here is a 12 class version developed by the authors of SegNet. [Download link here](https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid). For actual training, an 11 class version is used - the "road marking" class is combined with the "road" class. 13 | 14 | More detailed information about the CamVid dataset can be found [here](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) and on the [SegNet GitHub repository](https://github.com/alexgkendall/SegNet-Tutorial). 15 | 16 | ## Cityscapes 17 | 18 | Cityscapes is a set of stereo video sequences recorded in streets from 50 different cities with 34 different classes. There are 5000 images with fine annotations and 20000 images coarsely annotated. 19 | 20 | The version supported here is the finely annotated one with 19 classes. 21 | 22 | For more detailed information see the official [website](https://www.cityscapes-dataset.com/) and [repository](https://github.com/mcordts/cityscapesScripts). 23 | 24 | The dataset can be downloaded from https://www.cityscapes-dataset.com/downloads/. At this time, a registration is required to download the data. -------------------------------------------------------------------------------- /tools/trainID2labelID.py: -------------------------------------------------------------------------------- 1 | # converting trainIDs to labelIDs for evaluating the test set segmenatation results of the cityscapes dataset 2 | 3 | import numpy as np 4 | import os 5 | from PIL import Image 6 | 7 | 8 | 9 | # index: trainId from 0 to 18, 19 semantic class val: labelIDs 10 | cityscapes_trainIds2labelIds = np.array([7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], 11 | dtype=np.uint8) 12 | 13 | 14 | def trainIDs2LabelID(trainID_png_dir, save_dir): 15 | print('save_dir: ', save_dir) 16 | if not os.path.exists(save_dir): 17 | os.makedirs(save_dir) 18 | png_list = os.listdir(trainID_png_dir) 19 | for index, png_filename in enumerate(png_list): 20 | # 21 | png_path = os.path.join(trainID_png_dir, png_filename) 22 | # print(png_path) 23 | print('processing(', index, '/', len(png_list), ') ....') 24 | image = Image.open(png_path) # image is a PIL #image 25 | pngdata = np.array(image) 26 | trainID = pngdata # model prediction 27 | row, col = pngdata.shape 28 | labelID = np.zeros((row, col), dtype=np.uint8) 29 | for i in range(row): 30 | for j in range(col): 31 | labelID[i][j] = cityscapes_trainIds2labelIds[trainID[i][j]] 32 | 33 | res_path = os.path.join(save_dir, png_filename) 34 | new_im = Image.fromarray(labelID) 35 | new_im.save(res_path) 36 | 37 | 38 | if __name__ == '__main__': 39 | trainID_png_dir = '../server/cityscapes/predict/ENet' 40 | save_dir = '../server/cityscapes/predict/cityscapes_submit/' 41 | trainIDs2LabelID(trainID_png_dir, save_dir) 42 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/print_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | 4 | text_colors = { 5 | 'logs': '\033[34m', # 033 is the escape code and 34 is the color code 6 | 'info': '\033[32m', 7 | 'warning': '\033[33m', 8 | 'error': '\033[31m', 9 | 'bold': '\033[1m', 10 | 'end_color': 
'\033[0m' 11 | } 12 | 13 | 14 | def get_curr_time_stamp(): 15 | return time.strftime("%Y-%m-%d %H:%M:%S") 16 | 17 | 18 | def print_error_message(message): 19 | time_stamp = get_curr_time_stamp() 20 | error_str = text_colors['error'] + text_colors['bold'] + 'ERROR ' + text_colors['end_color'] 21 | print('{} - {} - {}'.format(time_stamp, error_str, message)) 22 | print('{} - {} - {}'.format(time_stamp, error_str, 'Exiting!!!')) 23 | exit(-1) 24 | 25 | 26 | def print_log_message(message): 27 | time_stamp = get_curr_time_stamp() 28 | log_str = text_colors['logs'] + text_colors['bold'] + 'LOGS ' + text_colors['end_color'] 29 | print('{} - {} - {}'.format(time_stamp, log_str, message)) 30 | 31 | 32 | def print_warning_message(message): 33 | time_stamp = get_curr_time_stamp() 34 | warn_str = text_colors['warning'] + text_colors['bold'] + 'WARNING' + text_colors['end_color'] 35 | print('{} - {} - {}'.format(time_stamp, warn_str, message)) 36 | 37 | 38 | def print_info_message(message): 39 | time_stamp = get_curr_time_stamp() 40 | info_str = text_colors['info'] + text_colors['bold'] + 'INFO ' + text_colors['end_color'] 41 | print('{} - {} - {}'.format(time_stamp, info_str, message)) 42 | 43 | 44 | if __name__ == '__main__': 45 | print_log_message('Testing') 46 | print_warning_message('Testing') 47 | print_info_message('Testing') 48 | print_error_message('Testing') -------------------------------------------------------------------------------- /tools/flops_counter/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /tools/fps_test/eval_forward_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.backends.cudnn as cudnn 4 | 5 | from argparse import ArgumentParser 6 | from builders.model_builder import build_model 7 | 8 | 9 | def compute_speed(model, input_size, device, iteration=100): 10 | torch.cuda.set_device(device) 11 | cudnn.benchmark = True 12 | 13 | model.eval() 14 | model = model.cuda() 15 | 16 | input = torch.randn(*input_size, device=device) 17 | 18 | for _ in range(50): 19 | model(input) 20 | 21 | print('=========Eval Forward Time=========') 22 | torch.cuda.synchronize() 23 | t_start = time.time() 24 | for _ in range(iteration): 25 | model(input) 26 | torch.cuda.synchronize() 27 | elapsed_time = time.time() - t_start 28 | 29 | speed_time = elapsed_time / iteration * 1000 30 | fps = iteration / elapsed_time 31 | 32 | print('Elapsed Time: [%.2f s / %d iter]' % (elapsed_time, iteration)) 33 | print('Speed Time: %.2f ms / iter FPS: %.2f' % (speed_time, fps)) 34 | return speed_time, fps 35 | 36 | 37 | if __name__ == '__main__': 38 | parser = ArgumentParser() 39 | 40 | parser.add_argument("--size", type=str, default="512,1024", help="input size of model") 41 | parser.add_argument('--num-channels', type=int, default=3) 42 | parser.add_argument('--batch-size', type=int, default=1) 43 | parser.add_argument('--classes', type=int, default=19) 44 | parser.add_argument('--iter', type=int, default=100) 45 | parser.add_argument('--model', type=str, default='ENet') 46 | parser.add_argument("--gpus", type=str, default="0", help="gpu ids (default: 0)") 47 | args = parser.parse_args() 48 | 49 | h, w = map(int, args.size.split(',')) 50 | model = build_model(args.model, num_classes=args.classes) 51 | compute_speed(model, (args.batch_size, args.num_channels, h, w), int(args.gpus), iteration=args.iter) 52 | -------------------------------------------------------------------------------- /tools/flops_counter/ENet_Flops_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import torch 4 | 5 | 6 | from model.ENet import ENet 7 | from model.ERFNet import ERFNet 8 | from model.CGNet import CGNet 9 | from model.EDANet import EDANet 10 | from model.ESNet import 
ESNet 11 | from model.ESPNet import ESPNet 12 | from model.LEDNet import LEDNet 13 | from model.ESPNet_v2.SegmentationModel import EESPNet_Seg 14 | from model.FastSCNN import FastSCNN 15 | from model.DABNet import DABNet 16 | from model.FPENet import FPENet 17 | 18 | 19 | 20 | 21 | 22 | 23 | from tools.flops_counter.ptflops import get_model_complexity_info 24 | 25 | 26 | 27 | pt_models = { 28 | 29 | 'ENet': ENet, 30 | 'ERFNet': ERFNet, 31 | 'CGNet': CGNet, 32 | 'EDANet': EDANet, 33 | 'ESNet': ESNet, 34 | 'ESPNet': ESPNet, 35 | 'LEDNet': LEDNet, 36 | 'EESPNet_Seg': EESPNet_Seg, 37 | 'FastSCNN': FastSCNN, 38 | 'DABNet': DABNet, 39 | 'FPENet': FPENet 40 | } 41 | 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser(description='ptflops sample script') 44 | parser.add_argument('--device', type=int, default=0, 45 | help='Device to store the model.') 46 | parser.add_argument('--model', choices=list(pt_models.keys()), 47 | type=str, default='ENet') 48 | parser.add_argument('--result', type=str, default=None) 49 | args = parser.parse_args() 50 | 51 | if args.result is None: 52 | ost = sys.stdout 53 | else: 54 | ost = open(args.result, 'w') 55 | 56 | with torch.cuda.device(args.device): 57 | net = pt_models[args.model](classes=19).cuda() 58 | 59 | flops, params = get_model_complexity_info(net, (3, 512, 1024), 60 | as_strings=True, 61 | print_per_layer_stat=True, 62 | ost=ost) 63 | print('Flops: ' + flops) 64 | print('Params: ' + params) -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image 5 | import torch 6 | import torch.nn as nn 7 | from utils.colorize_mask import cityscapes_colorize_mask, camvid_colorize_mask 8 | 9 | 10 | def __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum, 11 | **kwargs): 12 | for name, m in feature.named_modules(): 13 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 14 | conv_init(m.weight, **kwargs) 15 | elif isinstance(m, norm_layer): 16 | m.eps = bn_eps 17 | m.momentum = bn_momentum 18 | nn.init.constant_(m.weight, 1) 19 | nn.init.constant_(m.bias, 0) 20 | 21 | 22 | def init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum, 23 | **kwargs): 24 | if isinstance(module_list, list): 25 | for feature in module_list: 26 | __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum, 27 | **kwargs) 28 | else: 29 | __init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum, 30 | **kwargs) 31 | 32 | 33 | def setup_seed(seed): 34 | torch.manual_seed(seed) 35 | torch.cuda.manual_seed_all(seed) 36 | np.random.seed(seed) 37 | random.seed(seed) 38 | torch.backends.cudnn.deterministic = True 39 | 40 | 41 | def save_predict(output, gt, img_name, dataset, save_path, output_grey=False, output_color=True, gt_color=False): 42 | if output_grey: 43 | output_grey = Image.fromarray(output) 44 | output_grey.save(os.path.join(save_path, img_name + '.png')) 45 | 46 | if output_color: 47 | if dataset == 'cityscapes': 48 | output_color = cityscapes_colorize_mask(output) 49 | elif dataset == 'camvid': 50 | output_color = camvid_colorize_mask(output) 51 | 52 | output_color.save(os.path.join(save_path, img_name + '_color.png')) 53 | 54 | if gt_color: 55 | if dataset == 'cityscapes': 56 | gt_color = cityscapes_colorize_mask(gt) 57 | elif dataset == 'camvid': 58 | gt_color = camvid_colorize_mask(gt) 59 | 60 | gt_color.save(os.path.join(save_path, 
img_name + '_gt.png')) 61 | 62 | 63 | def netParams(model): 64 | """ 65 | computing total network parameters 66 | args: 67 | model: model 68 | return: the number of parameters 69 | """ 70 | total_paramters = 0 71 | for parameter in model.parameters(): 72 | i = len(parameter.size()) 73 | p = 1 74 | for j in range(i): 75 | p *= parameter.size(j) 76 | total_paramters += p 77 | 78 | return total_paramters 79 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | 7 | 8 | 9 | 10 | ''' 11 | Script provides functional interface for Mish activation function. 12 | Mish - "Mish: A Self Regularized Non-Monotonic Neural Activation Function" 13 | https://arxiv.org/abs/1908.08681v1 14 | ''' 15 | class Mish(nn.Module): 16 | def __init__(self): 17 | super().__init__() 18 | # print("Mish activation loaded...") 19 | 20 | def forward(self, x): 21 | return x *( torch.tanh(F.softplus(x))) 22 | 23 | 24 | 25 | class BetaMish(nn.Module): 26 | def __init__(self): 27 | super().__init__() 28 | 29 | def forward(self, x): 30 | beta=1.5 31 | return x * torch.tanh(torch.log(torch.pow((1+torch.exp(x)),beta))) 32 | 33 | 34 | ''' 35 | Swish - https://arxiv.org/pdf/1710.05941v1.pdf 36 | ''' 37 | class Swish(nn.Module): 38 | def __init__(self): 39 | super(Swish, self).__init__() 40 | 41 | self.sigmoid = nn.Sigmoid() 42 | 43 | def forward(self, x): 44 | return x * self.sigmoid(x) 45 | 46 | 47 | class Hswish(nn.Module): 48 | def __init__(self, inplace=True): 49 | super(Hswish, self).__init__() 50 | self.inplace = inplace 51 | 52 | def forward(self, x): 53 | return x * F.relu6(x + 3., inplace=self.inplace) / 6. 54 | 55 | 56 | 57 | class Hsigmoid(nn.Module): 58 | def __init__(self, inplace=True): 59 | super(Hsigmoid, self).__init__() 60 | self.inplace = inplace 61 | 62 | def forward(self, x): 63 | return F.relu6(x + 3., inplace=self.inplace) / 6. 
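# The "hard" variants above are piecewise-linear approximations of their smooth counterparts:
# Hswish(x) = x * ReLU6(x + 3) / 6 approximates Swish(x) = x * sigmoid(x), and
# Hsigmoid(x) = ReLU6(x + 3) / 6 approximates sigmoid(x), trading a little accuracy for cheaper inference.
# A minimal sanity-check sketch (input shape chosen arbitrarily):
#
#   x = torch.randn(2, 8, 16, 16)
#   for act in (Mish(), BetaMish(), Swish(), Hswish(), Hsigmoid()):
#       assert act(x).shape == x.shape  # every activation here is applied elementwise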
64 | 65 | 66 | class SEModule(nn.Module): 67 | def __init__(self, channel, act, reduction=4): 68 | super(SEModule, self).__init__() 69 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 70 | self.conv = nn.Sequential( 71 | nn.Conv2d(channel, channel // reduction, 1, 1, 0, bias=True), 72 | act 73 | ) 74 | self.fc = nn.Sequential( 75 | nn.Conv2d(channel // reduction, channel, 1, 1, 0, bias=True), 76 | Hsigmoid() 77 | ) 78 | 79 | def forward(self, x): 80 | y = self.avg_pool(x) 81 | y = self.conv(y) 82 | y = self.fc(y) 83 | return torch.mul(x, y) 84 | 85 | 86 | 87 | 88 | 89 | 90 | NON_LINEARITY = { 91 | 'ReLU': nn.ReLU(inplace=True), 92 | 'PReLU': nn.PReLU(), 93 | 'ReLu6': nn.ReLU6(inplace=True), 94 | 'Mish': Mish(), 95 | 'BetaMish': BetaMish(), 96 | 'Swish': Swish(), 97 | 'Hswish': Hswish(), 98 | 'tanh': nn.Tanh(), 99 | 'sigmoid': nn.Sigmoid() 100 | } -------------------------------------------------------------------------------- /utils/optim/Lookahead.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import chain 3 | from torch.optim import Optimizer 4 | import torch 5 | import warnings 6 | 7 | class Lookahead(Optimizer): 8 | def __init__(self, optimizer, k=5, alpha=0.5): 9 | self.optimizer = optimizer 10 | self.k = k 11 | self.alpha = alpha 12 | self.param_groups = self.optimizer.param_groups 13 | self.state = defaultdict(dict) 14 | self.fast_state = self.optimizer.state 15 | for group in self.param_groups: 16 | group["counter"] = 0 17 | 18 | def update(self, group): 19 | for fast in group["params"]: 20 | param_state = self.state[fast] 21 | if "slow_param" not in param_state: 22 | param_state["slow_param"] = torch.zeros_like(fast.data) 23 | param_state["slow_param"].copy_(fast.data) 24 | slow = param_state["slow_param"] 25 | slow += (fast.data - slow) * self.alpha 26 | fast.data.copy_(slow) 27 | 28 | def update_lookahead(self): 29 | for group in self.param_groups: 30 | self.update(group) 31 | 32 | def step(self, closure=None): 33 | loss = self.optimizer.step(closure) 34 | for group in self.param_groups: 35 | if group["counter"] == 0: 36 | self.update(group) 37 | group["counter"] += 1 38 | if group["counter"] >= self.k: 39 | group["counter"] = 0 40 | return loss 41 | 42 | def state_dict(self): 43 | fast_state_dict = self.optimizer.state_dict() 44 | slow_state = { 45 | (id(k) if isinstance(k, torch.Tensor) else k): v 46 | for k, v in self.state.items() 47 | } 48 | fast_state = fast_state_dict["state"] 49 | param_groups = fast_state_dict["param_groups"] 50 | return { 51 | "fast_state": fast_state, 52 | "slow_state": slow_state, 53 | "param_groups": param_groups, 54 | } 55 | 56 | def load_state_dict(self, state_dict): 57 | slow_state_dict = { 58 | "state": state_dict["slow_state"], 59 | "param_groups": state_dict["param_groups"], 60 | } 61 | fast_state_dict = { 62 | "state": state_dict["fast_state"], 63 | "param_groups": state_dict["param_groups"], 64 | } 65 | super(Lookahead, self).load_state_dict(slow_state_dict) 66 | self.optimizer.load_state_dict(fast_state_dict) 67 | self.fast_state = self.optimizer.state 68 | 69 | def add_param_group(self, param_group): 70 | param_group["counter"] = 0 71 | self.optimizer.add_param_group(param_group) -------------------------------------------------------------------------------- /utils/colorize_mask.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | import numpy as np 4 | 5 | 
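# The palettes below are flat [R, G, B, R, G, B, ...] lists with one RGB triplet per train ID
# (19 Cityscapes classes, 11 CamVid classes); cityscapes_palette is zero-padded to 256 * 3 entries
# further down, and both are consumed by PIL's Image.putpalette() in the colorize helpers.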
cityscapes_palette = [128, 64, 128, 244, 35, 232, 70, 70, 70, 102, 102, 156, 190, 153, 153, 153, 153, 153, 250, 170, 30, 6 | 220, 220, 0, 107, 142, 35, 152, 251, 152, 70, 130, 180, 220, 20, 60, 255, 0, 0, 0, 0, 142, 0, 0, 7 | 70, 8 | 0, 60, 100, 0, 80, 100, 0, 0, 230, 119, 11, 32] 9 | 10 | camvid_palette = [128, 128, 128, 128, 0, 0, 192, 192, 128, 128, 64, 128, 60, 40, 222, 128, 128, 0, 192, 128, 128, 64, 11 | 64, 12 | 128, 64, 0, 128, 64, 64, 0, 0, 128, 192] 13 | 14 | zero_pad = 256 * 3 - len(cityscapes_palette) 15 | for i in range(zero_pad): 16 | cityscapes_palette.append(0) 17 | 18 | 19 | # zero_pad = 256 * 3 - len(camvid_palette) 20 | # for i in range(zero_pad): 21 | # camvid_palette.append(0) 22 | 23 | def cityscapes_colorize_mask(mask): 24 | # mask: numpy array of the mask 25 | new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') 26 | new_mask.putpalette(cityscapes_palette) 27 | 28 | return new_mask 29 | 30 | 31 | def camvid_colorize_mask(mask): 32 | # mask: numpy array of the mask 33 | new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') 34 | new_mask.putpalette(camvid_palette) 35 | 36 | return new_mask 37 | 38 | 39 | class VOCColorize(object): 40 | def __init__(self, n=22): 41 | self.cmap = voc_color_map(22) 42 | self.cmap = torch.from_numpy(self.cmap[:n]) 43 | 44 | def __call__(self, gray_image): 45 | size = gray_image.shape 46 | color_image = np.zeros((3, size[0], size[1]), dtype=np.uint8) 47 | 48 | for label in range(0, len(self.cmap)): 49 | mask = (label == gray_image) 50 | color_image[0][mask] = self.cmap[label][0] 51 | color_image[1][mask] = self.cmap[label][1] 52 | color_image[2][mask] = self.cmap[label][2] 53 | 54 | # handle void 55 | mask = (255 == gray_image) 56 | color_image[0][mask] = color_image[1][mask] = color_image[2][mask] = 255 57 | 58 | return color_image 59 | 60 | 61 | def voc_color_map(N=256, normalized=False): 62 | def bitget(byteval, idx): 63 | return ((byteval & (1 << idx)) != 0) 64 | 65 | dtype = 'float32' if normalized else 'uint8' 66 | cmap = np.zeros((N, 3), dtype=dtype) 67 | for i in range(N): 68 | r = g = b = 0 69 | c = i 70 | for j in range(8): 71 | r = r | (bitget(c, 0) << 7 - j) 72 | g = g | (bitget(c, 1) << 7 - j) 73 | b = b | (bitget(c, 2) << 7 - j) 74 | c = c >> 3 75 | 76 | cmap[i] = np.array([r, g, b]) 77 | 78 | cmap = cmap / 255 if normalized else cmap 79 | return cmap 80 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/generate_mappings.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | import os 4 | from utilities.print_utils import * 5 | 6 | def get_mappings(root_dir, files, annot_name): 7 | pairs = [] 8 | for f in files: 9 | f = f.replace(root_dir, '/') 10 | img_f = f.replace(annot_name, 'leftImg8bit') 11 | img_f = img_f.replace('_labelTrainIds.png', '.png') 12 | if not os.path.isfile(root_dir + img_f): 13 | print_error_message('{} file does not exist. Please check'.format(root_dir + img_f)) 14 | exit() 15 | line = img_f + ',' + f 16 | pairs.append(line) 17 | return pairs 18 | 19 | def main(cityscapesPath, split): 20 | searchFine = os.path.join(cityscapesPath, "gtFine", split, "*", '*_labelTrainIds.png') 21 | filesFine = glob.glob(searchFine) 22 | filesFine.sort() 23 | 24 | if not filesFine: 25 | print_warning_message("Did not find any files. 
Please check root directory: {}.".format(cityscapesPath)) 26 | fine_pairs = [] 27 | else: 28 | print_info_message('{} files found for {} split'.format(len(filesFine), split)) 29 | fine_pairs = get_mappings(cityscapesPath, filesFine, 'gtFine') 30 | 31 | if not fine_pairs: 32 | print_error_message('No pairs exist. Exiting') 33 | exit() 34 | else: 35 | print_info_message('Creating train and val files.') 36 | f_name = split + '.txt' 37 | with open(os.path.join(cityscapesPath, f_name), 'w') as txtFile: 38 | for pair in fine_pairs: 39 | txtFile.write(pair + '\n') 40 | print_info_message('{} created in {} with {} pairs'.format(f_name, cityscapesPath, len(fine_pairs))) 41 | 42 | if split == 'train': 43 | split_orig = split 44 | split = split + '_extra' 45 | searchCoarse = os.path.join(cityscapesPath, "gtCoarse", split, "*", '*_labelTrainIds.png') 46 | filesCoarse = glob.glob(searchCoarse) 47 | filesCoarse.sort() 48 | if not filesCoarse: 49 | print_warning_message("Did not find any files. Please check root directory: {}.".format(cityscapesPath)) 50 | course_pairs = [] 51 | else: 52 | print_info_message('{} files found for {} split'.format(len(filesCoarse), split)) 53 | course_pairs = get_mappings(cityscapesPath, filesCoarse, 'gtCoarse') 54 | if not course_pairs: 55 | print_warning_message('No pairs exist for coarse data') 56 | return 57 | else: 58 | print_info_message('Creating train and val files.') 59 | f_name = split_orig + '_coarse.txt' 60 | with open(os.path.join(cityscapesPath, f_name), 'w') as txtFile: 61 | for pair in course_pairs: 62 | txtFile.write(pair + '\n') 63 | print_info_message('{} created in {} with {} pairs'.format(f_name, cityscapesPath, len(course_pairs))) 64 | 65 | if __name__ == '__main__': 66 | cityscapes_path = '../../../vision_datasets/cityscapes/' 67 | main(cityscapes_path, "train") 68 | main(cityscapes_path, "val") -------------------------------------------------------------------------------- /utils/metric/metric.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import cv2 3 | import numpy as np 4 | 5 | from multiprocessing import Pool 6 | # import copy_reg 7 | import copyreg 8 | import types 9 | 10 | 11 | def _pickle_method(m): 12 | if m.im_self is None: 13 | return getattr, (m.im_class, m.im_func.func_name) 14 | else: 15 | return getattr, (m.im_self, m.im_func.func_name) 16 | 17 | 18 | copyreg.pickle(types.MethodType, _pickle_method) 19 | 20 | 21 | class ConfusionMatrix(object): 22 | 23 | def __init__(self, nclass, classes=None, ignore_label=255): 24 | self.nclass = nclass 25 | self.classes = classes 26 | self.M = np.zeros((nclass, nclass)) 27 | self.ignore_label = ignore_label 28 | 29 | def add(self, gt, pred): 30 | assert (np.max(pred) <= self.nclass) 31 | assert (len(gt) == len(pred)) 32 | for i in range(len(gt)): 33 | if not gt[i] == self.ignore_label: 34 | self.M[gt[i], pred[i]] += 1.0 35 | 36 | def addM(self, matrix): 37 | assert (matrix.shape == self.M.shape) 38 | self.M += matrix 39 | 40 | def __str__(self): 41 | pass 42 | 43 | # P_ii is the number of correctly predicted pixels; P_ij and P_ji are interpreted as false positives and false negatives respectively, although both are sums of false positives and false negatives 44 | def recall(self): # of the pixels predicted as a given class, the number confirmed to actually be that class 45 | recall = 0.0 46 | for i in range(self.nclass): 47 | recall += self.M[i, i] / np.sum(self.M[:, i]) 48 | 49 | return recall / self.nclass 50 | 51 | def accuracy(self): # correctly segmented pixels divided by the total number of pixels 52 | accuracy = 0.0 53 | for i in range(self.nclass): 54 | accuracy += self.M[i, i] / np.sum(self.M[i, :]) 55 | 56 | return accuracy / self.nclass 57 | 58 | # Jaccard index, also known as Intersection over Union (IoU) 59 | def jaccard(self): 60 | 
jaccard = 0.0 61 | jaccard_perclass = [] 62 | for i in range(self.nclass): 63 | if not self.M[i, i] == 0: 64 | jaccard_perclass.append(self.M[i, i] / (np.sum(self.M[i, :]) + np.sum(self.M[:, i]) - self.M[i, i])) 65 | 66 | return np.sum(jaccard_perclass) / len(jaccard_perclass), jaccard_perclass, self.M 67 | 68 | def generateM(self, item): 69 | gt, pred = item 70 | m = np.zeros((self.nclass, self.nclass)) 71 | assert (len(gt) == len(pred)) 72 | for i in range(len(gt)): 73 | if gt[i] < self.nclass: # and pred[i] < self.nclass: 74 | m[gt[i], pred[i]] += 1.0 75 | return m 76 | 77 | 78 | def get_iou(data_list, class_num, save_path=None): 79 | """ 80 | Args: 81 | data_list: a list, its elements [gt, output] 82 | class_num: the number of label 83 | """ 84 | from multiprocessing import Pool 85 | 86 | ConfM = ConfusionMatrix(class_num) 87 | f = ConfM.generateM 88 | pool = Pool() 89 | m_list = pool.map(f, data_list) 90 | pool.close() 91 | pool.join() 92 | 93 | for m in m_list: 94 | ConfM.addM(m) 95 | 96 | aveJ, j_list, M = ConfM.jaccard() 97 | # print(j_list) 98 | # print(M) 99 | # print('meanIOU: ' + str(aveJ) + '\n') 100 | 101 | if save_path: 102 | with open(save_path, 'w') as f: 103 | f.write('meanIOU: ' + str(aveJ) + '\n') 104 | f.write(str(j_list) + '\n') 105 | f.write(str(M) + '\n') 106 | return aveJ, j_list 107 | -------------------------------------------------------------------------------- /utils/optim/RAdam.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | class RAdam(Optimizer): 6 | 7 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 8 | defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) 9 | self.buffer = [[None, None, None] for ind in range(10)] 10 | super(RAdam, self).__init__(params, defaults) 11 | 12 | def __setstate__(self, state): 13 | super(RAdam, self).__setstate__(state) 14 | 15 | def step(self, closure=None): 16 | 17 | loss = None 18 | if closure is not None: 19 | loss = closure() 20 | 21 | for group in self.param_groups: 22 | 23 | for p in group['params']: 24 | if p.grad is None: 25 | continue 26 | grad = p.grad.data.float() 27 | if grad.is_sparse: 28 | raise RuntimeError('RAdam does not support sparse gradients') 29 | 30 | p_data_fp32 = p.data.float() 31 | 32 | state = self.state[p] 33 | 34 | if len(state) == 0: 35 | state['step'] = 0 36 | state['exp_avg'] = torch.zeros_like(p_data_fp32) 37 | state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) 38 | else: 39 | state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) 40 | state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) 41 | 42 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 43 | beta1, beta2 = group['betas'] 44 | 45 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 46 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 47 | 48 | state['step'] += 1 49 | buffered = self.buffer[int(state['step'] % 10)] 50 | if state['step'] == buffered[0]: 51 | N_sma, step_size = buffered[1], buffered[2] 52 | else: 53 | buffered[0] = state['step'] 54 | beta2_t = beta2 ** state['step'] 55 | N_sma_max = 2 / (1 - beta2) - 1 56 | N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) 57 | buffered[1] = N_sma 58 | 59 | # more conservative since it's an approximated value 60 | if N_sma >= 5: 61 | step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - 
beta1 ** state['step']) 62 | else: 63 | step_size = group['lr'] / (1 - beta1 ** state['step']) 64 | buffered[2] = step_size 65 | 66 | if group['weight_decay'] != 0: 67 | p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) 68 | 69 | # more conservative since it's an approximated value 70 | if N_sma >= 5: 71 | denom = exp_avg_sq.sqrt().add_(group['eps']) 72 | p_data_fp32.addcdiv_(-step_size, exp_avg, denom) 73 | else: 74 | p_data_fp32.add_(-step_size, exp_avg) 75 | 76 | p.data.copy_(p_data_fp32) 77 | 78 | return loss -------------------------------------------------------------------------------- /model/ESPNet_v2/SegmentationModel.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #ESPNetv2: A Light-weight, Power Efficient, and General Purpose Convolutional Neural Network 3 | #Paper-Link: https://arxiv.org/pdf/1811.11431.pdf 4 | ################################################################################################### 5 | 6 | 7 | import os 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchsummary import summary 12 | 13 | from model.ESPNet_v2.Model import EESPNet, EESP 14 | from model.ESPNet_v2.cnn_utils import * 15 | 16 | 17 | __all__ = ["EESPNet_Seg"] 18 | 19 | 20 | class EESPNet_Seg(nn.Module): 21 | def __init__(self, classes=19, s=2, pretrained=None, gpus=1): 22 | super().__init__() 23 | classificationNet = EESPNet(classes=1000, s=s) 24 | if gpus >=1: 25 | classificationNet = nn.DataParallel(classificationNet) 26 | # print(classificationNet) 27 | # load the pretrained weights 28 | if pretrained: 29 | if not os.path.isfile(pretrained): 30 | print('Weight file does not exist. 
Training without pre-trained weights') 31 | print('Model initialized with pretrained weights') 32 | classificationNet.load_state_dict(torch.load(pretrained)) 33 | 34 | self.net = classificationNet.module 35 | 36 | del classificationNet 37 | # delete last few layers 38 | del self.net.classifier 39 | del self.net.level5 40 | del self.net.level5_0 41 | if s <=0.5: 42 | p = 0.1 43 | else: 44 | p=0.2 45 | 46 | self.proj_L4_C = CBR(self.net.level4[-1].module_act.num_parameters, self.net.level3[-1].module_act.num_parameters, 1, 1) 47 | pspSize = 2*self.net.level3[-1].module_act.num_parameters 48 | self.pspMod = nn.Sequential(EESP(pspSize, pspSize //2, stride=1, k=4, r_lim=7), 49 | PSPModule(pspSize // 2, pspSize //2)) 50 | self.project_l3 = nn.Sequential(nn.Dropout2d(p=p), C(pspSize // 2, classes, 1, 1)) 51 | self.act_l3 = BR(classes) 52 | self.project_l2 = CBR(self.net.level2_0.act.num_parameters + classes, classes, 1, 1) 53 | self.project_l1 = nn.Sequential(nn.Dropout2d(p=p), C(self.net.level1.act.num_parameters + classes, classes, 1, 1)) 54 | 55 | def hierarchicalUpsample(self, x, factor=3): 56 | for i in range(factor): 57 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 58 | return x 59 | 60 | 61 | def forward(self, input): 62 | out_l1, out_l2, out_l3, out_l4 = self.net(input, seg=True) 63 | out_l4_proj = self.proj_L4_C(out_l4) 64 | up_l4_to_l3 = F.interpolate(out_l4_proj, size=out_l3.size()[2:], mode='bilinear', align_corners=True) 65 | merged_l3_upl4 = self.pspMod(torch.cat([out_l3, up_l4_to_l3], 1)) 66 | proj_merge_l3_bef_act = self.project_l3(merged_l3_upl4) 67 | proj_merge_l3 = self.act_l3(proj_merge_l3_bef_act) 68 | out_up_l3 = F.interpolate(proj_merge_l3, scale_factor=2, mode='bilinear', align_corners=True) 69 | merge_l2 = self.project_l2(torch.cat([out_l2, out_up_l3], 1)) 70 | out_up_l2 = F.interpolate(merge_l2, scale_factor=2, mode='bilinear', align_corners=True) 71 | merge_l1 = self.project_l1(torch.cat([out_l1, out_up_l2], 1)) 72 | # if self.training: 73 | # return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True), self.hierarchicalUpsample(proj_merge_l3_bef_act) 74 | # else: 75 | # return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True) 76 | output = F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True) 77 | return output 78 | 79 | if __name__ == '__main__': 80 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 81 | model = EESPNet_Seg(classes=19, s=2).to(device) 82 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/UNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #U-Net: Convolutional Networks for BiomedicalImage Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1505.04597.pdf 4 | ###################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | 11 | 12 | 13 | __all__ = ["UNet"] 14 | 15 | 16 | class double_conv(nn.Module): 17 | '''(conv => BN => ReLU) * 2''' 18 | 19 | def __init__(self, in_ch, out_ch): 20 | super(double_conv, self).__init__() 21 | self.conv = nn.Sequential( 22 | nn.Conv2d(in_ch, out_ch, 3, padding=1), 23 | nn.BatchNorm2d(out_ch), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(out_ch, out_ch, 3, 
padding=1), 26 | nn.BatchNorm2d(out_ch), 27 | nn.ReLU(inplace=True) 28 | ) 29 | 30 | def forward(self, x): 31 | x = self.conv(x) 32 | return x 33 | 34 | 35 | class inconv(nn.Module): 36 | def __init__(self, in_ch, out_ch): 37 | super(inconv, self).__init__() 38 | self.conv = double_conv(in_ch, out_ch) 39 | 40 | def forward(self, x): 41 | x = self.conv(x) 42 | return x 43 | 44 | 45 | class down(nn.Module): 46 | def __init__(self, in_ch, out_ch): 47 | super(down, self).__init__() 48 | self.mpconv = nn.Sequential( 49 | nn.MaxPool2d(2), 50 | double_conv(in_ch, out_ch) 51 | ) 52 | 53 | def forward(self, x): 54 | x = self.mpconv(x) 55 | return x 56 | 57 | 58 | class up(nn.Module): 59 | def __init__(self, in_ch, out_ch, bilinear=True): 60 | super(up, self).__init__() 61 | self.bilinear = bilinear 62 | 63 | self.up = nn.ConvTranspose2d(in_ch // 2, in_ch // 2, 2, stride=2) 64 | 65 | self.conv = double_conv(in_ch, out_ch) 66 | 67 | def forward(self, x1, x2): 68 | if self.bilinear: 69 | x1 = F.interpolate(x1, scale_factor=2, mode='bilinear', align_corners=True) 70 | else: 71 | x1 = self.up(x1) 72 | 73 | # input is CHW 74 | diffY = x2.size()[2] - x1.size()[2] 75 | diffX = x2.size()[3] - x1.size()[3] 76 | 77 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 78 | diffY // 2, diffY - diffY // 2]) 79 | 80 | # for padding issues, see 81 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 82 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 83 | 84 | x = torch.cat([x2, x1], dim=1) 85 | x = self.conv(x) 86 | return x 87 | 88 | 89 | class outconv(nn.Module): 90 | def __init__(self, in_ch, out_ch): 91 | super(outconv, self).__init__() 92 | self.conv = nn.Conv2d(in_ch, out_ch, 1) 93 | 94 | def forward(self, x): 95 | x = self.conv(x) 96 | return x 97 | 98 | 99 | 100 | class UNet(nn.Module): 101 | def __init__(self, classes): 102 | super(UNet, self).__init__() 103 | self.inc = inconv(3, 64) 104 | self.down1 = down(64, 128) 105 | self.down2 = down(128, 256) 106 | self.down3 = down(256, 512) 107 | self.down4 = down(512, 512) 108 | self.up1 = up(1024, 256) 109 | self.up2 = up(512, 128) 110 | self.up3 = up(256, 64) 111 | self.up4 = up(128, 64) 112 | self.outc = outconv(64, classes) 113 | 114 | def forward(self, x): 115 | x1 = self.inc(x) 116 | x2 = self.down1(x1) 117 | x3 = self.down2(x2) 118 | x4 = self.down3(x3) 119 | x5 = self.down4(x4) 120 | x = self.up1(x5, x4) 121 | x = self.up2(x, x3) 122 | x = self.up3(x, x2) 123 | x = self.up4(x, x1) 124 | x = self.outc(x) 125 | #return F.sigmoid(x) 126 | 127 | return x 128 | 129 | 130 | 131 | 132 | """print layers and params of network""" 133 | if __name__ == '__main__': 134 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 135 | model = UNet(classes=19).to(device) 136 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | import torch.backends.cudnn as cudnn 6 | from argparse import ArgumentParser 7 | # user 8 | from builders.model_builder import build_model 9 | from builders.dataset_builder import build_dataset_test 10 | from utils.utils import save_predict 11 | from utils.convert_state import convert_state_dict 12 | 13 | 14 | def parse_args(): 15 | parser = ArgumentParser(description='Efficient semantic segmentation') 
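    # A hedged example invocation (the checkpoint path is a placeholder, not a file shipped with the repository):
    #   python predict.py --model ENet --dataset camvid --checkpoint <path/to/trained_model.pth> --gpus 0
    # Predictions are written under <save_seg_dir>/<dataset>/predict/<model>/ (see __main__ below).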
16 | # model and dataset 17 | parser.add_argument('--model', default="ENet", help="model name (default: ENet)") 18 | parser.add_argument('--dataset', default="camvid", help="dataset: cityscapes or camvid") 19 | parser.add_argument('--num_workers', type=int, default=2, help="the number of parallel threads") 20 | parser.add_argument('--batch_size', type=int, default=1, 21 | help="the batch_size is set to 1 when evaluating or testing") 22 | parser.add_argument('--checkpoint', type=str, default="", 23 | help="path to the checkpoint used for evaluating or testing") 24 | parser.add_argument('--save_seg_dir', type=str, default="./server/", 25 | help="directory where prediction results are saved") 26 | parser.add_argument('--cuda', default=True, help="use GPU for inference (default: True)") 27 | parser.add_argument("--gpus", default="0", type=str, help="gpu ids (default: 0)") 28 | args = parser.parse_args() 29 | 30 | return args 31 | 32 | 33 | 34 | def predict(args, test_loader, model): 35 | """ 36 | args: 37 | test_loader: DataLoader for the test set (for datasets whose test split provides no labels) 38 | model: the segmentation model to run 39 | return: None (greyscale predictions are saved to args.save_seg_dir) 40 | """ 41 | # evaluation or test mode 42 | model.eval() 43 | total_batches = len(test_loader) 44 | for i, (input, size, name) in enumerate(test_loader): 45 | with torch.no_grad(): 46 | input_var = input.cuda() if args.cuda else input 47 | start_time = time.time() 48 | output = model(input_var) 49 | if args.cuda: torch.cuda.synchronize() 50 | time_taken = time.time() - start_time 51 | print('[%d/%d] time: %.2f' % (i + 1, total_batches, time_taken)) 52 | output = output.cpu().data[0].numpy() 53 | output = output.transpose(1, 2, 0) 54 | output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 55 | 56 | # Save the predicted greyscale output for the Cityscapes official evaluation 57 | # Modify image name to meet official requirement 58 | name[0] = name[0].rsplit('_', 1)[0] + '*' 59 | save_predict(output, None, name[0], args.dataset, args.save_seg_dir, 60 | output_grey=True, output_color=False, gt_color=False) 61 | 62 | 63 | def test_model(args): 64 | """ 65 | main function for testing 66 | param args: global arguments 67 | return: None 68 | """ 69 | print(args) 70 | 71 | if args.cuda: 72 | print("=====> use gpu id: '{}'".format(args.gpus)) 73 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 74 | if not torch.cuda.is_available(): 75 | raise Exception("no GPU found or wrong gpu id, please run without --cuda") 76 | 77 | # build the model 78 | model = build_model(args.model, num_classes=args.classes) 79 | 80 | if args.cuda: 81 | model = model.cuda() # using GPU for inference 82 | cudnn.benchmark = True 83 | 84 | if not os.path.exists(args.save_seg_dir): 85 | os.makedirs(args.save_seg_dir) 86 | 87 | # load the test set 88 | datas, testLoader = build_dataset_test(args.dataset, args.num_workers, none_gt=True) 89 | 90 | if args.checkpoint: 91 | if os.path.isfile(args.checkpoint): 92 | print("=====> loading checkpoint '{}'".format(args.checkpoint)) 93 | checkpoint = torch.load(args.checkpoint) 94 | model.load_state_dict(checkpoint['model']) 95 | # model.load_state_dict(convert_state_dict(checkpoint['model'])) 96 | else: 97 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 98 | raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) 99 | 100 | print("=====> beginning testing") 101 | print("test set length: ", len(testLoader)) 102 | predict(args, testLoader, model) 103 | 104 | 105 | if __name__ == '__main__': 106 | 107 | args = parse_args() 108 | 109 | args.save_seg_dir =
os.path.join(args.save_seg_dir, args.dataset, 'predict', args.model) 110 | 111 | if args.dataset == 'cityscapes': 112 | args.classes = 19 113 | elif args.dataset == 'camvid': 114 | args.classes = 11 115 | else: 116 | raise NotImplementedError( 117 | "This repository now supports two datasets: cityscapes and camvid, %s is not included" % args.dataset) 118 | 119 | test_model(args) 120 | -------------------------------------------------------------------------------- /utils/optim/AdamW.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | class AdamW(Optimizer): 6 | """Implements Adam algorithm. 7 | It has been proposed in `Adam: A Method for Stochastic Optimization`_. 8 | Arguments: 9 | params (iterable): iterable of parameters to optimize or dicts defining 10 | parameter groups 11 | lr (float, optional): learning rate (default: 1e-3) 12 | betas (Tuple[float, float], optional): coefficients used for computing 13 | running averages of gradient and its square (default: (0.9, 0.999)) 14 | eps (float, optional): term added to the denominator to improve 15 | numerical stability (default: 1e-8) 16 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 17 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 18 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 19 | .. _Adam\: A Method for Stochastic Optimization: 20 | https://arxiv.org/abs/1412.6980 21 | .. _On the Convergence of Adam and Beyond: 22 | https://openreview.net/forum?id=ryQu7f-RZ 23 | """ 24 | 25 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 26 | weight_decay=0, amsgrad=False): 27 | if not 0.0 <= lr: 28 | raise ValueError("Invalid learning rate: {}".format(lr)) 29 | if not 0.0 <= eps: 30 | raise ValueError("Invalid epsilon value: {}".format(eps)) 31 | if not 0.0 <= betas[0] < 1.0: 32 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 33 | if not 0.0 <= betas[1] < 1.0: 34 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 35 | defaults = dict(lr=lr, betas=betas, eps=eps, 36 | weight_decay=weight_decay, amsgrad=amsgrad) 37 | super(AdamW, self).__init__(params, defaults) 38 | 39 | def __setstate__(self, state): 40 | super(AdamW, self).__setstate__(state) 41 | for group in self.param_groups: 42 | group.setdefault('amsgrad', False) 43 | 44 | def step(self, closure=None): 45 | """Performs a single optimization step. 46 | Arguments: 47 | closure (callable, optional): A closure that reevaluates the model 48 | and returns the loss. 49 | """ 50 | loss = None 51 | if closure is not None: 52 | loss = closure() 53 | 54 | for group in self.param_groups: 55 | for p in group['params']: 56 | if p.grad is None: 57 | continue 58 | grad = p.grad.data 59 | if grad.is_sparse: 60 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 61 | amsgrad = group['amsgrad'] 62 | 63 | state = self.state[p] 64 | 65 | # State initialization 66 | if len(state) == 0: 67 | state['step'] = 0 68 | # Exponential moving average of gradient values 69 | state['exp_avg'] = torch.zeros_like(p.data) 70 | # Exponential moving average of squared gradient values 71 | state['exp_avg_sq'] = torch.zeros_like(p.data) 72 | if amsgrad: 73 | # Maintains max of all exp. moving avg. of sq. grad. 
values 74 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 75 | 76 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 77 | if amsgrad: 78 | max_exp_avg_sq = state['max_exp_avg_sq'] 79 | beta1, beta2 = group['betas'] 80 | 81 | state['step'] += 1 82 | 83 | # if group['weight_decay'] != 0: 84 | # grad = grad.add(group['weight_decay'], p.data) 85 | 86 | # Decay the first and second moment running average coefficient 87 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 88 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 89 | if amsgrad: 90 | # Maintains the maximum of all 2nd moment running avg. till now 91 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 92 | # Use the max. for normalizing running avg. of gradient 93 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 94 | else: 95 | denom = exp_avg_sq.sqrt().add_(group['eps']) 96 | 97 | bias_correction1 = 1 - beta1 ** state['step'] 98 | bias_correction2 = 1 - beta2 ** state['step'] 99 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 100 | 101 | # p.data.addcdiv_(-step_size, exp_avg, denom) 102 | p.data.add_(-step_size, torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) ) 103 | 104 | return loss -------------------------------------------------------------------------------- /dataset/camvid/camvid_val_list.txt: -------------------------------------------------------------------------------- 1 | val/0016E5_07959.png valannot/0016E5_07959.png 2 | val/0016E5_07961.png valannot/0016E5_07961.png 3 | val/0016E5_07963.png valannot/0016E5_07963.png 4 | val/0016E5_07965.png valannot/0016E5_07965.png 5 | val/0016E5_07967.png valannot/0016E5_07967.png 6 | val/0016E5_07969.png valannot/0016E5_07969.png 7 | val/0016E5_07971.png valannot/0016E5_07971.png 8 | val/0016E5_07973.png valannot/0016E5_07973.png 9 | val/0016E5_07975.png valannot/0016E5_07975.png 10 | val/0016E5_07977.png valannot/0016E5_07977.png 11 | val/0016E5_07979.png valannot/0016E5_07979.png 12 | val/0016E5_07981.png valannot/0016E5_07981.png 13 | val/0016E5_07983.png valannot/0016E5_07983.png 14 | val/0016E5_07985.png valannot/0016E5_07985.png 15 | val/0016E5_07987.png valannot/0016E5_07987.png 16 | val/0016E5_07989.png valannot/0016E5_07989.png 17 | val/0016E5_07991.png valannot/0016E5_07991.png 18 | val/0016E5_07993.png valannot/0016E5_07993.png 19 | val/0016E5_07995.png valannot/0016E5_07995.png 20 | val/0016E5_07997.png valannot/0016E5_07997.png 21 | val/0016E5_07999.png valannot/0016E5_07999.png 22 | val/0016E5_08001.png valannot/0016E5_08001.png 23 | val/0016E5_08003.png valannot/0016E5_08003.png 24 | val/0016E5_08005.png valannot/0016E5_08005.png 25 | val/0016E5_08007.png valannot/0016E5_08007.png 26 | val/0016E5_08009.png valannot/0016E5_08009.png 27 | val/0016E5_08011.png valannot/0016E5_08011.png 28 | val/0016E5_08013.png valannot/0016E5_08013.png 29 | val/0016E5_08015.png valannot/0016E5_08015.png 30 | val/0016E5_08017.png valannot/0016E5_08017.png 31 | val/0016E5_08019.png valannot/0016E5_08019.png 32 | val/0016E5_08021.png valannot/0016E5_08021.png 33 | val/0016E5_08023.png valannot/0016E5_08023.png 34 | val/0016E5_08025.png valannot/0016E5_08025.png 35 | val/0016E5_08027.png valannot/0016E5_08027.png 36 | val/0016E5_08029.png valannot/0016E5_08029.png 37 | val/0016E5_08031.png valannot/0016E5_08031.png 38 | val/0016E5_08033.png valannot/0016E5_08033.png 39 | val/0016E5_08035.png valannot/0016E5_08035.png 40 | val/0016E5_08037.png valannot/0016E5_08037.png 41 | val/0016E5_08039.png valannot/0016E5_08039.png 
42 | val/0016E5_08041.png valannot/0016E5_08041.png 43 | val/0016E5_08043.png valannot/0016E5_08043.png 44 | val/0016E5_08045.png valannot/0016E5_08045.png 45 | val/0016E5_08047.png valannot/0016E5_08047.png 46 | val/0016E5_08049.png valannot/0016E5_08049.png 47 | val/0016E5_08051.png valannot/0016E5_08051.png 48 | val/0016E5_08053.png valannot/0016E5_08053.png 49 | val/0016E5_08055.png valannot/0016E5_08055.png 50 | val/0016E5_08057.png valannot/0016E5_08057.png 51 | val/0016E5_08059.png valannot/0016E5_08059.png 52 | val/0016E5_08061.png valannot/0016E5_08061.png 53 | val/0016E5_08063.png valannot/0016E5_08063.png 54 | val/0016E5_08065.png valannot/0016E5_08065.png 55 | val/0016E5_08067.png valannot/0016E5_08067.png 56 | val/0016E5_08069.png valannot/0016E5_08069.png 57 | val/0016E5_08071.png valannot/0016E5_08071.png 58 | val/0016E5_08073.png valannot/0016E5_08073.png 59 | val/0016E5_08075.png valannot/0016E5_08075.png 60 | val/0016E5_08077.png valannot/0016E5_08077.png 61 | val/0016E5_08079.png valannot/0016E5_08079.png 62 | val/0016E5_08081.png valannot/0016E5_08081.png 63 | val/0016E5_08083.png valannot/0016E5_08083.png 64 | val/0016E5_08085.png valannot/0016E5_08085.png 65 | val/0016E5_08087.png valannot/0016E5_08087.png 66 | val/0016E5_08089.png valannot/0016E5_08089.png 67 | val/0016E5_08091.png valannot/0016E5_08091.png 68 | val/0016E5_08093.png valannot/0016E5_08093.png 69 | val/0016E5_08095.png valannot/0016E5_08095.png 70 | val/0016E5_08097.png valannot/0016E5_08097.png 71 | val/0016E5_08099.png valannot/0016E5_08099.png 72 | val/0016E5_08101.png valannot/0016E5_08101.png 73 | val/0016E5_08103.png valannot/0016E5_08103.png 74 | val/0016E5_08105.png valannot/0016E5_08105.png 75 | val/0016E5_08107.png valannot/0016E5_08107.png 76 | val/0016E5_08109.png valannot/0016E5_08109.png 77 | val/0016E5_08111.png valannot/0016E5_08111.png 78 | val/0016E5_08113.png valannot/0016E5_08113.png 79 | val/0016E5_08115.png valannot/0016E5_08115.png 80 | val/0016E5_08117.png valannot/0016E5_08117.png 81 | val/0016E5_08119.png valannot/0016E5_08119.png 82 | val/0016E5_08121.png valannot/0016E5_08121.png 83 | val/0016E5_08123.png valannot/0016E5_08123.png 84 | val/0016E5_08125.png valannot/0016E5_08125.png 85 | val/0016E5_08127.png valannot/0016E5_08127.png 86 | val/0016E5_08129.png valannot/0016E5_08129.png 87 | val/0016E5_08131.png valannot/0016E5_08131.png 88 | val/0016E5_08133.png valannot/0016E5_08133.png 89 | val/0016E5_08135.png valannot/0016E5_08135.png 90 | val/0016E5_08137.png valannot/0016E5_08137.png 91 | val/0016E5_08139.png valannot/0016E5_08139.png 92 | val/0016E5_08141.png valannot/0016E5_08141.png 93 | val/0016E5_08143.png valannot/0016E5_08143.png 94 | val/0016E5_08145.png valannot/0016E5_08145.png 95 | val/0016E5_08147.png valannot/0016E5_08147.png 96 | val/0016E5_08149.png valannot/0016E5_08149.png 97 | val/0016E5_08151.png valannot/0016E5_08151.png 98 | val/0016E5_08153.png valannot/0016E5_08153.png 99 | val/0016E5_08155.png valannot/0016E5_08155.png 100 | val/0016E5_08157.png valannot/0016E5_08157.png 101 | val/0016E5_08159.png valannot/0016E5_08159.png 102 | -------------------------------------------------------------------------------- /utils/scheduler/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | from torch.optim.lr_scheduler import MultiStepLR, _LRScheduler 3 | 4 | 5 | class WarmupMultiStepLR(MultiStepLR): 6 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3, 7 | 
warmup_iters=500, last_epoch=-1): 8 | self.warmup_factor = warmup_factor 9 | self.warmup_iters = warmup_iters 10 | super().__init__(optimizer, milestones, gamma, last_epoch) 11 | 12 | def get_lr(self): 13 | if self.last_epoch <= self.warmup_iters: 14 | alpha = self.last_epoch / self.warmup_iters 15 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 16 | # print(self.base_lrs[0]*warmup_factor) 17 | return [lr * warmup_factor for lr in self.base_lrs] 18 | else: 19 | lr = super().get_lr() 20 | return lr 21 | 22 | 23 | class WarmupCosineLR(_LRScheduler): 24 | def __init__(self, optimizer, T_max, warmup_factor=1.0 / 3, warmup_iters=500, 25 | eta_min=0, last_epoch=-1): 26 | self.warmup_factor = warmup_factor 27 | self.warmup_iters = warmup_iters 28 | self.T_max, self.eta_min = T_max, eta_min 29 | super().__init__(optimizer, last_epoch) 30 | 31 | def get_lr(self): 32 | if self.last_epoch <= self.warmup_iters: 33 | alpha = self.last_epoch / self.warmup_iters 34 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 35 | # print(self.base_lrs[0]*warmup_factor) 36 | return [lr * warmup_factor for lr in self.base_lrs] 37 | else: 38 | return [self.eta_min + (base_lr - self.eta_min) * 39 | (1 + math.cos( 40 | math.pi * (self.last_epoch - self.warmup_iters) / (self.T_max - self.warmup_iters))) / 2 41 | for base_lr in self.base_lrs] 42 | 43 | 44 | 45 | class WarmupPolyLR(_LRScheduler): 46 | def __init__(self, optimizer, T_max, cur_iter, warmup_factor=1.0 / 3, warmup_iters=500, 47 | eta_min=0, power=0.9): 48 | self.warmup_factor = warmup_factor 49 | self.warmup_iters = warmup_iters 50 | self.power = power 51 | self.T_max, self.eta_min = T_max, eta_min 52 | self.cur_iter = cur_iter 53 | super().__init__(optimizer) 54 | 55 | def get_lr(self): 56 | if self.cur_iter <= self.warmup_iters: 57 | alpha = self.cur_iter / self.warmup_iters 58 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 59 | # print(self.base_lrs[0]*warmup_factor) 60 | return [lr * warmup_factor for lr in self.base_lrs] 61 | else: 62 | return [self.eta_min + (base_lr - self.eta_min) * 63 | math.pow(1 - (self.cur_iter - self.warmup_iters) / (self.T_max - self.warmup_iters), 64 | self.power) for base_lr in self.base_lrs] 65 | 66 | 67 | def poly_learning_rate(cur_epoch, max_epoch, curEpoch_iter, perEpoch_iter, baselr): 68 | cur_iter = cur_epoch * perEpoch_iter + curEpoch_iter 69 | max_iter = max_epoch * perEpoch_iter 70 | lr = baselr * pow((1 - 1.0 * cur_iter / max_iter), 0.9) 71 | 72 | return lr 73 | 74 | 75 | 76 | class GradualWarmupScheduler(_LRScheduler): 77 | """ Gradually warm-up(increasing) learning rate in optimizer. 78 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 79 | Args: 80 | optimizer (Optimizer): Wrapped optimizer. 81 | min_lr_mul: target learning rate = base lr * min_lr_mul 82 | total_epoch: target learning rate is reached at total_epoch, gradually 83 | after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) 84 | """ 85 | 86 | def __init__(self, optimizer, total_epoch, min_lr_mul=0.1, after_scheduler=None): 87 | self.min_lr_mul = min_lr_mul 88 | if self.min_lr_mul > 1. 
or self.min_lr_mul < 0.: 89 | raise ValueError('min_lr_mul should be [0., 1.]') 90 | self.total_epoch = total_epoch 91 | self.after_scheduler = after_scheduler 92 | self.finished = False 93 | super(GradualWarmupScheduler, self).__init__(optimizer) 94 | 95 | def get_lr(self): 96 | if self.last_epoch > self.total_epoch: 97 | if self.after_scheduler: 98 | if not self.finished: 99 | self.after_scheduler.base_lrs = self.base_lrs 100 | self.finished = True 101 | return self.after_scheduler.get_lr() 102 | else: 103 | return self.base_lrs 104 | else: 105 | return [base_lr * (self.min_lr_mul + (1. - self.min_lr_mul) * (self.last_epoch / float(self.total_epoch))) for base_lr in self.base_lrs] 106 | 107 | def step(self, epoch=None): 108 | if self.finished and self.after_scheduler: 109 | return self.after_scheduler.step(epoch - self.total_epoch) 110 | else: 111 | return super(GradualWarmupScheduler, self).step(epoch) 112 | 113 | 114 | 115 | 116 | if __name__ == '__main__': 117 | optim = WarmupPolyLR() 118 | -------------------------------------------------------------------------------- /model/ERFNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation 3 | #Paper-Link: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf 4 | ###################################################################################### 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | __all__ = ["ERFNet"] 14 | 15 | 16 | class DownsamplerBlock (nn.Module): 17 | def __init__(self, ninput, noutput): 18 | super().__init__() 19 | 20 | self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True) 21 | self.pool = nn.MaxPool2d(2, stride=2) 22 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 23 | 24 | def forward(self, input): 25 | output = torch.cat([self.conv(input), self.pool(input)], 1) 26 | output = self.bn(output) 27 | return F.relu(output) 28 | 29 | 30 | class non_bottleneck_1d (nn.Module): 31 | def __init__(self, chann, dropprob, dilated): 32 | super().__init__() 33 | 34 | self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1,0), bias=True) 35 | 36 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True) 37 | 38 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 39 | 40 | self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1*dilated,0), bias=True, dilation = (dilated,1)) 41 | 42 | self.conv1x3_2 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1*dilated), bias=True, dilation = (1, dilated)) 43 | 44 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 45 | 46 | self.dropout = nn.Dropout2d(dropprob) 47 | 48 | 49 | def forward(self, input): 50 | 51 | output = self.conv3x1_1(input) 52 | output = F.relu(output) 53 | output = self.conv1x3_1(output) 54 | output = self.bn1(output) 55 | output = F.relu(output) 56 | 57 | output = self.conv3x1_2(output) 58 | output = F.relu(output) 59 | output = self.conv1x3_2(output) 60 | output = self.bn2(output) 61 | 62 | if (self.dropout.p != 0): 63 | output = self.dropout(output) 64 | 65 | return F.relu(output+input) #+input = identity (residual connection) 66 | 67 | 68 | class Encoder(nn.Module): 69 | def __init__(self, num_classes): 70 | super().__init__() 71 | self.initial_block = DownsamplerBlock(3,16) 72 | 
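# encoder body: two more DownsamplerBlocks (16->64, 64->128) interleaved with non_bottleneck_1d residual units; the last stage repeats dilations 2/4/8/16 twice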
73 | self.layers = nn.ModuleList() 74 | 75 | self.layers.append(DownsamplerBlock(16,64)) 76 | 77 | for x in range(0, 5): #5 times 78 | self.layers.append(non_bottleneck_1d(64, 0.03, 1)) 79 | 80 | self.layers.append(DownsamplerBlock(64,128)) 81 | 82 | for x in range(0, 2): #2 times 83 | self.layers.append(non_bottleneck_1d(128, 0.3, 2)) 84 | self.layers.append(non_bottleneck_1d(128, 0.3, 4)) 85 | self.layers.append(non_bottleneck_1d(128, 0.3, 8)) 86 | self.layers.append(non_bottleneck_1d(128, 0.3, 16)) 87 | 88 | #Only in encoder mode: 89 | self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) 90 | 91 | def forward(self, input, predict=False): 92 | output = self.initial_block(input) 93 | 94 | for layer in self.layers: 95 | output = layer(output) 96 | 97 | if predict: 98 | output = self.output_conv(output) 99 | 100 | return output 101 | 102 | 103 | class UpsamplerBlock (nn.Module): 104 | def __init__(self, ninput, noutput): 105 | super().__init__() 106 | self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) 107 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 108 | 109 | def forward(self, input): 110 | output = self.conv(input) 111 | output = self.bn(output) 112 | return F.relu(output) 113 | 114 | class Decoder (nn.Module): 115 | def __init__(self, num_classes): 116 | super().__init__() 117 | 118 | self.layers = nn.ModuleList() 119 | 120 | self.layers.append(UpsamplerBlock(128,64)) 121 | self.layers.append(non_bottleneck_1d(64, 0, 1)) 122 | self.layers.append(non_bottleneck_1d(64, 0, 1)) 123 | 124 | self.layers.append(UpsamplerBlock(64,16)) 125 | self.layers.append(non_bottleneck_1d(16, 0, 1)) 126 | self.layers.append(non_bottleneck_1d(16, 0, 1)) 127 | 128 | self.output_conv = nn.ConvTranspose2d( 16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) 129 | 130 | def forward(self, input): 131 | output = input 132 | 133 | for layer in self.layers: 134 | output = layer(output) 135 | 136 | output = self.output_conv(output) 137 | 138 | return output 139 | 140 | #ERFNet 141 | class ERFNet(nn.Module): 142 | def __init__(self, classes, encoder=None): #use encoder to pass pretrained encoder 143 | super().__init__() 144 | 145 | if (encoder == None): 146 | self.encoder = Encoder(classes) 147 | else: 148 | self.encoder = encoder 149 | self.decoder = Decoder(classes) 150 | 151 | def forward(self, input, only_encode=False): 152 | if only_encode: 153 | return self.encoder.forward(input, predict=True) 154 | else: 155 | output = self.encoder(input) #predict=False by default 156 | return self.decoder.forward(output) 157 | 158 | """print layers and params of network""" 159 | if __name__ == '__main__': 160 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 161 | model = ERFNet(classes=19).to(device) 162 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/EDANet.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #EDANet:Efficient Dense Modules of Asymmetric Convolution for Real-Time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/ftp/arxiv/papers/1809/1809.06323.pdf 4 | ################################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | 11 
| 12 | __all__ = ["EDANet"] 13 | 14 | class DownsamplerBlock(nn.Module): 15 | def __init__(self, ninput, noutput): 16 | super(DownsamplerBlock,self).__init__() 17 | 18 | self.ninput = ninput 19 | self.noutput = noutput 20 | 21 | if self.ninput < self.noutput: 22 | # Wout > Win 23 | self.conv = nn.Conv2d(ninput, noutput-ninput, kernel_size=3, stride=2, padding=1) 24 | self.pool = nn.MaxPool2d(2, stride=2) 25 | else: 26 | # Wout < Win 27 | self.conv = nn.Conv2d(ninput, noutput, kernel_size=3, stride=2, padding=1) 28 | 29 | self.bn = nn.BatchNorm2d(noutput) 30 | 31 | def forward(self, x): 32 | if self.ninput < self.noutput: 33 | output = torch.cat([self.conv(x), self.pool(x)], 1) 34 | else: 35 | output = self.conv(x) 36 | 37 | output = self.bn(output) 38 | return F.relu(output) 39 | 40 | # --- Build the EDANet Module --- # 41 | class EDAModule(nn.Module): 42 | def __init__(self, ninput, dilated, k = 40, dropprob = 0.02): 43 | super().__init__() 44 | 45 | # k: growthrate 46 | # dropprob:a dropout layer between the last ReLU and the concatenation of each module 47 | 48 | self.conv1x1 = nn.Conv2d(ninput, k, kernel_size=1) 49 | self.bn0 = nn.BatchNorm2d(k) 50 | 51 | self.conv3x1_1 = nn.Conv2d(k, k, kernel_size=(3, 1),padding=(1,0)) 52 | self.conv1x3_1 = nn.Conv2d(k, k, kernel_size=(1, 3),padding=(0,1)) 53 | self.bn1 = nn.BatchNorm2d(k) 54 | 55 | self.conv3x1_2 = nn.Conv2d(k, k, (3,1), stride=1, padding=(dilated,0), dilation = dilated) 56 | self.conv1x3_2 = nn.Conv2d(k, k, (1,3), stride=1, padding=(0,dilated), dilation = dilated) 57 | self.bn2 = nn.BatchNorm2d(k) 58 | 59 | self.dropout = nn.Dropout2d(dropprob) 60 | 61 | 62 | def forward(self, x): 63 | input = x 64 | 65 | output = self.conv1x1(x) 66 | output = self.bn0(output) 67 | output = F.relu(output) 68 | 69 | output = self.conv3x1_1(output) 70 | output = self.conv1x3_1(output) 71 | output = self.bn1(output) 72 | output = F.relu(output) 73 | 74 | output = self.conv3x1_2(output) 75 | output = self.conv1x3_2(output) 76 | output = self.bn2(output) 77 | output = F.relu(output) 78 | 79 | if (self.dropout.p != 0): 80 | output = self.dropout(output) 81 | 82 | output = torch.cat([output,input],1) 83 | # print output.size() #check the output 84 | return output 85 | 86 | 87 | # --- Build the EDANet Block --- # 88 | class EDANetBlock(nn.Module): 89 | def __init__(self, in_channels, num_dense_layer, dilated, growth_rate): 90 | """ 91 | :param in_channels: input channel size 92 | :param num_dense_layer: the number of RDB layers 93 | :param growth_rate: growth_rate 94 | """ 95 | super().__init__() 96 | _in_channels = in_channels 97 | modules = [] 98 | for i in range(num_dense_layer): 99 | modules.append(EDAModule(_in_channels, dilated[i], growth_rate)) 100 | _in_channels += growth_rate 101 | self.residual_dense_layers = nn.Sequential(*modules) 102 | #self.conv_1x1 = nn.Conv2d(_in_channels, in_channels, kernel_size=1, padding=0) 103 | 104 | def forward(self, x): 105 | out = self.residual_dense_layers(x) 106 | #out = self.conv_1x1(out) 107 | # out = out + x 108 | return out 109 | 110 | 111 | class EDANet(nn.Module): 112 | def __init__(self, classes=19): 113 | super(EDANet,self).__init__() 114 | 115 | self.layers = nn.ModuleList() 116 | 117 | # DownsamplerBlock1 118 | self.layers.append(DownsamplerBlock(3, 15)) 119 | 120 | # DownsamplerBlock2 121 | self.layers.append(DownsamplerBlock(15, 60)) 122 | 123 | # EDA Block1 124 | self.layers.append(EDANetBlock(60, 5, [1,1,1,2,2], 40)) 125 | 126 | # DownsamplerBlock3 127 | self.layers.append(DownsamplerBlock(260, 
130)) 128 | 129 | # # EDA Block2 130 | self.layers.append(EDANetBlock(130, 8, [2,2,4,4,8,8,16,16], 40)) 131 | 132 | # Projection layer 133 | self.project_layer = nn.Conv2d(450,classes,kernel_size = 1) 134 | 135 | self.weights_init() 136 | 137 | def weights_init(self): 138 | for idx, m in enumerate(self.modules()): 139 | classname = m.__class__.__name__ 140 | if classname.find('Conv') != -1: 141 | m.weight.data.normal_(0.0, 0.02) 142 | elif classname.find('BatchNorm') != -1: 143 | m.weight.data.normal_(1.0, 0.02) 144 | m.bias.data.fill_(0) 145 | 146 | def forward(self, x): 147 | 148 | output = x 149 | 150 | for layer in self.layers: 151 | output = layer(output) 152 | 153 | output = self.project_layer(output) 154 | 155 | # Bilinear interpolation x8 156 | output = F.interpolate(output,scale_factor = 8,mode = 'bilinear',align_corners=True) 157 | 158 | return output 159 | 160 | """print layers and params of network""" 161 | if __name__ == '__main__': 162 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 163 | model = EDANet(classes=19).to(device) 164 | summary(model,(3,512,1024)) 165 | -------------------------------------------------------------------------------- /model/ESPNet_v2/cnn_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | __author__ = "Sachin Mehta" 7 | __version__ = "1.0.1" 8 | __maintainer__ = "Sachin Mehta" 9 | 10 | 11 | class PSPModule(nn.Module): 12 | def __init__(self, features, out_features=1024, sizes=(1, 2, 4, 8)): 13 | super().__init__() 14 | self.stages = [] 15 | self.stages = nn.ModuleList([C(features, features, 3, 1, groups=features) for size in sizes]) 16 | self.project = CBR(features * (len(sizes) + 1), out_features, 1, 1) 17 | 18 | def forward(self, feats): 19 | h, w = feats.size(2), feats.size(3) 20 | out = [feats] 21 | for stage in self.stages: 22 | feats = F.avg_pool2d(feats, kernel_size=3, stride=2, padding=1) 23 | upsampled = F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) 24 | out.append(upsampled) 25 | return self.project(torch.cat(out, dim=1)) 26 | 27 | class CBR(nn.Module): 28 | ''' 29 | This class defines the convolution layer with batch normalization and PReLU activation 30 | ''' 31 | 32 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 33 | ''' 34 | 35 | :param nIn: number of input channels 36 | :param nOut: number of output channels 37 | :param kSize: kernel size 38 | :param stride: stride rate for down-sampling. 
Default is 1 39 | ''' 40 | super().__init__() 41 | padding = int((kSize - 1) / 2) 42 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, groups=groups) 43 | self.bn = nn.BatchNorm2d(nOut) 44 | self.act = nn.PReLU(nOut) 45 | 46 | def forward(self, input): 47 | ''' 48 | :param input: input feature map 49 | :return: transformed feature map 50 | ''' 51 | output = self.conv(input) 52 | # output = self.conv1(output) 53 | output = self.bn(output) 54 | output = self.act(output) 55 | return output 56 | 57 | 58 | class BR(nn.Module): 59 | ''' 60 | This class groups the batch normalization and PReLU activation 61 | ''' 62 | 63 | def __init__(self, nOut): 64 | ''' 65 | :param nOut: output feature maps 66 | ''' 67 | super().__init__() 68 | self.bn = nn.BatchNorm2d(nOut) 69 | self.act = nn.PReLU(nOut) 70 | 71 | def forward(self, input): 72 | ''' 73 | :param input: input feature map 74 | :return: normalized and thresholded feature map 75 | ''' 76 | output = self.bn(input) 77 | output = self.act(output) 78 | return output 79 | 80 | 81 | class CB(nn.Module): 82 | ''' 83 | This class groups the convolution and batch normalization 84 | ''' 85 | 86 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 87 | ''' 88 | :param nIn: number of input channels 89 | :param nOut: number of output channels 90 | :param kSize: kernel size 91 | :param stride: optinal stide for down-sampling 92 | ''' 93 | super().__init__() 94 | padding = int((kSize - 1) / 2) 95 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 96 | groups=groups) 97 | self.bn = nn.BatchNorm2d(nOut) 98 | 99 | def forward(self, input): 100 | ''' 101 | 102 | :param input: input feature map 103 | :return: transformed feature map 104 | ''' 105 | output = self.conv(input) 106 | output = self.bn(output) 107 | return output 108 | 109 | 110 | class C(nn.Module): 111 | ''' 112 | This class is for a convolutional layer. 113 | ''' 114 | 115 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 116 | ''' 117 | 118 | :param nIn: number of input channels 119 | :param nOut: number of output channels 120 | :param kSize: kernel size 121 | :param stride: optional stride rate for down-sampling 122 | ''' 123 | super().__init__() 124 | padding = int((kSize - 1) / 2) 125 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 126 | groups=groups) 127 | 128 | def forward(self, input): 129 | ''' 130 | :param input: input feature map 131 | :return: transformed feature map 132 | ''' 133 | output = self.conv(input) 134 | return output 135 | 136 | 137 | class CDilated(nn.Module): 138 | ''' 139 | This class defines the dilated convolution. 140 | ''' 141 | 142 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 143 | ''' 144 | :param nIn: number of input channels 145 | :param nOut: number of output channels 146 | :param kSize: kernel size 147 | :param stride: optional stride rate for down-sampling 148 | :param d: optional dilation rate 149 | ''' 150 | super().__init__() 151 | padding = int((kSize - 1) / 2) * d 152 | self.conv = nn.Conv2d(nIn, nOut,kSize, stride=stride, padding=padding, bias=False, 153 | dilation=d, groups=groups) 154 | 155 | def forward(self, input): 156 | ''' 157 | :param input: input feature map 158 | :return: transformed feature map 159 | ''' 160 | output = self.conv(input) 161 | return output 162 | 163 | class CDilatedB(nn.Module): 164 | ''' 165 | This class defines the dilated convolution with batch normalization. 
166 | ''' 167 | 168 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 169 | ''' 170 | :param nIn: number of input channels 171 | :param nOut: number of output channels 172 | :param kSize: kernel size 173 | :param stride: optional stride rate for down-sampling 174 | :param d: optional dilation rate 175 | ''' 176 | super().__init__() 177 | padding = int((kSize - 1) / 2) * d 178 | self.conv = nn.Conv2d(nIn, nOut,kSize, stride=stride, padding=padding, bias=False, 179 | dilation=d, groups=groups) 180 | self.bn = nn.BatchNorm2d(nOut) 181 | 182 | def forward(self, input): 183 | ''' 184 | :param input: input feature map 185 | :return: transformed feature map 186 | ''' 187 | return self.bn(self.conv(input)) 188 | -------------------------------------------------------------------------------- /utils/optim/Ranger.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | import itertools as it 5 | 6 | 7 | 8 | class Ranger(Optimizer): 9 | 10 | def __init__(self, params, lr=1e-3, alpha=0.5, k=6, N_sma_threshhold=5, betas=(.95,0.999), eps=1e-5, weight_decay=0): 11 | #parameter checks 12 | if not 0.0 <= alpha <= 1.0: 13 | raise ValueError(f'Invalid slow update rate: {alpha}') 14 | if not 1 <= k: 15 | raise ValueError(f'Invalid lookahead steps: {k}') 16 | if not lr > 0: 17 | raise ValueError(f'Invalid Learning Rate: {lr}') 18 | if not eps > 0: 19 | raise ValueError(f'Invalid eps: {eps}') 20 | 21 | #parameter comments: 22 | # beta1 (momentum) of .95 seems to work better than .90... 23 | #N_sma_threshold of 5 seems better in testing than 4. 24 | #In both cases, worth testing on your dataset (.90 vs .95, 4 vs 5) to make sure which works best for you. 25 | 26 | #prep defaults and init torch.optim base 27 | defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas, N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay) 28 | super().__init__(params,defaults) 29 | 30 | #adjustable threshold 31 | self.N_sma_threshhold = N_sma_threshhold 32 | 33 | #now we can get to work... 34 | #removed as we now use step from RAdam...no need for duplicate step counting 35 | #for group in self.param_groups: 36 | # group["step_counter"] = 0 37 | #print("group step counter init") 38 | 39 | #look ahead params 40 | self.alpha = alpha 41 | self.k = k 42 | 43 | #radam buffer for state 44 | self.radam_buffer = [[None,None,None] for ind in range(10)] 45 | 46 | #self.first_run_check=0 47 | 48 | #lookahead weights 49 | #9/2/19 - lookahead param tensors have been moved to state storage. 50 | #This should resolve issues with load/save where weights were left in GPU memory from first load, slowing down future runs. 51 | 52 | #self.slow_weights = [[p.clone().detach() for p in group['params']] 53 | # for group in self.param_groups] 54 | 55 | #don't use grad for lookahead weights 56 | #for w in it.chain(*self.slow_weights): 57 | # w.requires_grad = False 58 | 59 | def __setstate__(self, state): 60 | print("set state called") 61 | super(Ranger, self).__setstate__(state) 62 | 63 | 64 | def step(self, closure=None): 65 | loss = None 66 | #note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure. 67 | #Uncomment if you need to use the actual closure... 
68 | 69 | #if closure is not None: 70 | #loss = closure() 71 | 72 | #Evaluate averages and grad, update param tensors 73 | for group in self.param_groups: 74 | 75 | for p in group['params']: 76 | if p.grad is None: 77 | continue 78 | grad = p.grad.data.float() 79 | if grad.is_sparse: 80 | raise RuntimeError('Ranger optimizer does not support sparse gradients') 81 | 82 | p_data_fp32 = p.data.float() 83 | 84 | state = self.state[p] #get state dict for this param 85 | 86 | if len(state) == 0: #if first time to run...init dictionary with our desired entries 87 | #if self.first_run_check==0: 88 | #self.first_run_check=1 89 | #print("Initializing slow buffer...should not see this at load from saved model!") 90 | state['step'] = 0 91 | state['exp_avg'] = torch.zeros_like(p_data_fp32) 92 | state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) 93 | 94 | #look ahead weight storage now in state dict 95 | state['slow_buffer'] = torch.empty_like(p.data) 96 | state['slow_buffer'].copy_(p.data) 97 | 98 | else: 99 | state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) 100 | state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) 101 | 102 | #begin computations 103 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 104 | beta1, beta2 = group['betas'] 105 | 106 | #compute variance mov avg 107 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 108 | #compute mean moving avg 109 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 110 | 111 | state['step'] += 1 112 | 113 | 114 | buffered = self.radam_buffer[int(state['step'] % 10)] 115 | if state['step'] == buffered[0]: 116 | N_sma, step_size = buffered[1], buffered[2] 117 | else: 118 | buffered[0] = state['step'] 119 | beta2_t = beta2 ** state['step'] 120 | N_sma_max = 2 / (1 - beta2) - 1 121 | N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) 122 | buffered[1] = N_sma 123 | if N_sma > self.N_sma_threshhold: 124 | step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) 125 | else: 126 | step_size = 1.0 / (1 - beta1 ** state['step']) 127 | buffered[2] = step_size 128 | 129 | if group['weight_decay'] != 0: 130 | p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) 131 | 132 | if N_sma > self.N_sma_threshhold: 133 | denom = exp_avg_sq.sqrt().add_(group['eps']) 134 | p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom) 135 | else: 136 | p_data_fp32.add_(-step_size * group['lr'], exp_avg) 137 | 138 | p.data.copy_(p_data_fp32) 139 | 140 | #integrated look ahead... 
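# (every k steps the slow buffer moves toward the fast weights: slow += alpha * (fast - slow), and the result is copied back into p.data)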
141 | #we do it at the param level instead of group level 142 | if state['step'] % group['k'] == 0: 143 | slow_p = state['slow_buffer'] #get access to slow param tensor 144 | slow_p.add_(self.alpha, p.data - slow_p) #(fast weights - slow weights) * alpha 145 | p.data.copy_(slow_p) #copy interpolated weights to RAdam param tensor 146 | 147 | return loss -------------------------------------------------------------------------------- /model/DABNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #DABNet: Depth-wise Asymmetric Bottleneck for Real-time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1907.11357.pdf 4 | ###################################################################################### 5 | 6 | 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchsummary import summary 12 | 13 | __all__ = ["DABNet"] 14 | 15 | 16 | class Conv(nn.Module): 17 | def __init__(self, nIn, nOut, kSize, stride, padding, dilation=(1, 1), groups=1, bn_acti=False, bias=False): 18 | super().__init__() 19 | 20 | self.bn_acti = bn_acti 21 | 22 | self.conv = nn.Conv2d(nIn, nOut, kernel_size=kSize, 23 | stride=stride, padding=padding, 24 | dilation=dilation, groups=groups, bias=bias) 25 | 26 | if self.bn_acti: 27 | self.bn_prelu = BNPReLU(nOut) 28 | 29 | def forward(self, input): 30 | output = self.conv(input) 31 | 32 | if self.bn_acti: 33 | output = self.bn_prelu(output) 34 | 35 | return output 36 | 37 | 38 | class BNPReLU(nn.Module): 39 | def __init__(self, nIn): 40 | super().__init__() 41 | self.bn = nn.BatchNorm2d(nIn, eps=1e-3) 42 | self.acti = nn.PReLU(nIn) 43 | 44 | def forward(self, input): 45 | output = self.bn(input) 46 | output = self.acti(output) 47 | 48 | return output 49 | 50 | 51 | class DABModule(nn.Module): 52 | def __init__(self, nIn, d=1, kSize=3, dkSize=3): 53 | super().__init__() 54 | 55 | self.bn_relu_1 = BNPReLU(nIn) 56 | self.conv3x3 = Conv(nIn, nIn // 2, kSize, 1, padding=1, bn_acti=True) 57 | 58 | self.dconv3x1 = Conv(nIn // 2, nIn // 2, (dkSize, 1), 1, 59 | padding=(1, 0), groups=nIn // 2, bn_acti=True) 60 | self.dconv1x3 = Conv(nIn // 2, nIn // 2, (1, dkSize), 1, 61 | padding=(0, 1), groups=nIn // 2, bn_acti=True) 62 | self.ddconv3x1 = Conv(nIn // 2, nIn // 2, (dkSize, 1), 1, 63 | padding=(1 * d, 0), dilation=(d, 1), groups=nIn // 2, bn_acti=True) 64 | self.ddconv1x3 = Conv(nIn // 2, nIn // 2, (1, dkSize), 1, 65 | padding=(0, 1 * d), dilation=(1, d), groups=nIn // 2, bn_acti=True) 66 | 67 | self.bn_relu_2 = BNPReLU(nIn // 2) 68 | self.conv1x1 = Conv(nIn // 2, nIn, 1, 1, padding=0, bn_acti=False) 69 | 70 | def forward(self, input): 71 | output = self.bn_relu_1(input) 72 | output = self.conv3x3(output) 73 | 74 | br1 = self.dconv3x1(output) 75 | br1 = self.dconv1x3(br1) 76 | br2 = self.ddconv3x1(output) 77 | br2 = self.ddconv1x3(br2) 78 | 79 | output = br1 + br2 80 | output = self.bn_relu_2(output) 81 | output = self.conv1x1(output) 82 | 83 | return output + input 84 | 85 | 86 | class DownSamplingBlock(nn.Module): 87 | def __init__(self, nIn, nOut): 88 | super().__init__() 89 | self.nIn = nIn 90 | self.nOut = nOut 91 | 92 | if self.nIn < self.nOut: 93 | nConv = nOut - nIn 94 | else: 95 | nConv = nOut 96 | 97 | self.conv3x3 = Conv(nIn, nConv, kSize=3, stride=2, padding=1) 98 | self.max_pool = nn.MaxPool2d(2, stride=2) 99 | self.bn_prelu = BNPReLU(nOut) 100 | 101 | def forward(self, input): 102 | 
output = self.conv3x3(input) 103 | 104 | if self.nIn < self.nOut: 105 | max_pool = self.max_pool(input) 106 | output = torch.cat([output, max_pool], 1) 107 | 108 | output = self.bn_prelu(output) 109 | 110 | return output 111 | 112 | 113 | class InputInjection(nn.Module): 114 | def __init__(self, ratio): 115 | super().__init__() 116 | self.pool = nn.ModuleList() 117 | for i in range(0, ratio): 118 | self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) 119 | 120 | def forward(self, input): 121 | for pool in self.pool: 122 | input = pool(input) 123 | 124 | return input 125 | 126 | 127 | class DABNet(nn.Module): 128 | def __init__(self, classes=19, block_1=3, block_2=6): 129 | super().__init__() 130 | self.init_conv = nn.Sequential( 131 | Conv(3, 32, 3, 2, padding=1, bn_acti=True), 132 | Conv(32, 32, 3, 1, padding=1, bn_acti=True), 133 | Conv(32, 32, 3, 1, padding=1, bn_acti=True), 134 | ) 135 | 136 | self.down_1 = InputInjection(1) # down-sample the image 1 times 137 | self.down_2 = InputInjection(2) # down-sample the image 2 times 138 | self.down_3 = InputInjection(3) # down-sample the image 3 times 139 | 140 | self.bn_prelu_1 = BNPReLU(32 + 3) 141 | 142 | # DAB Block 1 143 | self.downsample_1 = DownSamplingBlock(32 + 3, 64) 144 | self.DAB_Block_1 = nn.Sequential() 145 | for i in range(0, block_1): 146 | self.DAB_Block_1.add_module("DAB_Module_1_" + str(i), DABModule(64, d=2)) 147 | self.bn_prelu_2 = BNPReLU(128 + 3) 148 | 149 | # DAB Block 2 150 | dilation_block_2 = [4, 4, 8, 8, 16, 16] 151 | self.downsample_2 = DownSamplingBlock(128 + 3, 128) 152 | self.DAB_Block_2 = nn.Sequential() 153 | for i in range(0, block_2): 154 | self.DAB_Block_2.add_module("DAB_Module_2_" + str(i), 155 | DABModule(128, d=dilation_block_2[i])) 156 | self.bn_prelu_3 = BNPReLU(256 + 3) 157 | 158 | self.classifier = nn.Sequential(Conv(259, classes, 1, 1, padding=0)) 159 | 160 | def forward(self, input): 161 | 162 | output0 = self.init_conv(input) 163 | 164 | down_1 = self.down_1(input) 165 | down_2 = self.down_2(input) 166 | down_3 = self.down_3(input) 167 | 168 | output0_cat = self.bn_prelu_1(torch.cat([output0, down_1], 1)) 169 | 170 | # DAB Block 1 171 | output1_0 = self.downsample_1(output0_cat) 172 | output1 = self.DAB_Block_1(output1_0) 173 | output1_cat = self.bn_prelu_2(torch.cat([output1, output1_0, down_2], 1)) 174 | 175 | # DAB Block 2 176 | output2_0 = self.downsample_2(output1_cat) 177 | output2 = self.DAB_Block_2(output2_0) 178 | output2_cat = self.bn_prelu_3(torch.cat([output2, output2_0, down_3], 1)) 179 | 180 | out = self.classifier(output2_cat) 181 | out = F.interpolate(out, input.size()[2:], mode='bilinear', align_corners=False) 182 | 183 | return out 184 | 185 | """print layers and params of network""" 186 | if __name__ == '__main__': 187 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 188 | model = DABNet(classes=19).to(device) 189 | summary(model,(3,512,1024)) 190 | -------------------------------------------------------------------------------- /model/ESNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #ESNet: An Efficient Symmetric Network for Real-time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1906.09826.pdf 4 | ################################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.init as init 9 | import 
torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | class DownsamplerBlock(nn.Module): 13 | def __init__(self, ninput, noutput): 14 | super().__init__() 15 | 16 | self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True) 17 | self.pool = nn.MaxPool2d(2, stride=2) 18 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 19 | self.relu = nn.ReLU(inplace=True) 20 | 21 | def forward(self, input): 22 | x1 = self.pool(input) 23 | x2 = self.conv(input) 24 | 25 | diffY = x2.size()[2] - x1.size()[2] 26 | diffX = x2.size()[3] - x1.size()[3] 27 | 28 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 29 | diffY // 2, diffY - diffY // 2]) 30 | 31 | output = torch.cat([x2, x1], 1) 32 | output = self.bn(output) 33 | output = self.relu(output) 34 | return output 35 | 36 | class UpsamplerBlock (nn.Module): 37 | def __init__(self, ninput, noutput): 38 | super().__init__() 39 | 40 | self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) 41 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 42 | 43 | def forward(self, input): 44 | 45 | output = self.conv(input) 46 | output = self.bn(output) 47 | 48 | return F.relu(output) 49 | 50 | class FCU(nn.Module): 51 | def __init__(self, chann, kernel_size,dropprob, dilated): 52 | """ 53 | Factorized Convolution Unit 54 | 55 | """ 56 | super(FCU,self).__init__() 57 | 58 | padding = int((kernel_size-1)//2) * dilated 59 | 60 | self.conv3x1_1 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(int((kernel_size-1)//2)*1,0), bias=True) 61 | 62 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,int((kernel_size-1)//2)*1), bias=True) 63 | 64 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 65 | 66 | self.conv3x1_2 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(padding,0), bias=True, dilation = (dilated,1)) 67 | 68 | self.conv1x3_2 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,padding), bias=True, dilation = (1, dilated)) 69 | 70 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 71 | 72 | self.relu = nn.ReLU(inplace = True) 73 | self.dropout = nn.Dropout2d(dropprob) 74 | 75 | def forward(self, input): 76 | residual = input 77 | output = self.conv3x1_1(input) 78 | output = self.relu(output) 79 | output = self.conv1x3_1(output) 80 | output = self.bn1(output) 81 | output = self.relu(output) 82 | 83 | output = self.conv3x1_2(output) 84 | output = self.relu(output) 85 | output = self.conv1x3_2(output) 86 | output = self.bn2(output) 87 | 88 | if (self.dropout.p != 0): 89 | output = self.dropout(output) 90 | 91 | return F.relu(residual+output,inplace=True) 92 | 93 | 94 | class PFCU(nn.Module): 95 | def __init__(self,chann): 96 | """ 97 | Parallel Factorized Convolution Unit 98 | 99 | """ 100 | 101 | super(PFCU,self).__init__() 102 | 103 | self.conv3x1_1 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(1,0), bias=True) 104 | 105 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True) 106 | 107 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 108 | 109 | self.conv3x1_22 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(2,0), bias=True, dilation = (2,1)) 110 | self.conv1x3_22 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,2), bias=True, dilation = (1,2)) 111 | 112 | self.conv3x1_25 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(5,0), bias=True, dilation = (5,1)) 113 | self.conv1x3_25 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,5), bias=True, dilation = (1,5)) 114 | 115 | self.conv3x1_29 = 
nn.Conv2d(chann, chann, (3,1), stride=1, padding=(9,0), bias=True, dilation = (9,1)) 116 | self.conv1x3_29 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,9), bias=True, dilation = (1,9)) 117 | 118 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 119 | 120 | self.dropout = nn.Dropout2d(0.3) 121 | 122 | def forward(self, input): 123 | residual = input 124 | output = self.conv3x1_1(input) 125 | output = F.relu(output) 126 | output = self.conv1x3_1(output) 127 | output = self.bn1(output) 128 | output = F.relu(output) 129 | 130 | output2 = self.conv3x1_22(output) 131 | output2 = F.relu(output2) 132 | output2 = self.conv1x3_22(output2) 133 | output2 = self.bn2(output2) 134 | if (self.dropout.p != 0): 135 | output2 = self.dropout(output2) 136 | 137 | output5 = self.conv3x1_25(output) 138 | output5 = F.relu(output5) 139 | output5 = self.conv1x3_25(output5) 140 | output5 = self.bn2(output5) 141 | if (self.dropout.p != 0): 142 | output5 = self.dropout(output5) 143 | 144 | output9 = self.conv3x1_29(output) 145 | output9 = F.relu(output9) 146 | output9 = self.conv1x3_29(output9) 147 | output9 = self.bn2(output9) 148 | if (self.dropout.p != 0): 149 | output9 = self.dropout(output9) 150 | 151 | return F.relu(residual+output2+output5+output9,inplace=True) 152 | 153 | 154 | class ESNet(nn.Module): 155 | def __init__(self, classes): 156 | super().__init__() 157 | #-----ESNET---------# 158 | self.initial_block = DownsamplerBlock(3,16) 159 | 160 | self.layers = nn.ModuleList() 161 | 162 | for x in range(0, 3): 163 | self.layers.append(FCU(16, 3, 0.03, 1)) 164 | 165 | self.layers.append(DownsamplerBlock(16,64)) 166 | 167 | for x in range(0, 2): 168 | self.layers.append(FCU(64, 5, 0.03, 1)) 169 | 170 | self.layers.append(DownsamplerBlock(64,128)) 171 | 172 | for x in range(0, 3): 173 | self.layers.append(PFCU(chann=128)) 174 | 175 | self.layers.append(UpsamplerBlock(128,64)) 176 | self.layers.append(FCU(64, 5, 0, 1)) 177 | self.layers.append(FCU(64, 5, 0, 1)) 178 | 179 | self.layers.append(UpsamplerBlock(64,16)) 180 | self.layers.append(FCU(16, 3, 0, 1)) 181 | self.layers.append(FCU(16, 3, 0, 1)) 182 | 183 | self.output_conv = nn.ConvTranspose2d( 16, classes, 2, stride=2, padding=0, output_padding=0, bias=True) 184 | 185 | def forward(self, input): 186 | output = self.initial_block(input) 187 | 188 | for layer in self.layers: 189 | output = layer(output) 190 | 191 | output = self.output_conv(output) 192 | 193 | return output 194 | 195 | 196 | """print layers and params of network""" 197 | if __name__ == '__main__': 198 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 199 | model = ESNet(classes=11).to(device) 200 | summary(model,(3,360,480)) 201 | -------------------------------------------------------------------------------- /model/SegNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | #SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1511.00561.pdf 4 | ################################################################################## 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | 14 | __all__ = ["SegNet"] 15 | 16 | class SegNet(nn.Module): 17 | def __init__(self,classes= 19): 18 | super(SegNet, self).__init__() 19 | 20 | batchNorm_momentum = 0.1 21 | 22 | self.conv11 = nn.Conv2d(3, 64, 
kernel_size=3, padding=1) 23 | self.bn11 = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 24 | self.conv12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 25 | self.bn12 = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 26 | 27 | self.conv21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 28 | self.bn21 = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 29 | self.conv22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 30 | self.bn22 = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 31 | 32 | self.conv31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 33 | self.bn31 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 34 | self.conv32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 35 | self.bn32 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 36 | self.conv33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 37 | self.bn33 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 38 | 39 | self.conv41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 40 | self.bn41 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 41 | self.conv42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 42 | self.bn42 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 43 | self.conv43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 44 | self.bn43 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 45 | 46 | self.conv51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 47 | self.bn51 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 48 | self.conv52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 49 | self.bn52 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 50 | self.conv53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 51 | self.bn53 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 52 | 53 | self.conv53d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 54 | self.bn53d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 55 | self.conv52d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 56 | self.bn52d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 57 | self.conv51d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 58 | self.bn51d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 59 | 60 | self.conv43d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 61 | self.bn43d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 62 | self.conv42d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 63 | self.bn42d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 64 | self.conv41d = nn.Conv2d(512, 256, kernel_size=3, padding=1) 65 | self.bn41d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 66 | 67 | self.conv33d = nn.Conv2d(256, 256, kernel_size=3, padding=1) 68 | self.bn33d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 69 | self.conv32d = nn.Conv2d(256, 256, kernel_size=3, padding=1) 70 | self.bn32d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 71 | self.conv31d = nn.Conv2d(256, 128, kernel_size=3, padding=1) 72 | self.bn31d = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 73 | 74 | self.conv22d = nn.Conv2d(128, 128, kernel_size=3, padding=1) 75 | self.bn22d = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 76 | self.conv21d = nn.Conv2d(128, 64, kernel_size=3, padding=1) 77 | self.bn21d = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 78 | 79 | self.conv12d = nn.Conv2d(64, 64, kernel_size=3, padding=1) 80 | self.bn12d = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 81 | self.conv11d = nn.Conv2d(64, classes, kernel_size=3, padding=1) 82 | 83 | 84 | def forward(self, x): 85 | 86 | # Stage 1 87 | x11 = F.relu(self.bn11(self.conv11(x))) 88 | x12 = F.relu(self.bn12(self.conv12(x11))) 89 | x1_size = 
x12.size() 90 | x1p, id1 = F.max_pool2d(x12,kernel_size=2, stride=2,return_indices=True) 91 | 92 | # Stage 2 93 | x21 = F.relu(self.bn21(self.conv21(x1p))) 94 | x22 = F.relu(self.bn22(self.conv22(x21))) 95 | x2_size = x22.size() 96 | x2p, id2 = F.max_pool2d(x22,kernel_size=2, stride=2,return_indices=True) 97 | 98 | # Stage 3 99 | x31 = F.relu(self.bn31(self.conv31(x2p))) 100 | x32 = F.relu(self.bn32(self.conv32(x31))) 101 | x33 = F.relu(self.bn33(self.conv33(x32))) 102 | x3_size = x33.size() 103 | x3p, id3 = F.max_pool2d(x33,kernel_size=2, stride=2,return_indices=True) 104 | 105 | # Stage 4 106 | x41 = F.relu(self.bn41(self.conv41(x3p))) 107 | x42 = F.relu(self.bn42(self.conv42(x41))) 108 | x43 = F.relu(self.bn43(self.conv43(x42))) 109 | x4_size = x43.size() 110 | x4p, id4 = F.max_pool2d(x43,kernel_size=2, stride=2,return_indices=True) 111 | 112 | # Stage 5 113 | x51 = F.relu(self.bn51(self.conv51(x4p))) 114 | x52 = F.relu(self.bn52(self.conv52(x51))) 115 | x53 = F.relu(self.bn53(self.conv53(x52))) 116 | x5_size = x53.size() 117 | x5p, id5 = F.max_pool2d(x53,kernel_size=2, stride=2,return_indices=True) 118 | 119 | 120 | # Stage 5d 121 | x5d = F.max_unpool2d(x5p, id5, kernel_size=2, stride=2, output_size=x5_size) 122 | x53d = F.relu(self.bn53d(self.conv53d(x5d))) 123 | x52d = F.relu(self.bn52d(self.conv52d(x53d))) 124 | x51d = F.relu(self.bn51d(self.conv51d(x52d))) 125 | 126 | # Stage 4d 127 | x4d = F.max_unpool2d(x51d, id4, kernel_size=2, stride=2, output_size=x4_size) 128 | x43d = F.relu(self.bn43d(self.conv43d(x4d))) 129 | x42d = F.relu(self.bn42d(self.conv42d(x43d))) 130 | x41d = F.relu(self.bn41d(self.conv41d(x42d))) 131 | 132 | # Stage 3d 133 | x3d = F.max_unpool2d(x41d, id3, kernel_size=2, stride=2, output_size=x3_size) 134 | x33d = F.relu(self.bn33d(self.conv33d(x3d))) 135 | x32d = F.relu(self.bn32d(self.conv32d(x33d))) 136 | x31d = F.relu(self.bn31d(self.conv31d(x32d))) 137 | 138 | # Stage 2d 139 | x2d = F.max_unpool2d(x31d, id2, kernel_size=2, stride=2, output_size=x2_size) 140 | x22d = F.relu(self.bn22d(self.conv22d(x2d))) 141 | x21d = F.relu(self.bn21d(self.conv21d(x22d))) 142 | 143 | # Stage 1d 144 | x1d = F.max_unpool2d(x21d, id1, kernel_size=2, stride=2, output_size=x1_size) 145 | x12d = F.relu(self.bn12d(self.conv12d(x1d))) 146 | x11d = self.conv11d(x12d) 147 | 148 | return x11d 149 | 150 | def load_from_segnet(self, model_path): 151 | s_dict = self.state_dict()# create a copy of the state dict 152 | th = torch.load(model_path).state_dict() # load the weigths 153 | # for name in th: 154 | # s_dict[corresp_name[name]] = th[name] 155 | self.load_state_dict(th) 156 | 157 | 158 | 159 | """print layers and params of network""" 160 | if __name__ == '__main__': 161 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 162 | model = SegNet(classes=19).to(device) 163 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | import torch.backends.cudnn as cudnn 6 | from argparse import ArgumentParser 7 | # user 8 | from builders.model_builder import build_model 9 | from builders.dataset_builder import build_dataset_test 10 | from utils.utils import save_predict 11 | from utils.metric.metric import get_iou 12 | from utils.convert_state import convert_state_dict 13 | 14 | 15 | def parse_args(): 16 | parser = ArgumentParser(description='Efficient semantic 
segmentation') 17 | parser.add_argument('--model', default="ENet", help="model name: (default ENet)") 18 | parser.add_argument('--dataset', default="camvid", help="dataset: cityscapes or camvid") 19 | parser.add_argument('--num_workers', type=int, default=1, help="the number of parallel threads") 20 | parser.add_argument('--batch_size', type=int, default=1, 21 | help=" the batch_size is set to 1 when evaluating or testing") 22 | parser.add_argument('--checkpoint', type=str,default="", 23 | help="use the file to load the checkpoint for evaluating or testing ") 24 | parser.add_argument('--save_seg_dir', type=str, default="./result/", 25 | help="saving path of prediction result") 26 | parser.add_argument('--best', action='store_true', help="Get the best result among last few checkpoints") 27 | parser.add_argument('--save', action='store_true', help="Save the predicted image") 28 | parser.add_argument('--cuda', default=True, help="run on CPU or GPU") 29 | parser.add_argument("--gpus", default="0", type=str, help="gpu ids (default: 0)") 30 | args = parser.parse_args() 31 | 32 | return args 33 | 34 | 35 | 36 | 37 | def test(args, test_loader, model): 38 | """ 39 | args: 40 | test_loader: loaded for test dataset 41 | model: model 42 | return: class IoU and mean IoU 43 | """ 44 | # evaluation or test mode 45 | model.eval() 46 | total_batches = len(test_loader) 47 | 48 | data_list = [] 49 | for i, (input, label, size, name) in enumerate(test_loader): 50 | with torch.no_grad(): 51 | input_var = input.cuda() 52 | start_time = time.time() 53 | output = model(input_var) 54 | torch.cuda.synchronize() 55 | time_taken = time.time() - start_time 56 | print('[%d/%d] time: %.2f' % (i + 1, total_batches, time_taken)) 57 | output = output.cpu().data[0].numpy() 58 | gt = np.asarray(label[0].numpy(), dtype=np.uint8) 59 | output = output.transpose(1, 2, 0) 60 | output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 61 | data_list.append([gt.flatten(), output.flatten()]) 62 | 63 | # save the predicted image 64 | if args.save: 65 | save_predict(output, gt, name[0], args.dataset, args.save_seg_dir, 66 | output_grey=False, output_color=True, gt_color=True) 67 | 68 | meanIoU, per_class_iu = get_iou(data_list, args.classes) 69 | return meanIoU, per_class_iu 70 | 71 | 72 | def test_model(args): 73 | """ 74 | main function for testing 75 | param args: global arguments 76 | return: None 77 | """ 78 | print(args) 79 | 80 | if args.cuda: 81 | print("=====> use gpu id: '{}'".format(args.gpus)) 82 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 83 | if not torch.cuda.is_available(): 84 | raise Exception("no GPU found or wrong gpu id, please run without --cuda") 85 | 86 | # build the model 87 | model = build_model(args.model, num_classes=args.classes) 88 | 89 | if args.cuda: 90 | model = model.cuda() # using GPU for inference 91 | cudnn.benchmark = True 92 | 93 | if args.save: 94 | if not os.path.exists(args.save_seg_dir): 95 | os.makedirs(args.save_seg_dir) 96 | 97 | # load the test set 98 | datas, testLoader = build_dataset_test(args.dataset, args.num_workers) 99 | 100 | if not args.best: 101 | if args.checkpoint: 102 | if os.path.isfile(args.checkpoint): 103 | print("=====> loading checkpoint '{}'".format(args.checkpoint)) 104 | checkpoint = torch.load(args.checkpoint) 105 | model.load_state_dict(checkpoint['model']) 106 | # model.load_state_dict(convert_state_dict(checkpoint['model'])) 107 | else: 108 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 109 | raise FileNotFoundError("no checkpoint 
found at '{}'".format(args.checkpoint)) 110 | 111 | print("=====> beginning validation") 112 | print("validation set length: ", len(testLoader)) 113 | mIOU_val, per_class_iu = test(args, testLoader, model) 114 | print(mIOU_val) 115 | print(per_class_iu) 116 | 117 | # Get the best test result among the last 10 model records. 118 | else: 119 | if args.checkpoint: 120 | if os.path.isfile(args.checkpoint): 121 | dirname, basename = os.path.split(args.checkpoint) 122 | epoch = int(os.path.splitext(basename)[0].split('_')[1]) 123 | mIOU_val = [] 124 | per_class_iu = [] 125 | for i in range(epoch - 9, epoch + 1): 126 | basename = 'model_' + str(i) + '.pth' 127 | resume = os.path.join(dirname, basename) 128 | checkpoint = torch.load(resume) 129 | model.load_state_dict(checkpoint['model']) 130 | print("=====> beginning test the " + basename) 131 | print("validation set length: ", len(testLoader)) 132 | mIOU_val_0, per_class_iu_0 = test(args, testLoader, model) 133 | mIOU_val.append(mIOU_val_0) 134 | per_class_iu.append(per_class_iu_0) 135 | 136 | index = list(range(epoch - 9, epoch + 1))[np.argmax(mIOU_val)] 137 | print("The best mIoU among the last 10 models is", index) 138 | print(mIOU_val) 139 | per_class_iu = per_class_iu[np.argmax(mIOU_val)] 140 | mIOU_val = np.max(mIOU_val) 141 | print(mIOU_val) 142 | print(per_class_iu) 143 | 144 | else: 145 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 146 | raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) 147 | 148 | # Save the result 149 | if not args.best: 150 | model_path = os.path.splitext(os.path.basename(args.checkpoint)) 151 | args.logFile = 'test_' + model_path[0] + '.txt' 152 | logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile) 153 | else: 154 | args.logFile = 'test_' + 'best' + str(index) + '.txt' 155 | logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile) 156 | 157 | # Save the result 158 | if os.path.isfile(logFileLoc): 159 | logger = open(logFileLoc, 'a') 160 | else: 161 | logger = open(logFileLoc, 'w') 162 | logger.write("Mean IoU: %.4f" % mIOU_val) 163 | logger.write("\nPer class IoU: ") 164 | for i in range(len(per_class_iu)): 165 | logger.write("%.4f\t" % per_class_iu[i]) 166 | logger.flush() 167 | logger.close() 168 | 169 | 170 | if __name__ == '__main__': 171 | 172 | args = parse_args() 173 | 174 | args.save_seg_dir = os.path.join(args.save_seg_dir, args.dataset, args.model) 175 | 176 | if args.dataset == 'cityscapes': 177 | args.classes = 19 178 | elif args.dataset == 'camvid': 179 | args.classes = 11 180 | else: 181 | raise NotImplementedError( 182 | "This repository now supports two datasets: cityscapes and camvid, %s is not included" % args.dataset) 183 | 184 | test_model(args) 185 | -------------------------------------------------------------------------------- /model/SQNet.py: -------------------------------------------------------------------------------- 1 | ################################################################### 2 | # SQNet: Speeding up semantic segmentation for autonomous driving 3 | #Paper-Link: https://openreview.net/pdf?id=S1uHiFyyg 4 | ################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.autograd import Variable 9 | import torch.nn.functional as F 10 | import numpy as np 11 | import torch.optim as optim 12 | import math 13 | from torchsummary import summary 14 | 15 | 16 | 17 | __all__ = ["SQNet"] 18 | 19 | class Fire(nn.Module): 20 | def 
__init__(self, inplanes, squeeze_planes, expand_planes): 21 | super(Fire, self).__init__() 22 | self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1) 23 | # self.bn1 = nn.BatchNorm2d(squeeze_planes) 24 | self.relu1 = nn.ELU(inplace=True) 25 | self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1) 26 | # self.bn2 = nn.BatchNorm2d(expand_planes) 27 | self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1) 28 | # self.bn3 = nn.BatchNorm2d(expand_planes) 29 | self.relu2 = nn.ELU(inplace=True) 30 | 31 | # using MSR initilization 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 35 | m.weight.data.normal_(0, math.sqrt(2./n)) 36 | 37 | def forward(self, x): 38 | x = self.conv1(x) 39 | # x = self.bn1(x) 40 | x = self.relu1(x) 41 | out1 = self.conv2(x) 42 | # out1 = self.bn2(out1) 43 | out2 = self.conv3(x) 44 | # out2 = self.bn3(out2) 45 | out = torch.cat([out1, out2], 1) 46 | out = self.relu2(out) 47 | return out 48 | 49 | 50 | class ParallelDilatedConv(nn.Module): 51 | def __init__(self, inplanes, planes): 52 | super(ParallelDilatedConv, self).__init__() 53 | self.dilated_conv_1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1) 54 | self.dilated_conv_2 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=2, dilation=2) 55 | self.dilated_conv_3 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=3, dilation=3) 56 | self.dilated_conv_4 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=4, dilation=4) 57 | self.relu1 = nn.ELU(inplace=True) 58 | self.relu2 = nn.ELU(inplace=True) 59 | self.relu3 = nn.ELU(inplace=True) 60 | self.relu4 = nn.ELU(inplace=True) 61 | 62 | def forward(self, x): 63 | out1 = self.dilated_conv_1(x) 64 | out2 = self.dilated_conv_2(x) 65 | out3 = self.dilated_conv_3(x) 66 | out4 = self.dilated_conv_4(x) 67 | out1 = self.relu1(out1) 68 | out2 = self.relu2(out2) 69 | out3 = self.relu3(out3) 70 | out4 = self.relu4(out4) 71 | out = out1 + out2 + out3 + out4 72 | return out 73 | 74 | class SQNet(nn.Module): 75 | def __init__(self, classes): 76 | super().__init__() 77 | 78 | self.num_classes = classes 79 | 80 | self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=2, padding=1) # 32 81 | # self.bn1 = nn.BatchNorm2d(96) 82 | self.relu1 = nn.ELU(inplace=True) 83 | self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 84 | self.fire1_1 = Fire(96, 16, 64) 85 | self.fire1_2 = Fire(128, 16, 64) 86 | self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 8 87 | self.fire2_1 = Fire(128, 32, 128) 88 | self.fire2_2 = Fire(256, 32, 128) 89 | self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 90 | self.fire3_1 = Fire(256, 64, 256) 91 | self.fire3_2 = Fire(512, 64, 256) 92 | self.fire3_3 = Fire(512, 64, 256) 93 | self.parallel = ParallelDilatedConv(512, 512) 94 | self.deconv1 = nn.ConvTranspose2d(512, 256, 3, stride=2, padding=1, output_padding=1) 95 | # self.bn2 = nn.BatchNorm2d(256) 96 | self.relu2 = nn.ELU(inplace=True) 97 | self.deconv2 = nn.ConvTranspose2d(512, 128, 3, stride=2, padding=1, output_padding=1) 98 | # self.bn3 = nn.BatchNorm2d(128) 99 | self.relu3 = nn.ELU(inplace=True) 100 | self.deconv3 = nn.ConvTranspose2d(256, 96, 3, stride=2, padding=1, output_padding=1) 101 | # self.bn4 = nn.BatchNorm2d(96) 102 | self.relu4 = nn.ELU(inplace=True) 103 | self.deconv4 = nn.ConvTranspose2d(192, self.num_classes, 3, stride=2, padding=1, output_padding=1) 104 | 105 | 
self.conv3_1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # 32 106 | self.conv3_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) # 32 107 | self.conv2_1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) # 32 108 | self.conv2_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # 32 109 | self.conv1_1 = nn.Conv2d(96, 96, kernel_size=3, stride=1, padding=1) # 32 110 | self.conv1_2 = nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1) # 32 111 | 112 | self.relu1_1 = nn.ELU(inplace=True) 113 | self.relu1_2 = nn.ELU(inplace=True) 114 | self.relu2_1 = nn.ELU(inplace=True) 115 | self.relu2_2 = nn.ELU(inplace=True) 116 | self.relu3_1 = nn.ELU(inplace=True) 117 | self.relu3_2 = nn.ELU(inplace=True) 118 | 119 | for m in self.modules(): 120 | if isinstance(m, nn.Conv2d): 121 | n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 122 | m.weight.data.normal_(0, math.sqrt(2. / n)) 123 | elif isinstance(m, nn.BatchNorm2d): 124 | m.weight.data.fill_(1) 125 | m.bias.data.zero_() 126 | 127 | def forward(self, x): 128 | x = self.conv1(x) 129 | # x = self.bn1(x) 130 | x_1 = self.relu1(x) 131 | # print "x_1: %s" % str(x_1.size()) 132 | x = self.maxpool1(x_1) 133 | x = self.fire1_1(x) 134 | x_2 = self.fire1_2(x) 135 | # print "x_2: %s" % str(x_2.size()) 136 | x = self.maxpool2(x_2) 137 | x = self.fire2_1(x) 138 | x_3 = self.fire2_2(x) 139 | # print "x_3: %s" % str(x_3.size()) 140 | x = self.maxpool3(x_3) 141 | x = self.fire3_1(x) 142 | x = self.fire3_2(x) 143 | x = self.fire3_3(x) 144 | x = self.parallel(x) 145 | # print "x: %s" % str(x.size()) 146 | y_3 = self.deconv1(x) 147 | y_3 = self.relu2(y_3) 148 | x_3 = self.conv3_1(x_3) 149 | x_3 = self.relu3_1(x_3) 150 | # print "y_3: %s" % str(y_3.size()) 151 | # x = x.transpose(1, 2, 0) 152 | # print('x_3.size():', x_3.size()) 153 | # print('y_3.size():', y_3.size()) 154 | x_3 = F.interpolate(x_3, y_3.size()[2:], mode="bilinear", align_corners=True) 155 | x = torch.cat([x_3, y_3], 1) 156 | x = self.conv3_2(x) 157 | x = self.relu3_2(x) 158 | # concat x_3 159 | y_2 = self.deconv2(x) 160 | y_2 = self.relu3(y_2) 161 | x_2 = self.conv2_1(x_2) 162 | x_2 = self.relu2_1(x_2) 163 | # print "y_2: %s" % str(y_2.size()) 164 | # concat x_2 165 | # print('x_2.size():', x_2.size()) 166 | # print('y_2.size():', y_2.size()) 167 | y_2 = F.interpolate(y_2, x_2.size()[2:], mode="bilinear", align_corners=True) 168 | x = torch.cat([x_2, y_2], 1) 169 | x = self.conv2_2(x) 170 | x = self.relu2_2(x) 171 | y_1 = self.deconv3(x) 172 | y_1 = self.relu4(y_1) 173 | x_1 = self.conv1_1(x_1) 174 | x_1 = self.relu1_1(x_1) 175 | # print "y_1: %s" % str(y_1.size()) 176 | # concat x_1 177 | x = torch.cat([x_1, y_1], 1) 178 | x = self.conv1_2(x) 179 | x = self.relu1_2(x) 180 | x = self.deconv4(x) 181 | return x #, x_1, x_2, x_3, y_1, y_2, y_3 182 | 183 | 184 | 185 | """print layers and params of network""" 186 | if __name__ == '__main__': 187 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 188 | model = SQNet(classes=19).to(device) 189 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/ContextNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | #ContextNet: Exploring Context and Detail for Semantic Segmentation in Real-time 3 | #Paper-Link: https://arxiv.org/abs/1805.04554 4 | 
################################################################################## 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | 14 | __all__ = ["ContextNet"] 15 | 16 | class Custom_Conv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs): 18 | super(Custom_Conv, self).__init__() 19 | self.conv = nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU(True) 23 | ) 24 | 25 | def forward(self, x): 26 | return self.conv(x) 27 | 28 | class DepthSepConv(nn.Module): 29 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 30 | super(DepthSepConv, self).__init__() 31 | self.conv = nn.Sequential( 32 | nn.Conv2d(dw_channels, dw_channels, 3, stride, 1, groups=dw_channels, bias=False), 33 | nn.BatchNorm2d(dw_channels), 34 | nn.ReLU(True), 35 | nn.Conv2d(dw_channels, out_channels, 1, bias=False), 36 | nn.BatchNorm2d(out_channels), 37 | nn.ReLU(True) 38 | ) 39 | 40 | def forward(self, x): 41 | return self.conv(x) 42 | 43 | class DepthConv(nn.Module): 44 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 45 | super(DepthConv, self).__init__() 46 | self.conv = nn.Sequential( 47 | nn.Conv2d(dw_channels, out_channels, 3, stride, 1, groups=dw_channels, bias=False), 48 | nn.BatchNorm2d(out_channels), 49 | nn.ReLU(True) 50 | ) 51 | 52 | def forward(self, x): 53 | return self.conv(x) 54 | 55 | class LinearBottleneck(nn.Module): 56 | def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs): 57 | super(LinearBottleneck, self).__init__() 58 | self.use_shortcut = stride == 1 and in_channels == out_channels 59 | self.block = nn.Sequential( 60 | Custom_Conv(in_channels, in_channels * t, 1), 61 | DepthConv(in_channels * t, in_channels * t, stride), 62 | nn.Conv2d(in_channels * t, out_channels, 1, bias=False), 63 | nn.BatchNorm2d(out_channels) 64 | ) 65 | 66 | def forward(self, x): 67 | out = self.block(x) 68 | if self.use_shortcut: 69 | out = x + out 70 | return out 71 | 72 | 73 | 74 | 75 | class Shallow_net(nn.Module): 76 | def __init__(self, dw_channels1=32, dw_channels2=64, out_channels=128, **kwargs): 77 | super(Shallow_net, self).__init__() 78 | self.conv = Custom_Conv(3, dw_channels1, 3, 2) 79 | self.dsconv1 = DepthSepConv(dw_channels1, dw_channels2, 2) 80 | self.dsconv2 = DepthSepConv(dw_channels2, out_channels, 2) 81 | self.dsconv3 = DepthSepConv(out_channels, out_channels, 1) 82 | 83 | 84 | def forward(self, x): 85 | x = self.conv(x) 86 | x = self.dsconv1(x) 87 | x = self.dsconv2(x) 88 | x = self.dsconv3(x) 89 | return x 90 | 91 | class Deep_net(nn.Module): 92 | def __init__(self, in_channels, block_channels, 93 | t, num_blocks, **kwargs): 94 | super(Deep_net, self).__init__() 95 | self.block_channels = block_channels 96 | self.t = t 97 | self.num_blocks = num_blocks 98 | 99 | self.conv_ = Custom_Conv(3, in_channels, 3, 2) 100 | self.bottleneck1 = self._layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], t[0], 1) 101 | self.bottleneck2 = self._layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], t[1], 1) 102 | self.bottleneck3 = self._layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], t[2], 2) 103 | self.bottleneck4 = self._layer(LinearBottleneck, block_channels[2], block_channels[3], num_blocks[3], t[3], 2) 104 | self.bottleneck5 = 
self._layer(LinearBottleneck, block_channels[3], block_channels[4], num_blocks[4], t[4], 1) 105 | self.bottleneck6 = self._layer(LinearBottleneck, block_channels[4], block_channels[5], num_blocks[5], t[5], 1) 106 | 107 | def _layer(self, block, in_channels, out_channels, blocks, t, stride): 108 | layers = [] 109 | layers.append(block(in_channels, out_channels, t, stride)) 110 | for i in range(1, blocks): 111 | layers.append(block(out_channels, out_channels, t, 1)) 112 | 113 | return nn.Sequential(*layers) 114 | 115 | def forward(self, x): 116 | x = self.conv_(x) 117 | x = self.bottleneck1(x) 118 | x = self.bottleneck2(x) 119 | x = self.bottleneck3(x) 120 | x = self.bottleneck4(x) 121 | x = self.bottleneck5(x) 122 | x = self.bottleneck6(x) 123 | return x 124 | 125 | class FeatureFusionModule(nn.Module): 126 | def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs): 127 | super(FeatureFusionModule, self).__init__() 128 | self.scale_factor = scale_factor 129 | self.dwconv = DepthConv(lower_in_channels, out_channels, 1) 130 | self.conv_lower_res = nn.Sequential( 131 | nn.Conv2d(out_channels, out_channels, 1), 132 | nn.BatchNorm2d(out_channels) 133 | ) 134 | self.conv_higher_res = nn.Sequential( 135 | nn.Conv2d(highter_in_channels, out_channels, 1), 136 | nn.BatchNorm2d(out_channels) 137 | ) 138 | self.relu = nn.ReLU(True) 139 | 140 | def forward(self, higher_res_feature, lower_res_feature): 141 | _, _, h, w = higher_res_feature.size() 142 | lower_res_feature = F.interpolate(lower_res_feature, size=(h,w), mode='bilinear', align_corners=True) 143 | lower_res_feature = self.dwconv(lower_res_feature) 144 | lower_res_feature = self.conv_lower_res(lower_res_feature) 145 | 146 | higher_res_feature = self.conv_higher_res(higher_res_feature) 147 | out = higher_res_feature + lower_res_feature 148 | return self.relu(out) 149 | 150 | class Classifer(nn.Module): 151 | def __init__(self, dw_channels, num_classes, stride=1, **kwargs): 152 | super(Classifer, self).__init__() 153 | self.dsconv1 = DepthSepConv(dw_channels, dw_channels, stride) 154 | self.dsconv2 = DepthSepConv(dw_channels, dw_channels, stride) 155 | self.conv = nn.Sequential( 156 | nn.Dropout(0.1), 157 | nn.Conv2d(dw_channels, num_classes, 1) 158 | ) 159 | 160 | def forward(self, x): 161 | x = self.dsconv1(x) 162 | x = self.dsconv2(x) 163 | x = self.conv(x) 164 | return x 165 | 166 | 167 | 168 | class ContextNet(nn.Module): 169 | def __init__(self, classes, aux=False, **kwargs): 170 | super(ContextNet, self).__init__() 171 | self.aux = aux 172 | self.spatial_detail = Shallow_net(32, 64, 128) 173 | self.context_feature_extractor = Deep_net(32, [32, 32, 48, 64, 96, 128], [1, 6, 6, 6, 6, 6], [1, 1, 3, 3, 2, 2]) 174 | self.feature_fusion = FeatureFusionModule(128, 128, 128) 175 | self.classifier = Classifer(128, classes) 176 | if self.aux: 177 | self.auxlayer = nn.Sequential( 178 | nn.Conv2d(128, 32, 3, padding=1, bias=False), 179 | nn.BatchNorm2d(32), 180 | nn.ReLU(True), 181 | nn.Dropout(0.1), 182 | nn.Conv2d(32, classes, 1) 183 | ) 184 | 185 | def forward(self, x): 186 | size = x.size()[2:] 187 | 188 | higher_res_features = self.spatial_detail(x) 189 | 190 | x_low = F.interpolate(x, scale_factor = 0.25, mode='bilinear', align_corners=True) 191 | 192 | x = self.context_feature_extractor(x_low) 193 | 194 | x = self.feature_fusion(higher_res_features, x) 195 | 196 | x = self.classifier(x) 197 | 198 | outputs = [] 199 | x = F.interpolate(x, size, mode='bilinear', align_corners=True) 200 | 201 | 
outputs.append(x) 202 | if self.aux: 203 | auxout = self.auxlayer(higher_res_features) 204 | auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) 205 | outputs.append(auxout) 206 | 207 | return x 208 | 209 | # return tuple(outputs) 210 | 211 | 212 | 213 | """print layers and params of network""" 214 | if __name__ == '__main__': 215 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 216 | model = ContextNet(classes=19).to(device) 217 | summary(model,(3,512,1024)) 218 | -------------------------------------------------------------------------------- /tools/flops_counter/README.md: -------------------------------------------------------------------------------- 1 | # Flops counter for convolutional networks in pytorch framework 2 | [![Pypi version](https://img.shields.io/pypi/v/ptflops.svg)](https://pypi.org/project/ptflops/) 3 | 4 | This script is designed to compute the theoretical amount of multiply-add operations 5 | in convolutional neural networks. It also can compute the number of parameters and 6 | print per-layer computational cost of a given network. 7 | 8 | Supported layers: 9 | - Conv1d/2d/3d (including grouping) 10 | - ConvTranspose2d (including grouping) 11 | - BatchNorm1d/2d/3d 12 | - Activations (ReLU, PReLU, ELU, ReLU6, LeakyReLU) 13 | - Linear 14 | - Upsample 15 | - Poolings (AvgPool1d/2d/3d, MaxPool1d/2d/3d and adaptive ones) 16 | 17 | Requirements: Pytorch >= 0.4.1, torchvision >= 0.2.1 18 | 19 | Thanks to @warmspringwinds for the initial version of script. 20 | 21 | ## Usage tips 22 | 23 | - This script doesn't take into account `torch.nn.functional.*` operations. For an instance, if one have a semantic segmentation model and use `torch.nn.functional.interpolate` to upscale features, these operations won't contribute to overall amount of flops. To avoid that one can use `torch.nn.Upsample` instead of `torch.nn.functional.interpolate`. 24 | - `ptflops` launches a given model on a random tensor and estimates amount of computations during inference. Complicated models can have several inputs, some of them could be optional. To construct non-trivial input one can use the `input_constructor` argument of the `get_model_complexity_info`. `input_constructor` is a function that takes the input spatial resolution as a tuple and returns a dict with named input arguments of the model. Next this dict would be passed to the model as keyworded arguments. 
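A minimal sketch of the `input_constructor` hook described in the tip above. `TwoInputNet`, its argument names, and the chosen resolution are illustrative placeholders only, not part of this repository:

```python
import torch
import torch.nn as nn
from ptflops import get_model_complexity_info

class TwoInputNet(nn.Module):
    # toy model whose forward() takes two named tensors
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(4, 8, kernel_size=3, padding=1)

    def forward(self, image, mask):
        return self.conv(torch.cat([image, mask], dim=1))

def prepare_input(resolution):
    # `resolution` is the tuple passed to get_model_complexity_info below
    image = torch.ones((1, *resolution))        # (1, 3, H, W)
    mask = torch.ones((1, 1, *resolution[1:]))  # (1, 1, H, W)
    return dict(image=image, mask=mask)         # keys must match forward() argument names

flops, params = get_model_complexity_info(TwoInputNet(), (3, 224, 224),
                                          input_constructor=prepare_input,
                                          as_strings=True,
                                          print_per_layer_stat=False)
print('Flops: ' + flops)
print('Params: ' + params)
```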
25 | 26 | ## Install the latest version 27 | ```bash 28 | pip install --upgrade git+https://github.com/sovrasov/flops-counter.pytorch.git 29 | ``` 30 | 31 | ## Example 32 | ```python 33 | import torchvision.models as models 34 | import torch 35 | from ptflops import get_model_complexity_info 36 | 37 | with torch.cuda.device(0): 38 | net = models.densenet161() 39 | flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True, print_per_layer_stat=True) 40 | print('Flops: ' + flops) 41 | print('Params: ' + params) 42 | ``` 43 | 44 | ## Benchmark 45 | 46 | ### [torchvision](https://pytorch.org/docs/1.0.0/torchvision/models.html) 47 | 48 | Model | Input Resolution | Params(M) | MACs(G) | Top-1 error | Top-5 error 49 | --- |--- |--- |--- |--- |--- 50 | alexnet |224x224 | 61.1 | 0.72 | 43.45 | 20.91 51 | vgg11 |224x224 | 132.86 | 7.63 | 30.98 | 11.37 52 | vgg13 |224x224 | 133.05 | 11.34 | 30.07 | 10.75 53 | vgg16 |224x224 | 138.36 | 15.5 | 28.41 | 9.62 54 | vgg19 |224x224 | 143.67 | 19.67 | 27.62 | 9.12 55 | vgg11_bn |224x224 | 132.87 | 7.64 | 29.62 | 10.19 56 | vgg13_bn |224x224 | 133.05 | 11.36 | 28.45 | 9.63 57 | vgg16_bn |224x224 | 138.37 | 15.53 | 26.63 | 8.50 58 | vgg19_bn |224x224 | 143.68 | 19.7 | 25.76 | 8.15 59 | resnet18 |224x224 | 11.69 | 1.82 | 30.24 | 10.92 60 | resnet34 |224x224 | 21.8 | 3.68 | 26.70 | 8.58 61 | resnet50 |224x224 | 25.56 | 4.12 | 23.85 | 7.13 62 | resnet101 |224x224 | 44.55 | 7.85 | 22.63 | 6.44 63 | resnet152 |224x224 | 60.19 | 11.58 | 21.69 | 5.94 64 | squeezenet1_0 |224x224 | 1.25 | 0.83 | 41.90 | 19.58 65 | squeezenet1_1 |224x224 | 1.24 | 0.36 | 41.81 | 19.38 66 | densenet121 |224x224 | 7.98 | 2.88 | 25.35 | 7.83 67 | densenet169 |224x224 | 14.15 | 3.42 | 24.00 | 7.00 68 | densenet201 |224x224 | 20.01 | 4.37 | 22.80 | 6.43 69 | densenet161 |224x224 | 28.68 | 7.82 | 22.35 | 6.20 70 | inception_v3 |224x224 | 27.16 | 2.85 | 22.55 | 6.44 71 | 72 | * Top-1 error - ImageNet single-crop top-1 error (224x224) 73 | * Top-5 error - ImageNet single-crop top-5 error (224x224) 74 | 75 | ### [Cadene/pretrained-models.pytorch](https://github.com/Cadene/pretrained-models.pytorch) 76 | 77 | Model | Input Resolution | Params(M) | MACs(G) | Acc@1 | Acc@5 78 | --- |--- |--- |--- |--- |--- 79 | alexnet | 224x224 | 61.1 | 0.72 | 56.432 | 79.194 80 | bninception | 224x224 | 11.3 | 2.05 | 73.524 | 91.562 81 | cafferesnet101 | 224x224 | 44.55 | 7.62 | 76.2 | 92.766 82 | densenet121 | 224x224 | 7.98 | 2.88 | 74.646 | 92.136 83 | densenet161 | 224x224 | 28.68 | 7.82 | 77.56 | 93.798 84 | densenet169 | 224x224 | 14.15 | 3.42 | 76.026 | 92.992 85 | densenet201 | 224x224 | 20.01 | 4.37 | 77.152 | 93.548 86 | dpn107 | 224x224 | 86.92 | 18.42 | 79.746 | 94.684 87 | dpn131 | 224x224 | 79.25 | 16.13 | 79.432 | 94.574 88 | dpn68 | 224x224 | 12.61 | 2.36 | 75.868 | 92.774 89 | dpn68b | 224x224 | 12.61 | 2.36 | 77.034 | 93.59 90 | dpn92 | 224x224 | 37.67 | 6.56 | 79.4 | 94.62 91 | dpn98 | 224x224 | 61.57 | 11.76 | 79.224 | 94.488 92 | fbresnet152 | 224x224 | 60.27 | 11.6 | 77.386 | 93.594 93 | inceptionresnetv2 | 299x299 | 55.84 | 13.22 | 80.17 | 95.234 94 | inceptionv3 | 299x299 | 27.16 | 5.73 | 77.294 | 93.454 95 | inceptionv4 | 299x299 | 42.68 | 12.31 | 80.062 | 94.926 96 | nasnetalarge | 331x331 | 88.75 | 24.04 | 82.566 | 96.086 97 | nasnetamobile | 224x224 | 5.29 | 0.59 | 74.08 | 91.74 98 | pnasnet5large | 331x331 | 86.06 | 25.21 | 82.736 | 95.992 99 | polynet | 331x331 | 95.37 | 34.9 | 81.002 | 95.624 100 | resnet101 | 224x224 | 44.55 | 7.85 | 77.438 | 93.672 101 | 
resnet152 | 224x224 | 60.19 | 11.58 | 78.428 | 94.11 102 | resnet18 | 224x224 | 11.69 | 1.82 | 70.142 | 89.274 103 | resnet34 | 224x224 | 21.8 | 3.68 | 73.554 | 91.456 104 | resnet50 | 224x224 | 25.56 | 4.12 | 76.002 | 92.98 105 | resnext101_32x4d | 224x224 | 44.18 | 8.03 | 78.188 | 93.886 106 | resnext101_64x4d | 224x224 | 83.46 | 15.55 | 78.956 | 94.252 107 | se_resnet101 | 224x224 | 49.33 | 7.63 | 78.396 | 94.258 108 | se_resnet152 | 224x224 | 66.82 | 11.37 | 78.658 | 94.374 109 | se_resnet50 | 224x224 | 28.09 | 3.9 | 77.636 | 93.752 110 | se_resnext101_32x4d | 224x224 | 48.96 | 8.05 | 80.236 | 95.028 111 | se_resnext50_32x4d | 224x224 | 27.56 | 4.28 | 79.076 | 94.434 112 | senet154 | 224x224 | 115.09 | 20.82 | 81.304 | 95.498 113 | squeezenet1_0 | 224x224 | 1.25 | 0.83 | 58.108 | 80.428 114 | squeezenet1_1 | 224x224 | 1.24 | 0.36 | 58.25 | 80.8 115 | vgg11 | 224x224 | 132.86 | 7.63 | 68.97 | 88.746 116 | vgg11_bn | 224x224 | 132.87 | 7.64 | 70.452 | 89.818 117 | vgg13 | 224x224 | 133.05 | 11.34 | 69.662 | 89.264 118 | vgg13_bn | 224x224 | 133.05 | 11.36 | 71.508 | 90.494 119 | vgg16 | 224x224 | 138.36 | 15.5 | 71.636 | 90.354 120 | vgg16_bn | 224x224 | 138.37 | 15.53 | 73.518 | 91.608 121 | vgg19 | 224x224 | 143.67 | 19.67 | 72.08 | 90.822 122 | vgg19_bn | 224x224 | 143.68 | 19.7 | 74.266 | 92.066 123 | xception | 299x299 | 22.86 | 8.42 | 78.888 | 94.292 124 | 125 | * Acc@1 - ImageNet single-crop top-1 accuracy on validation images of the same size used during the training process. 126 | * Acc@5 - ImageNet single-crop top-5 accuracy on validation images of the same size used during the training process. 127 | -------------------------------------------------------------------------------- /model/LinkNet.py: -------------------------------------------------------------------------------- 1 | ############################################################################################ 2 | #LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1707.03718.pdf 4 | ############################################################################################ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | from torchvision.models import resnet 11 | 12 | 13 | 14 | __all__ = ["LinkNet"] 15 | 16 | class BasicBlock(nn.Module): 17 | 18 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, groups=1, bias=False): 19 | super(BasicBlock, self).__init__() 20 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=bias) 21 | self.bn1 = nn.BatchNorm2d(out_planes) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size, 1, padding, groups=groups, bias=bias) 24 | self.bn2 = nn.BatchNorm2d(out_planes) 25 | self.downsample = None 26 | if stride > 1: 27 | self.downsample = nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(out_planes),) 29 | 30 | def forward(self, x): 31 | residual = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | 40 | if self.downsample is not None: 41 | residual = self.downsample(x) 42 | 43 | out = self.relu(out+residual) 44 | 45 | return out 46 | 47 | 48 | class Encoder(nn.Module): 49 | 50 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, groups=1, bias=False): 
51 | super(Encoder, self).__init__() 52 | self.block1 = BasicBlock(in_planes, out_planes, kernel_size, stride, padding, groups, bias) 53 | self.block2 = BasicBlock(out_planes, out_planes, kernel_size, 1, padding, groups, bias) 54 | 55 | def forward(self, x): 56 | x = self.block1(x) 57 | x = self.block2(x) 58 | 59 | return x 60 | 61 | 62 | class Decoder(nn.Module): 63 | 64 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=False): 65 | # TODO bias=True 66 | super(Decoder, self).__init__() 67 | self.conv1 = nn.Sequential(nn.Conv2d(in_planes, in_planes//4, 1, 1, 0, bias=bias), 68 | nn.BatchNorm2d(in_planes//4), 69 | nn.ReLU(inplace=True)) 70 | self.tp_conv = nn.Sequential(nn.ConvTranspose2d(in_planes//4, in_planes//4, kernel_size, stride, padding, output_padding, bias=bias), 71 | nn.BatchNorm2d(in_planes//4), 72 | nn.ReLU(inplace=True)) 73 | self.conv2 = nn.Sequential(nn.Conv2d(in_planes//4, out_planes, 1, 1, 0, bias=bias), 74 | nn.BatchNorm2d(out_planes), 75 | nn.ReLU(inplace=True)) 76 | 77 | def forward(self, x_high_level, x_low_level): 78 | x = self.conv1(x_high_level) 79 | x = self.tp_conv(x) 80 | 81 | # solution for padding issues 82 | # diffY = x_low_level.size()[2] - x_high_level.size()[2] 83 | # diffX = x_low_level.size()[3] - x_high_level.size()[3] 84 | # x = F.pad(x, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]) 85 | 86 | x = center_crop(x, x_low_level.size()[2], x_low_level.size()[3]) 87 | 88 | x = self.conv2(x) 89 | 90 | return x 91 | 92 | def center_crop(layer, max_height, max_width): 93 | _, _, h, w = layer.size() 94 | diffy = (h - max_height) // 2 95 | diffx = (w -max_width) // 2 96 | return layer[:,:,diffy:(diffy + max_height),diffx:(diffx + max_width)] 97 | 98 | 99 | def up_pad(layer, skip_height, skip_width): 100 | _, _, h, w = layer.size() 101 | diffy = skip_height - h 102 | diffx = skip_width -w 103 | return F.pad(layer,[diffx // 2, diffx - diffx // 2, 104 | diffy // 2, diffy - diffy // 2]) 105 | 106 | 107 | class LinkNetImprove(nn.Module): 108 | """ 109 | Generate Model Architecture 110 | """ 111 | 112 | def __init__(self, classes=19): 113 | """ 114 | Model initialization 115 | :param x_n: number of input neurons 116 | :type x_n: int 117 | """ 118 | super().__init__() 119 | 120 | base = resnet.resnet18(pretrained=True) 121 | 122 | self.in_block = nn.Sequential( 123 | base.conv1, 124 | base.bn1, 125 | base.relu, 126 | base.maxpool 127 | ) 128 | 129 | self.encoder1 = base.layer1 130 | self.encoder2 = base.layer2 131 | self.encoder3 = base.layer3 132 | self.encoder4 = base.layer4 133 | 134 | self.decoder1 = Decoder(64, 64, 3, 1, 1, 0) 135 | self.decoder2 = Decoder(128, 64, 3, 2, 1, 1) 136 | self.decoder3 = Decoder(256, 128, 3, 2, 1, 1) 137 | self.decoder4 = Decoder(512, 256, 3, 2, 1, 1) 138 | 139 | # Classifier 140 | self.tp_conv1 = nn.Sequential(nn.ConvTranspose2d(64, 32, 3, 2, 1, 1), 141 | nn.BatchNorm2d(32), 142 | nn.ReLU(inplace=True),) 143 | self.conv2 = nn.Sequential(nn.Conv2d(32, 32, 3, 1, 1), 144 | nn.BatchNorm2d(32), 145 | nn.ReLU(inplace=True),) 146 | self.tp_conv2 = nn.ConvTranspose2d(32, classes, 2, 2, 0) 147 | 148 | 149 | def forward(self, x): 150 | # Initial block 151 | x = self.in_block(x) 152 | 153 | # Encoder blocks 154 | e1 = self.encoder1(x) 155 | e2 = self.encoder2(e1) 156 | e3 = self.encoder3(e2) 157 | e4 = self.encoder4(e3) 158 | 159 | # Decoder blocks 160 | d4 = e3 + self.decoder4(e4, e3) 161 | d3 = e2 + self.decoder3(d4, e2) 162 | d2 = e1 + self.decoder2(d3, e1) 163 | d1 = 
x + self.decoder1(d2, x) 164 | 165 | # Classifier 166 | y = self.tp_conv1(d1) 167 | y = self.conv2(y) 168 | y = self.tp_conv2(y) 169 | 170 | return y 171 | 172 | 173 | class LinkNet(nn.Module): 174 | """ 175 | Generate model architecture 176 | """ 177 | 178 | def __init__(self, classes=19): 179 | """ 180 | Model initialization 181 | :param x_n: number of input neurons 182 | :type x_n: int 183 | """ 184 | super().__init__() 185 | self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False) 186 | self.bn1 = nn.BatchNorm2d(64) 187 | self.relu = nn.ReLU(inplace=True) 188 | self.maxpool = nn.MaxPool2d(3, 2, 1) 189 | 190 | self.encoder1 = Encoder(64, 64, 3, 1, 1) 191 | self.encoder2 = Encoder(64, 128, 3, 2, 1) 192 | self.encoder3 = Encoder(128, 256, 3, 2, 1) 193 | self.encoder4 = Encoder(256, 512, 3, 2, 1) 194 | 195 | 196 | self.decoder4 = Decoder(512, 256, 3, 2, 1, 1) 197 | self.decoder3 = Decoder(256, 128, 3, 2, 1, 1) 198 | self.decoder2 = Decoder(128, 64, 3, 2, 1, 1) 199 | self.decoder1 = Decoder(64, 64, 3, 1, 1, 0) 200 | 201 | 202 | # Classifier 203 | self.tp_conv1 = nn.Sequential(nn.ConvTranspose2d(64, 32, 3, 2, 1, 1), 204 | nn.BatchNorm2d(32), 205 | nn.ReLU(inplace=True),) 206 | self.conv2 = nn.Sequential(nn.Conv2d(32, 32, 3, 1, 1), 207 | nn.BatchNorm2d(32), 208 | nn.ReLU(inplace=True),) 209 | self.tp_conv2 = nn.ConvTranspose2d(32, classes, 2, 2, 0) 210 | 211 | def forward(self, x): 212 | # Initial block 213 | x = self.conv1(x) 214 | x = self.bn1(x) 215 | x = self.relu(x) 216 | x = self.maxpool(x) 217 | 218 | # Encoder blocks 219 | e1 = self.encoder1(x) 220 | e2 = self.encoder2(e1) 221 | e3 = self.encoder3(e2) 222 | e4 = self.encoder4(e3) 223 | 224 | # Decoder blocks 225 | d4 = e3 + self.decoder4(e4, e3) 226 | d3 = e2 + self.decoder3(d4, e2) 227 | d2 = e1 + self.decoder2(d3, e1) 228 | d1 = x + self.decoder1(d2, x) 229 | 230 | # Classifier 231 | y = self.tp_conv1(d1) 232 | y = self.conv2(y) 233 | y = self.tp_conv2(y) 234 | 235 | 236 | return y 237 | 238 | """print layers and params of network""" 239 | if __name__ == '__main__': 240 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 241 | model = LinkNet(classes=11).to(device) 242 | summary(model,(3,512,1024)) 243 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Efficient-Segmentation-Networks 2 | [![python-image]][python-url] 3 | [![pytorch-image]][pytorch-url] 4 | 5 | This project aims at providing an easy-to-use, modifiable reference implementation for real-time semantic segmentation models using PyTorch. 6 | 7 |

8 | --- 9 | 10 | ### Table of Contents: 11 | - Requirements 12 | - Models 13 | - Dataset Setting 14 | - Usage 15 | - Contact 16 | 17 | ### Requirements 18 | 19 | [**PyTorch**](https://pytorch.org/) and [**Torchvision**](https://pytorch.org/) needs to be installed before running the scripts, PyTorch v1.1 or later is supported. 20 | 21 | ```bash 22 | pip3 install -r requirements.txt 23 | ``` 24 | 25 | ### Models 26 | 27 | The project supports these semantic segmentation models as follows: 28 | - (**SQNet**) Speeding up Semantic Segmentation for Autonomous Driving [[Paper]](https://openreview.net/pdf?id=S1uHiFyyg) 29 | - (**LinkNet**) Exploiting Encoder Representations for Efficient Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1707.03718.pdf) 30 | - (**SegNet**) A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation [[Paper]](https://arxiv.org/pdf/1511.00561.pdf) 31 | - (**UNet**) Convolutional Networks for Biomedical Image Segmentation [[Paper]](https://arxiv.org/pdf/1505.04597.pdf) 32 | - (**ENet**) A Deep Neural Network Architecture for Real-Time Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1606.02147.pdf) 33 | - (**ERFNet**) Efficient ConvNet for Real-time Semantic Segmentation [[Paper]](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17iv.pdf) 34 | - (**EDANet**) Efficient Dense Modules of Asymmetric Convolution for Real-Time Segmentation [[Paper]](https://arxiv.org/ftp/arxiv/papers/1809/1809.06323.pdf) 35 | - (**ESPNet**) Efficient Spatial Pyramid of Dilated Convolutions for Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1803.06815v2.pdf) 36 | - (**ESPNetv2**) A Light-weight, Power Efficient, and General Purpose ConvNet [[Paper]](https://arxiv.org/pdf/1811.11431.pdf) 37 | - (**LEDNet**) A Lightweight Encoder-Decoder Network for Real-Time Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1905.02423v3.pdf) 38 | - (**FSSNet**) Fast Semantic Segmentation for Scene Perception [[Paper]](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8392426) 39 | - (**ESNet**) An Efficient Symmetric Network for Real-time Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1906.09826v1.pdf) 40 | - (**CGNet**) A Light-weight Context Guided Network for Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1811.08201.pdf) 41 | - (**Fast-SCNN**) Fast Semantic Segmentation Network [[Paper]](https://arxiv.org/pdf/1902.04502.pdf) 42 | - (**DABNet**) Depth-wise Asymmetric Bottleneck for Real-time Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1907.11357.pdf) 43 | - (**ContextNet**) Exploring Context and Detail for Semantic Segmentation in Real-time [[Paper]](https://arxiv.org/pdf/1805.04554.pdf) 44 | - (**FPENet**) Feature Pyramid Encoding Network for Real-time Semantic Segmentation [[Paper]](https://arxiv.org/pdf/1909.08599v1.pdf) 45 | - ... 46 | 47 | #### Losses 48 | 49 | The project supports these loss functions: 50 | 51 | > 1. Weighted Cross Entropy 52 | > 2. Weighted Cross Entropy with Label Smooth 53 | > 3. Focal Loss 54 | > 4. Ohem Cross Entropy 55 | > 5. [LovaszSoftmax](https://github.com/bermanmaxim/LovaszSoftmax) 56 | > 6. [SegLoss-List](https://github.com/JunMa11/SegLoss) 57 | > 7. ... 58 | 59 | #### Optimizers 60 | 61 | The project supports these optimizers: 62 | 63 | > 1. SGD 64 | > 2. Adam 65 | > 3. AdamW 66 | > 4. [RAdam](https://github.com/LiyuanLucasLiu/RAdam) 67 | > 5. RAdam + Lookahead 68 | > 6. ... 69 | 70 | #### Activations 71 | 72 | > 1. ReLu 73 | > 2. PReLU 74 | > 3. ReLU6 75 | > 4. Swish 76 | > 5. 
[Mish](https://github.com/digantamisra98/Mish) : A Self Regularized Non-Monotonic Neural Activation Function 77 | > 6. ... 78 | 79 | #### Learning Rate Scheduler 80 | 81 | The project supports these LR_Schedulers: 82 | 83 | > 1. Poly decay 84 | > 2. Warmup Poly 85 | > 3. ... 86 | 87 | #### Normalization methods 88 | 89 | > 1. [In-Place Activated BatchNorm](https://github.com/mapillary/inplace_abn) 90 | > 2. [Switchable Normalization](https://github.com/switchablenorms/Switchable-Normalization) 91 | > 3. [Weight Standardization](https://github.com/joe-siyuan-qiao/WeightStandardization) 92 | > 4. ... 93 | 94 | #### Enhancing Semantic Feature Learning Method 95 | 96 | > 1. [Attention Family](https://github.com/implus/PytorchInsight) 97 | > 2. [NAS Family](https://github.com/D-X-Y/NAS-Projects) 98 | > 3. ... 99 | 100 | #### Some useful Tools 101 | 102 | > 1. [pytorch-OpCounter](https://github.com/Lyken17/pytorch-OpCounter) 103 | > 2. [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) 104 | > 3. [Netron](https://github.com/lutzroeder/Netron) : Visualizer for neural network models, On line URL: [Netron](https://lutzroeder.github.io/netron/) 105 | > 4. [Falshtorch](https://github.com/MisaOgura/flashtorch): Visualization toolkit for neural networks in PyTorch ! 106 | > 5. [Bag of Tricks for Image Classification with Convolutional Neural Networks](https://github.com/weiaicunzai/Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks) 107 | > 6. ... 108 | 109 | ### Dataset-Setting 110 | 111 | This project has been tailored to suit the [Cityscapes](https://www.cityscapes-dataset.com/) and [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) datasets. The folds of your dataset need satisfy the following structures: 112 | 113 | ``` 114 | |-- dataset 115 | | |-- camvid 116 | | | |-- train 117 | | | |-- trainannot 118 | | | |-- val 119 | | | |-- valannot 120 | | | |-- test 121 | | | |-- testannot 122 | | | |-- ... 123 | | |-- cityscapes 124 | | | |-- leftImg8bit 125 | | | | |-- train 126 | | | | |-- val 127 | | | | |-- test 128 | | | |-- gtFine 129 | | | | |-- train 130 | | | | |-- val 131 | | | | |-- test 132 | | | |-- ... 133 | ``` 134 | 135 | - You can download [**cityscapes**](https://www.cityscapes-dataset.com/) dataset from [here](https://www.cityscapes-dataset.com/downloads/). Note: please download [leftImg8bit_trainvaltest.zip(11GB)](https://www.cityscapes-dataset.com/file-handling/?packageID=4) and [gtFine_trainvaltest(241MB)](https://www.cityscapes-dataset.com/file-handling/?packageID=1). 136 | - You can download [**camvid**](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) dataset from [here](https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid). 137 | - The **Cityscapes dataset scripts** for inspection, preparation, and evaluation can download from [here](https://github.com/mcordts/cityscapesScripts). 
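As a quick sanity check that the extracted archives match the layout above, the following sketch walks the expected folders; the `dataset` root used here is an assumption (run it from wherever you cloned the project):

```
import os

root = 'dataset'  # assumed location of the dataset folder described above
expected = [
    'camvid/train', 'camvid/trainannot', 'camvid/val', 'camvid/valannot',
    'camvid/test', 'camvid/testannot',
    'cityscapes/leftImg8bit/train', 'cityscapes/leftImg8bit/val', 'cityscapes/leftImg8bit/test',
    'cityscapes/gtFine/train', 'cityscapes/gtFine/val', 'cityscapes/gtFine/test',
]
missing = [d for d in expected if not os.path.isdir(os.path.join(root, d))]
print('missing folders:', missing if missing else 'none')
```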
138 | 139 | ### Usage 140 | 141 | #### Clone this Repo 142 | 143 | ``` 144 | git clone https://github.com/xiaoyufenfei/Efficient-Segmentation-Networks 145 | cd Efficient-Segmentation-Networks 146 | ``` 147 | 148 | Currently, the code supports [Python 3](https://www.python.org/) 149 | 150 | Torch dependencies: 151 | 152 | - [PyTorch](https://pytorch.org/) (>=1.1.0) 153 | - torchvision(>=0.3.0) 154 | 155 | Data dependencies: 156 | 157 | - [Cityscapes](https://www.cityscapes-dataset.com/) + [scripts](https://github.com/mcordts/cityscapesScripts) 158 | 159 | Download Cityscapes and run the script `createTrainIdLabelImgs.py` to create annotations based on the training labels. Make sure that the folder is named *cityscapes* 160 | 161 | ##### Training 162 | - For Cityscapes / CamVid 163 | 164 | 1. training on **train** set 165 | 166 | ``` 167 | python train.py --help 168 | ``` 169 | 170 | 2. training on **train+val** set 171 | 172 | ``` 173 | python train.py --help 174 | ``` 175 | 176 | ##### Testing 177 | - For Cityscapes / CamVid 178 | 179 | ``` 180 | python test.py --help 181 | ``` 182 | 183 | ##### Predicting 184 | - For Cityscapes 185 | 186 | ``` 187 | python predict.py --help 188 | ``` 189 | 190 | ##### Evaluating 191 | - For Cityscapes 192 | 193 | ``` 194 | cd tools 195 | python trainID2labelID.py 196 | ``` 197 | 198 | ### Contact 199 | 200 | If you think this work useful, please give me a star! And if you find any errors or have any suggestions, please contact me. 201 | 202 | **GitHub:** `xiaoyufenfei` 203 | **Email:** `wangy314159@163.com` 204 | 205 | ### Refer to this Rep 206 | 207 | You are encouraged to cite the following papers if this work helps your research. 208 | 209 | ```bash 210 | @misc{Efficient-Segmentation-Networks, 211 | author = {Yu Wang}, 212 | title = {Efficient-Segmentation-Networks Pytorch Implementation}, 213 | year = {2019}, 214 | publisher = {GitHub}, 215 | journal = {GitHub repository}, 216 | howpublished = {\url{https://github.com/xiaoyufenfei/Efficient-Segmentation-Networks}}, 217 | commit = {master} 218 | } 219 | ``` 220 | 221 | ### License 222 | 223 | This project is released under the MIT License. See [LICENSE](https://github.com/xiaoyufenfei/Efficient-Segmentation-Networks/blob/master/LICENSE) for additional details. 224 | 225 | 226 | 227 | 228 | [python-image]: https://img.shields.io/badge/Python-3.x-ff69b4.svg 229 | [python-url]: https://www.python.org/ 230 | [pytorch-image]: https://img.shields.io/badge/PyTorch-1.1-2BAF2B.svg 231 | [pytorch-url]: https://pytorch.org/ 232 | 233 | -------------------------------------------------------------------------------- /utils/losses/lovasz_losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lovasz-Softmax and Jaccard hinge loss in PyTorch 3 | Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) 4 | https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytorch/lovasz_losses.py 5 | """ 6 | 7 | from __future__ import print_function, division 8 | 9 | import torch 10 | from torch.autograd import Variable 11 | import torch.nn.functional as F 12 | import numpy as np 13 | try: 14 | from itertools import ifilterfalse 15 | except ImportError: # py3k 16 | from itertools import filterfalse as ifilterfalse 17 | 18 | 19 | def lovasz_grad(gt_sorted): 20 | """ 21 | Computes gradient of the Lovasz extension w.r.t sorted errors 22 | See Alg. 
1 in paper 23 | """ 24 | p = len(gt_sorted) 25 | gts = gt_sorted.sum() 26 | intersection = gts - gt_sorted.float().cumsum(0) 27 | union = gts + (1 - gt_sorted).float().cumsum(0) 28 | jaccard = 1. - intersection / union 29 | if p > 1: # cover 1-pixel case 30 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 31 | return jaccard 32 | 33 | 34 | def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): 35 | """ 36 | IoU for foreground class 37 | binary: 1 foreground, 0 background 38 | """ 39 | if not per_image: 40 | preds, labels = (preds,), (labels,) 41 | ious = [] 42 | for pred, label in zip(preds, labels): 43 | intersection = ((label == 1) & (pred == 1)).sum() 44 | union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() 45 | if not union: 46 | iou = EMPTY 47 | else: 48 | iou = float(intersection) / float(union) 49 | ious.append(iou) 50 | iou = mean(ious) # mean accross images if per_image 51 | return 100 * iou 52 | 53 | 54 | def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): 55 | """ 56 | Array of IoU for each (non ignored) class 57 | """ 58 | if not per_image: 59 | preds, labels = (preds,), (labels,) 60 | ious = [] 61 | for pred, label in zip(preds, labels): 62 | iou = [] 63 | for i in range(C): 64 | if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) 65 | intersection = ((label == i) & (pred == i)).sum() 66 | union = ((label == i) | ((pred == i) & (label != ignore))).sum() 67 | if not union: 68 | iou.append(EMPTY) 69 | else: 70 | iou.append(float(intersection) / float(union)) 71 | ious.append(iou) 72 | ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image 73 | return 100 * np.array(ious) 74 | 75 | 76 | # --------------------------- BINARY LOSSES --------------------------- 77 | 78 | def lovasz_hinge(logits, labels, per_image=True, ignore=None): 79 | """ 80 | Binary Lovasz hinge loss 81 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 82 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 83 | per_image: compute the loss per image instead of per batch 84 | ignore: void class id 85 | """ 86 | if per_image: 87 | loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) 88 | for log, lab in zip(logits, labels)) 89 | else: 90 | loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) 91 | return loss 92 | 93 | 94 | def lovasz_hinge_flat(logits, labels): 95 | """ 96 | Binary Lovasz hinge loss 97 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 98 | labels: [P] Tensor, binary ground truth labels (0 or 1) 99 | ignore: label to ignore 100 | """ 101 | if len(labels) == 0: 102 | # only void pixels, the gradients should be 0 103 | return logits.sum() * 0. 104 | signs = 2. * labels.float() - 1. 105 | errors = (1. 
- logits * Variable(signs)) 106 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 107 | perm = perm.data 108 | gt_sorted = labels[perm] 109 | grad = lovasz_grad(gt_sorted) 110 | loss = torch.dot(F.relu(errors_sorted), Variable(grad)) 111 | return loss 112 | 113 | 114 | def flatten_binary_scores(scores, labels, ignore=None): 115 | """ 116 | Flattens predictions in the batch (binary case) 117 | Remove labels equal to 'ignore' 118 | """ 119 | scores = scores.view(-1) 120 | labels = labels.view(-1) 121 | if ignore is None: 122 | return scores, labels 123 | valid = (labels != ignore) 124 | vscores = scores[valid] 125 | vlabels = labels[valid] 126 | return vscores, vlabels 127 | 128 | 129 | class StableBCELoss(torch.nn.modules.Module): 130 | def __init__(self): 131 | super(StableBCELoss, self).__init__() 132 | def forward(self, input, target): 133 | neg_abs = - input.abs() 134 | loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() 135 | return loss.mean() 136 | 137 | 138 | def binary_xloss(logits, labels, ignore=None): 139 | """ 140 | Binary Cross entropy loss 141 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 142 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 143 | ignore: void class id 144 | """ 145 | logits, labels = flatten_binary_scores(logits, labels, ignore) 146 | loss = StableBCELoss()(logits, Variable(labels.float())) 147 | return loss 148 | 149 | 150 | # --------------------------- MULTICLASS LOSSES --------------------------- 151 | 152 | 153 | def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None): 154 | """ 155 | Multi-class Lovasz-Softmax loss 156 | probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). 157 | Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. 158 | labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) 159 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 160 | per_image: compute the loss per image instead of per batch 161 | ignore: void class labels 162 | """ 163 | if per_image: 164 | loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes) 165 | for prob, lab in zip(probas, labels)) 166 | else: 167 | loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes) 168 | return loss 169 | 170 | 171 | def lovasz_softmax_flat(probas, labels, classes='present'): 172 | """ 173 | Multi-class Lovasz-Softmax loss 174 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 175 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 176 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 177 | """ 178 | if probas.numel() == 0: 179 | # only void pixels, the gradients should be 0 180 | return probas * 0. 
181 | C = probas.size(1) 182 | losses = [] 183 | class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes 184 | for c in class_to_sum: 185 | fg = (labels == c).float() # foreground for class c 186 | if (classes is 'present' and fg.sum() == 0): 187 | continue 188 | if C == 1: 189 | if len(classes) > 1: 190 | raise ValueError('Sigmoid output possible only with 1 class') 191 | class_pred = probas[:, 0] 192 | else: 193 | class_pred = probas[:, c] 194 | errors = (Variable(fg) - class_pred).abs() 195 | errors_sorted, perm = torch.sort(errors, 0, descending=True) 196 | perm = perm.data 197 | fg_sorted = fg[perm] 198 | losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 199 | return mean(losses) 200 | 201 | 202 | def flatten_probas(probas, labels, ignore=None): 203 | """ 204 | Flattens predictions in the batch 205 | """ 206 | if probas.dim() == 3: 207 | # assumes output of a sigmoid layer 208 | B, H, W = probas.size() 209 | probas = probas.view(B, 1, H, W) 210 | B, C, H, W = probas.size() 211 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 212 | labels = labels.view(-1) 213 | if ignore is None: 214 | return probas, labels 215 | valid = (labels != ignore) 216 | vprobas = probas[valid.nonzero().squeeze()] 217 | vlabels = labels[valid] 218 | return vprobas, vlabels 219 | 220 | def xloss(logits, labels, ignore=None): 221 | """ 222 | Cross entropy loss 223 | """ 224 | return F.cross_entropy(logits, Variable(labels), ignore_index=255) 225 | 226 | 227 | # --------------------------- HELPER FUNCTIONS --------------------------- 228 | def isnan(x): 229 | return x != x 230 | 231 | 232 | def mean(l, ignore_nan=False, empty=0): 233 | """ 234 | nanmean compatible with generators. 235 | """ 236 | l = iter(l) 237 | if ignore_nan: 238 | l = ifilterfalse(isnan, l) 239 | try: 240 | n = 1 241 | acc = next(l) 242 | except StopIteration: 243 | if empty == 'raise': 244 | raise ValueError('Empty mean') 245 | return empty 246 | for n, v in enumerate(l, 2): 247 | acc += v 248 | if n == 1: 249 | return acc 250 | return acc / n -------------------------------------------------------------------------------- /model/FastSCNN.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | #Fast-SCNN: Fast Semantic Segmentation Network 3 | #Paper-Link: https://arxiv.org/pdf/1902.04502.pdf 4 | ################################################################################## 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | __all__ = ["FastSCNN"] 14 | 15 | class _ConvBNReLU(nn.Module): 16 | """Conv-BN-ReLU""" 17 | 18 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs): 19 | super(_ConvBNReLU, self).__init__() 20 | self.conv = nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False), 22 | nn.BatchNorm2d(out_channels), 23 | nn.ReLU(True) 24 | ) 25 | 26 | def forward(self, x): 27 | return self.conv(x) 28 | 29 | 30 | class _DSConv(nn.Module): 31 | """Depthwise Separable Convolutions""" 32 | 33 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 34 | super(_DSConv, self).__init__() 35 | self.conv = nn.Sequential( 36 | nn.Conv2d(dw_channels, dw_channels, 3, stride, 1, groups=dw_channels, bias=False), 37 | nn.BatchNorm2d(dw_channels), 38 | nn.ReLU(True), 39 | 
nn.Conv2d(dw_channels, out_channels, 1, bias=False), 40 | nn.BatchNorm2d(out_channels), 41 | nn.ReLU(True) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.conv(x) 46 | 47 | 48 | class _DWConv(nn.Module): 49 | """Depthwise Convolutions""" 50 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 51 | super(_DWConv, self).__init__() 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(dw_channels, out_channels, 3, stride, 1, groups=dw_channels, bias=False), 54 | nn.BatchNorm2d(out_channels), 55 | nn.ReLU(True) 56 | ) 57 | 58 | def forward(self, x): 59 | return self.conv(x) 60 | 61 | 62 | class LinearBottleneck(nn.Module): 63 | """LinearBottleneck used in MobileNetV2""" 64 | 65 | def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs): 66 | super(LinearBottleneck, self).__init__() 67 | self.use_shortcut = stride == 1 and in_channels == out_channels 68 | self.block = nn.Sequential( 69 | # pw 70 | _ConvBNReLU(in_channels, in_channels * t, 1), 71 | # dw 72 | _DWConv(in_channels * t, in_channels * t, stride), 73 | # pw-linear 74 | nn.Conv2d(in_channels * t, out_channels, 1, bias=False), 75 | nn.BatchNorm2d(out_channels) 76 | ) 77 | 78 | def forward(self, x): 79 | out = self.block(x) 80 | if self.use_shortcut: 81 | out = x + out 82 | return out 83 | 84 | 85 | class PyramidPooling(nn.Module): 86 | """Pyramid pooling module""" 87 | 88 | def __init__(self, in_channels, out_channels, **kwargs): 89 | super(PyramidPooling, self).__init__() 90 | inter_channels = int(in_channels / 4) 91 | self.conv1 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 92 | self.conv2 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 93 | self.conv3 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 94 | self.conv4 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 95 | self.out = _ConvBNReLU(in_channels * 2, out_channels, 1) 96 | 97 | def pool(self, x, size): 98 | avgpool = nn.AdaptiveAvgPool2d(size) 99 | return avgpool(x) 100 | 101 | def upsample(self, x, size): 102 | return F.interpolate(x, size, mode='bilinear', align_corners=True) 103 | 104 | def forward(self, x): 105 | size = x.size()[2:] 106 | feat1 = self.upsample(self.conv1(self.pool(x, 1)), size) 107 | feat2 = self.upsample(self.conv2(self.pool(x, 2)), size) 108 | feat3 = self.upsample(self.conv3(self.pool(x, 3)), size) 109 | feat4 = self.upsample(self.conv4(self.pool(x, 6)), size) 110 | x = torch.cat([x, feat1, feat2, feat3, feat4], dim=1) 111 | x = self.out(x) 112 | return x 113 | 114 | 115 | class LearningToDownsample(nn.Module): 116 | """Learning to downsample module""" 117 | 118 | def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64, **kwargs): 119 | super(LearningToDownsample, self).__init__() 120 | self.conv = _ConvBNReLU(3, dw_channels1, 3, 2) 121 | self.dsconv1 = _DSConv(dw_channels1, dw_channels2, 2) 122 | self.dsconv2 = _DSConv(dw_channels2, out_channels, 2) 123 | 124 | def forward(self, x): 125 | x = self.conv(x) 126 | x = self.dsconv1(x) 127 | x = self.dsconv2(x) 128 | return x 129 | 130 | 131 | class GlobalFeatureExtractor(nn.Module): 132 | """Global feature extractor module""" 133 | 134 | def __init__(self, in_channels=64, block_channels=(64, 96, 128), 135 | out_channels=128, t=6, num_blocks=(3, 3, 3), **kwargs): 136 | super(GlobalFeatureExtractor, self).__init__() 137 | self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], t, 2) 138 | self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], 
num_blocks[1], t, 2) 139 | self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], t, 1) 140 | self.ppm = PyramidPooling(block_channels[2], out_channels) 141 | 142 | def _make_layer(self, block, inplanes, planes, blocks, t=6, stride=1): 143 | layers = [] 144 | layers.append(block(inplanes, planes, t, stride)) 145 | for i in range(1, blocks): 146 | layers.append(block(planes, planes, t, 1)) 147 | return nn.Sequential(*layers) 148 | 149 | def forward(self, x): 150 | x = self.bottleneck1(x) 151 | x = self.bottleneck2(x) 152 | x = self.bottleneck3(x) 153 | x = self.ppm(x) 154 | return x 155 | 156 | 157 | class FeatureFusionModule(nn.Module): 158 | """Feature fusion module""" 159 | 160 | def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs): 161 | super(FeatureFusionModule, self).__init__() 162 | self.scale_factor = scale_factor 163 | self.dwconv = _DWConv(lower_in_channels, out_channels, 1) 164 | self.conv_lower_res = nn.Sequential( 165 | nn.Conv2d(out_channels, out_channels, 1), 166 | nn.BatchNorm2d(out_channels) 167 | ) 168 | self.conv_higher_res = nn.Sequential( 169 | nn.Conv2d(highter_in_channels, out_channels, 1), 170 | nn.BatchNorm2d(out_channels) 171 | ) 172 | self.relu = nn.ReLU(True) 173 | 174 | def forward(self, higher_res_feature, lower_res_feature): 175 | _, _, h, w = higher_res_feature.size() 176 | lower_res_feature = F.interpolate(lower_res_feature, size=(h,w), mode='bilinear', align_corners=True) 177 | lower_res_feature = self.dwconv(lower_res_feature) 178 | lower_res_feature = self.conv_lower_res(lower_res_feature) 179 | 180 | higher_res_feature = self.conv_higher_res(higher_res_feature) 181 | out = higher_res_feature + lower_res_feature 182 | return self.relu(out) 183 | 184 | 185 | class Classifer(nn.Module): 186 | """Classifer""" 187 | 188 | def __init__(self, dw_channels, num_classes, stride=1, **kwargs): 189 | super(Classifer, self).__init__() 190 | self.dsconv1 = _DSConv(dw_channels, dw_channels, stride) 191 | self.dsconv2 = _DSConv(dw_channels, dw_channels, stride) 192 | self.conv = nn.Sequential( 193 | nn.Dropout(0.1), 194 | nn.Conv2d(dw_channels, num_classes, 1) 195 | ) 196 | 197 | def forward(self, x): 198 | x = self.dsconv1(x) 199 | x = self.dsconv2(x) 200 | x = self.conv(x) 201 | return x 202 | 203 | 204 | class FastSCNN(nn.Module): 205 | def __init__(self, classes, aux=False, **kwargs): 206 | super(FastSCNN, self).__init__() 207 | self.aux = aux 208 | self.learning_to_downsample = LearningToDownsample(32, 48, 64) 209 | self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) 210 | self.feature_fusion = FeatureFusionModule(64, 128, 128) 211 | self.classifier = Classifer(128, classes) 212 | if self.aux: 213 | self.auxlayer = nn.Sequential( 214 | nn.Conv2d(64, 32, 3, padding=1, bias=False), 215 | nn.BatchNorm2d(32), 216 | nn.ReLU(True), 217 | nn.Dropout(0.1), 218 | nn.Conv2d(32, classes, 1) 219 | ) 220 | 221 | def forward(self, x): 222 | size = x.size()[2:] 223 | higher_res_features = self.learning_to_downsample(x) 224 | x = self.global_feature_extractor(higher_res_features) 225 | x = self.feature_fusion(higher_res_features, x) 226 | x = self.classifier(x) 227 | outputs = [] 228 | x = F.interpolate(x, size, mode='bilinear', align_corners=True) 229 | outputs.append(x) 230 | if self.aux: 231 | auxout = self.auxlayer(higher_res_features) 232 | auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) 233 | 
outputs.append(auxout) 234 | return x 235 | # return tuple(outputs) 236 | 237 | 238 | """print layers and params of network""" 239 | if __name__ == '__main__': 240 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 241 | model = FastSCNN(classes=19).to(device) 242 | summary(model,(3,512,1024)) 243 | 244 | -------------------------------------------------------------------------------- /model/FPENet.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #FPENet:Feature Pyramid Encoding Network for Real-time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1909.08599v1.pdf 4 | ################################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | 11 | 12 | __all__ = ["FPENet"] 13 | 14 | 15 | 16 | def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1, groups=1, bias=False): 17 | """3x3 convolution with padding""" 18 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 19 | padding=padding, dilation=dilation, groups=groups,bias=bias) 20 | 21 | 22 | def conv1x1(in_planes, out_planes, stride=1, bias=False): 23 | """1x1 convolution""" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=bias) 25 | 26 | 27 | class SEModule(nn.Module): 28 | def __init__(self, channels, reduction=16): 29 | super(SEModule, self).__init__() 30 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 31 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0) 34 | self.sigmoid = nn.Sigmoid() 35 | 36 | def forward(self, input): 37 | x = self.avg_pool(input) 38 | x = self.fc1(x) 39 | x = self.relu(x) 40 | x = self.fc2(x) 41 | x = self.sigmoid(x) 42 | return input * x 43 | 44 | 45 | class FPEBlock(nn.Module): 46 | 47 | def __init__(self, inplanes, outplanes, dilat, downsample=None, stride=1, t=1, scales=4, se=False, norm_layer=None): 48 | super(FPEBlock, self).__init__() 49 | if inplanes % scales != 0: 50 | raise ValueError('Planes must be divisible by scales') 51 | if norm_layer is None: 52 | norm_layer = nn.BatchNorm2d 53 | bottleneck_planes = inplanes * t 54 | self.conv1 = conv1x1(inplanes, bottleneck_planes, stride) 55 | self.bn1 = norm_layer(bottleneck_planes) 56 | self.conv2 = nn.ModuleList([conv3x3(bottleneck_planes // scales, bottleneck_planes // scales, 57 | groups=(bottleneck_planes // scales),dilation=dilat[i], 58 | padding=1*dilat[i]) for i in range(scales)]) 59 | self.bn2 = nn.ModuleList([norm_layer(bottleneck_planes // scales) for _ in range(scales)]) 60 | self.conv3 = conv1x1(bottleneck_planes, outplanes) 61 | self.bn3 = norm_layer(outplanes) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.se = SEModule(outplanes) if se else None 64 | self.downsample = downsample 65 | self.stride = stride 66 | self.scales = scales 67 | 68 | def forward(self, x): 69 | identity = x 70 | 71 | out = self.conv1(x) 72 | out = self.bn1(out) 73 | out = self.relu(out) 74 | 75 | xs = torch.chunk(out, self.scales, 1) 76 | ys = [] 77 | for s in range(self.scales): 78 | if s == 0: 79 | ys.append(self.relu(self.bn2[s](self.conv2[s](xs[s])))) 80 | else: 81 | ys.append(self.relu(self.bn2[s](self.conv2[s](xs[s] + ys[-1])))) 82 | 
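        # Each chunk s is filtered by its own dilated depthwise 3x3 (dilation dilat[s]); from the second
        # chunk onward the previous scale's output is added first, so information flows across pyramid
        # levels before the scale outputs are concatenated back to the bottleneck width.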
out = torch.cat(ys, 1) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.se is not None: 88 | out = self.se(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(identity) 92 | 93 | out += identity 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | 100 | class MEUModule(nn.Module): 101 | def __init__(self, channels_high, channels_low, channel_out): 102 | super(MEUModule, self).__init__() 103 | 104 | self.conv1x1_low = nn.Conv2d(channels_low, channel_out, kernel_size=1, bias=False) 105 | self.bn_low = nn.BatchNorm2d(channel_out) 106 | self.sa_conv = nn.Conv2d(1, 1, kernel_size=1, bias=False) 107 | 108 | self.conv1x1_high = nn.Conv2d(channels_high, channel_out, kernel_size=1, bias=False) 109 | self.bn_high = nn.BatchNorm2d(channel_out) 110 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 111 | self.ca_conv = nn.Conv2d(channel_out, channel_out, kernel_size=1, bias=False) 112 | 113 | self.sa_sigmoid = nn.Sigmoid() 114 | self.ca_sigmoid = nn.Sigmoid() 115 | self.relu = nn.ReLU(inplace=True) 116 | 117 | def forward(self, fms_high, fms_low): 118 | """ 119 | :param fms_high: High level Feature map. Tensor. 120 | :param fms_low: Low level Feature map. Tensor. 121 | """ 122 | _, _, h, w = fms_low.shape 123 | 124 | # 125 | fms_low = self.conv1x1_low(fms_low) 126 | fms_low= self.bn_low(fms_low) 127 | sa_avg_out = self.sa_sigmoid(self.sa_conv(torch.mean(fms_low, dim=1, keepdim=True))) 128 | 129 | # 130 | fms_high = self.conv1x1_high(fms_high) 131 | fms_high = self.bn_high(fms_high) 132 | ca_avg_out = self.ca_sigmoid(self.relu(self.ca_conv(self.avg_pool(fms_high)))) 133 | 134 | # 135 | fms_high_up = F.interpolate(fms_high, size=(h,w), mode='bilinear', align_corners=True) 136 | fms_sa_att = sa_avg_out * fms_high_up 137 | # 138 | fms_ca_att = ca_avg_out * fms_low 139 | 140 | out = fms_ca_att + fms_sa_att 141 | 142 | return out 143 | 144 | 145 | class FPENet(nn.Module): 146 | def __init__(self, classes=19, zero_init_residual=False, 147 | width=16, scales=4, se=False, norm_layer=None): 148 | super(FPENet, self).__init__() 149 | if norm_layer is None: 150 | norm_layer = nn.BatchNorm2d 151 | outplanes = [int(width * 2 ** i) for i in range(3)] # planes=[16,32,64] 152 | 153 | self.block_num = [1,3,9] 154 | self.dilation = [1,2,4,8] 155 | 156 | self.inplanes = outplanes[0] 157 | self.conv1 = nn.Conv2d(3, outplanes[0], kernel_size=3, stride=2, padding=1,bias=False) 158 | self.bn1 = norm_layer(outplanes[0]) 159 | self.relu = nn.ReLU(inplace=True) 160 | self.layer1 = self._make_layer(FPEBlock, outplanes[0], self.block_num[0], dilation=self.dilation, 161 | stride=1, t=1, scales=scales, se=se, norm_layer=norm_layer) 162 | self.layer2 = self._make_layer(FPEBlock, outplanes[1], self.block_num[1], dilation=self.dilation, 163 | stride=2, t=4, scales=scales, se=se, norm_layer=norm_layer) 164 | self.layer3 = self._make_layer(FPEBlock, outplanes[2], self.block_num[2], dilation=self.dilation, 165 | stride=2, t=4, scales=scales, se=se, norm_layer=norm_layer) 166 | self.meu1 = MEUModule(64,32,64) 167 | self.meu2 = MEUModule(64,16,32) 168 | 169 | # Projection layer 170 | self.project_layer = nn.Conv2d(32, classes, kernel_size = 1) 171 | 172 | for m in self.modules(): 173 | if isinstance(m, nn.Conv2d): 174 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 175 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 176 | nn.init.constant_(m.weight, 1) 177 | nn.init.constant_(m.bias, 0) 178 | 179 | # Zero-initialize the last BN in each residual branch, 180 
| # so that the residual branch starts with zeros, and each residual block behaves like an identity. 181 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 182 | if zero_init_residual: 183 | for m in self.modules(): 184 | if isinstance(m, FPEBlock): 185 | nn.init.constant_(m.bn3.weight, 0) 186 | 187 | def _make_layer(self, block, planes, blocks, dilation, stride=1, t=1, scales=4, se=False, norm_layer=None): 188 | if norm_layer is None: 189 | norm_layer = nn.BatchNorm2d 190 | downsample = None 191 | if stride != 1 or self.inplanes != planes: 192 | downsample = nn.Sequential( 193 | conv1x1(self.inplanes, planes, stride), 194 | norm_layer(planes), 195 | ) 196 | 197 | layers = [] 198 | layers.append(block(self.inplanes, planes, dilat=dilation, downsample=downsample, stride=stride, t=t, scales=scales, se=se, 199 | norm_layer=norm_layer)) 200 | self.inplanes = planes 201 | for _ in range(1, blocks): 202 | layers.append(block(self.inplanes, planes, dilat=dilation, scales=scales, se=se, norm_layer=norm_layer)) 203 | 204 | return nn.Sequential(*layers) 205 | 206 | def forward(self, x): 207 | ## stage 1 208 | x = self.conv1(x) 209 | x = self.bn1(x) 210 | x = self.relu(x) 211 | x_1 = self.layer1(x) 212 | 213 | ## stage 2 214 | x_2_0 = self.layer2[0](x_1) 215 | x_2_1 = self.layer2[1](x_2_0) 216 | x_2_2 = self.layer2[2](x_2_1) 217 | x_2 = x_2_0 + x_2_2 218 | 219 | ## stage 3 220 | x_3_0 = self.layer3[0](x_2) 221 | x_3_1 = self.layer3[1](x_3_0) 222 | x_3_2 = self.layer3[2](x_3_1) 223 | x_3_3 = self.layer3[3](x_3_2) 224 | x_3_4 = self.layer3[4](x_3_3) 225 | x_3_5 = self.layer3[5](x_3_4) 226 | x_3_6 = self.layer3[6](x_3_5) 227 | x_3_7 = self.layer3[7](x_3_6) 228 | x_3_8 = self.layer3[8](x_3_7) 229 | x_3 = x_3_0 + x_3_8 230 | 231 | 232 | 233 | x2 = self.meu1(x_3, x_2) 234 | 235 | x1 = self.meu2(x2, x_1) 236 | 237 | output = self.project_layer(x1) 238 | 239 | # Bilinear interpolation x2 240 | output = F.interpolate(output,scale_factor=2, mode = 'bilinear', align_corners=True) 241 | 242 | return output 243 | 244 | 245 | """print layers and params of network""" 246 | if __name__ == '__main__': 247 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 248 | model = FPENet(classes=19).to(device) 249 | summary(model,(3,512,1024)) 250 | 251 | -------------------------------------------------------------------------------- /model/LEDNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #LEDNet: A Lightweight Encoder-Decoder Network for Real-Time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/abs/1905.02423 4 | ###################################################################################### 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | 14 | __all__ = ["LEDNet"] 15 | 16 | def Split(x): 17 | c = int(x.size()[1]) 18 | c1 = round(c * 0.5) 19 | x1 = x[:, :c1, :, :].contiguous() 20 | x2 = x[:, c1:, :, :].contiguous() 21 | return x1, x2 22 | 23 | 24 | def Merge(x1,x2): 25 | return torch.cat((x1,x2),1) 26 | 27 | 28 | def Channel_shuffle(x,groups): 29 | batchsize, num_channels, height, width = x.data.size() 30 | 31 | channels_per_group = num_channels // groups 32 | 33 | #reshape 34 | x = x.view(batchsize,groups, 35 | channels_per_group,height,width) 36 | 37 | x = torch.transpose(x,1,2).contiguous() 38 | 39 | #flatten 40 | x = 
x.view(batchsize,-1,height,width) 41 | 42 | return x 43 | 44 | 45 | class PermutationBlock(nn.Module): 46 | def __init__(self, groups): 47 | super(PermutationBlock, self).__init__() 48 | self.groups = groups 49 | 50 | def forward(self, input): 51 | n, c, h, w = input.size() 52 | G = self.groups 53 | output = input.view(n, G, c // G, h, w).permute(0, 2, 1, 3, 4).contiguous().view(n, c, h, w) 54 | return output 55 | 56 | 57 | 58 | class Conv2dBnRelu(nn.Module): 59 | def __init__(self,in_ch,out_ch,kernel_size=3,stride=1,padding=0,dilation=1,bias=True): 60 | super(Conv2dBnRelu,self).__init__() 61 | self.conv = nn.Sequential( 62 | nn.Conv2d(in_ch,out_ch,kernel_size,stride,padding,dilation=dilation,bias=bias), 63 | nn.BatchNorm2d(out_ch, eps=1e-3), 64 | nn.ReLU(inplace=True) 65 | ) 66 | 67 | def forward(self, x): 68 | return self.conv(x) 69 | 70 | 71 | class DownsamplerBlock(nn.Module): 72 | def __init__(self, ninput, noutput): 73 | super().__init__() 74 | 75 | self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True) 76 | self.pool = nn.MaxPool2d(2, stride=2) 77 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 78 | self.relu = nn.ReLU(inplace=True) 79 | 80 | def forward(self, input): 81 | x1 = self.pool(input) 82 | x2 = self.conv(input) 83 | 84 | diffY = x2.size()[2] - x1.size()[2] 85 | diffX = x2.size()[3] - x1.size()[3] 86 | 87 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 88 | diffY // 2, diffY - diffY // 2]) 89 | 90 | output = torch.cat([x2, x1], 1) 91 | output = self.bn(output) 92 | output = self.relu(output) 93 | return output 94 | 95 | 96 | 97 | # class Interpolate(nn.Module): 98 | # def __init__(self,size,mode): 99 | # super(Interpolate,self).__init__() 100 | # self.size = size 101 | # self.mode = mode 102 | # def forward(self,x): 103 | # x = F.interpolate(x,size=self.size,mode=self.mode,align_corners=True) 104 | # return x 105 | 106 | 107 | 108 | class SS_nbt_module_paper(nn.Module): 109 | def __init__(self, chann, dropprob, dilated): 110 | super().__init__() 111 | 112 | oup_inc = chann//2 113 | 114 | #dw 115 | self.conv3x1_1_l = nn.Conv2d(oup_inc, oup_inc, (3,1), stride=1, padding=(1,0), bias=True) 116 | 117 | self.conv1x3_1_l = nn.Conv2d(oup_inc, oup_inc, (1,3), stride=1, padding=(0,1), bias=True) 118 | 119 | self.bn1_l = nn.BatchNorm2d(oup_inc, eps=1e-03) 120 | 121 | self.conv3x1_2_l = nn.Conv2d(oup_inc, oup_inc, (3,1), stride=1, padding=(1*dilated,0), bias=True, dilation = (dilated,1)) 122 | 123 | self.conv1x3_2_l = nn.Conv2d(oup_inc, oup_inc, (1,3), stride=1, padding=(0,1*dilated), bias=True, dilation = (1,dilated)) 124 | 125 | self.bn2_l = nn.BatchNorm2d(oup_inc, eps=1e-03) 126 | 127 | #dw 128 | self.conv3x1_1_r = nn.Conv2d(oup_inc, oup_inc, (3,1), stride=1, padding=(1,0), bias=True) 129 | 130 | self.conv1x3_1_r = nn.Conv2d(oup_inc, oup_inc, (1,3), stride=1, padding=(0,1), bias=True) 131 | 132 | self.bn1_r = nn.BatchNorm2d(oup_inc, eps=1e-03) 133 | 134 | self.conv3x1_2_r = nn.Conv2d(oup_inc, oup_inc, (3,1), stride=1, padding=(1*dilated,0), bias=True, dilation = (dilated,1)) 135 | 136 | self.conv1x3_2_r = nn.Conv2d(oup_inc, oup_inc, (1,3), stride=1, padding=(0,1*dilated), bias=True, dilation = (1,dilated)) 137 | 138 | self.bn2_r = nn.BatchNorm2d(oup_inc, eps=1e-03) 139 | 140 | self.relu = nn.ReLU(inplace=True) 141 | self.dropout = nn.Dropout2d(dropprob) 142 | 143 | # self.channel_shuffle = PermutationBlock(2) 144 | 145 | 146 | def forward(self, x): 147 | 148 | residual = x 149 | 150 | x1, x2 = Split(x) 151 | 152 | output1 = self.conv3x1_1_l(x1) 153 | 
output1 = self.relu(output1) 154 | output1 = self.conv1x3_1_l(output1) 155 | output1 = self.bn1_l(output1) 156 | output1_mid = self.relu(output1) 157 | 158 | output2 = self.conv1x3_1_r(x2) 159 | output2 = self.relu(output2) 160 | output2 = self.conv3x1_1_r(output2) 161 | output2 = self.bn1_r(output2) 162 | output2_mid = self.relu(output2) 163 | 164 | output1 = self.conv3x1_2_l(output1_mid) 165 | output1 = self.relu(output1) 166 | output1 = self.conv1x3_2_l(output1) 167 | output1 = self.bn2_l(output1) 168 | 169 | output2 = self.conv1x3_2_r(output2_mid) 170 | output2 = self.relu(output2) 171 | output2 = self.conv3x1_2_r(output2) 172 | output2 = self.bn2_r(output2) 173 | 174 | if (self.dropout.p != 0): 175 | output1 = self.dropout(output1) 176 | output2 = self.dropout(output2) 177 | 178 | out = Merge(output1, output2) 179 | 180 | out = F.relu(residual + out) 181 | 182 | # out = self.channel_shuffle(out) ### channel shuffle 183 | out = Channel_shuffle(out,2) ### channel shuffle 184 | 185 | return out 186 | 187 | # return ### channel shuffle 188 | 189 | 190 | class APNModule(nn.Module): 191 | def __init__(self, in_ch, out_ch): 192 | super(APNModule, self).__init__() 193 | # global pooling branch 194 | self.branch1 = nn.Sequential( 195 | nn.AdaptiveAvgPool2d(1), 196 | Conv2dBnRelu(in_ch, out_ch, kernel_size=1, stride=1, padding=0) 197 | ) 198 | 199 | # midddle branch 200 | self.mid = nn.Sequential( 201 | Conv2dBnRelu(in_ch, out_ch, kernel_size=1, stride=1, padding=0) 202 | ) 203 | 204 | 205 | self.down1 = nn.Sequential( 206 | nn.Conv2d(in_ch,1,kernel_size=(7,1),stride=(2,1),padding=(3,0),bias=True), 207 | nn.Conv2d(1,1,kernel_size=(1,7),stride=(1,2),padding=(0,3),bias=True), 208 | nn.BatchNorm2d(1, eps=1e-03), 209 | nn.ReLU(inplace=True) 210 | ) 211 | 212 | 213 | self.down2 = nn.Sequential( 214 | nn.Conv2d(1,1,kernel_size=(5,1),stride=(2,1),padding=(2,0),bias=True), 215 | nn.Conv2d(1,1,kernel_size=(1,5),stride=(1,2),padding=(0,2),bias=True), 216 | nn.BatchNorm2d(1, eps=1e-03), 217 | nn.ReLU(inplace=True) 218 | ) 219 | 220 | self.down3 = nn.Sequential( 221 | nn.Conv2d(1,1,kernel_size=(3,1),stride=(2,1),padding=(1,0),bias=True), 222 | nn.Conv2d(1,1,kernel_size=(1,3),stride=(1,2),padding=(0,1),bias=True), 223 | nn.BatchNorm2d(1, eps=1e-03), 224 | nn.ReLU(inplace=True), 225 | # 226 | nn.Conv2d(1,1,kernel_size=(3,1),stride=1,padding=(1,0),bias=True), 227 | nn.Conv2d(1,1,kernel_size=(1,3),stride=1,padding=(0,1),bias=True), 228 | nn.BatchNorm2d(1, eps=1e-03), 229 | nn.ReLU(inplace=True) 230 | ) 231 | self.conv2 = nn.Sequential( 232 | nn.Conv2d(1,1,kernel_size=(5,1),stride=1,padding=(2,0),bias=True), 233 | nn.Conv2d(1,1,kernel_size=(1,5),stride=1,padding=(0,2),bias=True), 234 | nn.BatchNorm2d(1, eps=1e-03), 235 | nn.ReLU(inplace=True) 236 | ) 237 | self.conv1 = nn.Sequential( 238 | nn.Conv2d(1,1,kernel_size=(7,1),stride=1,padding=(3,0),bias=True), 239 | nn.Conv2d(1,1,kernel_size=(1,7),stride=1,padding=(0,3),bias=True), 240 | nn.BatchNorm2d(1, eps=1e-03), 241 | nn.ReLU(inplace=True) 242 | ) 243 | def forward(self, x): 244 | 245 | h,w = x.size()[2:] 246 | 247 | b1 = self.branch1(x) 248 | b1= F.interpolate(b1, size=(h, w), mode="bilinear", align_corners=True) 249 | 250 | mid = self.mid(x) 251 | 252 | x1 = self.down1(x) 253 | x2 = self.down2(x1) 254 | x3 = self.down3(x2) 255 | x3= F.interpolate(x3, size=((h+3) // 4, (w+3) // 4), mode="bilinear", align_corners=True) 256 | 257 | x2 = self.conv2(x2) 258 | x = x2 + x3 259 | x= F.interpolate(x, size=((h+1) // 2, (w+1) // 2), mode="bilinear", align_corners=True) 
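        # Top-down fusion so far: the 1/8-scale branch was upsampled and added to the 1/4-scale branch,
        # and the sum was upsampled to 1/2 scale. Next it is merged with the 1/2-scale branch, upsampled
        # to the input size, used to weight the middle branch, and added to the global-pooling branch.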
260 | 261 | 262 | x1 = self.conv1(x1) 263 | x = x + x1 264 | x= F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True) 265 | 266 | x = torch.mul(x, mid) 267 | 268 | x = x + b1 269 | 270 | return x 271 | 272 | 273 | class LEDNet(nn.Module): 274 | def __init__(self, classes): 275 | super().__init__() 276 | self.initial_block = DownsamplerBlock(3,32) 277 | 278 | self.layers = nn.ModuleList() 279 | 280 | for x in range(0, 3): 281 | self.layers.append(SS_nbt_module_paper(32, 0.03, 1)) 282 | 283 | 284 | self.layers.append(DownsamplerBlock(32,64)) 285 | 286 | 287 | for x in range(0, 2): 288 | self.layers.append(SS_nbt_module_paper(64, 0.03, 1)) 289 | 290 | self.layers.append(DownsamplerBlock(64,128)) 291 | 292 | for x in range(0, 1): 293 | self.layers.append(SS_nbt_module_paper(128, 0.3, 1)) 294 | self.layers.append(SS_nbt_module_paper(128, 0.3, 2)) 295 | self.layers.append(SS_nbt_module_paper(128, 0.3, 5)) 296 | self.layers.append(SS_nbt_module_paper(128, 0.3, 9)) 297 | 298 | for x in range(0, 1): 299 | self.layers.append(SS_nbt_module_paper(128, 0.3, 2)) 300 | self.layers.append(SS_nbt_module_paper(128, 0.3, 5)) 301 | self.layers.append(SS_nbt_module_paper(128, 0.3, 9)) 302 | self.layers.append(SS_nbt_module_paper(128, 0.3, 17)) 303 | 304 | self.apn = APNModule(in_ch=128,out_ch=classes) 305 | 306 | #self.output_conv = nn.ConvTranspose2d(128, num_classes, kernel_size=4, stride=2, padding=1, output_padding=0, bias=True) 307 | #self.output_conv = nn.ConvTranspose2d(128, num_classes, kernel_size=3, stride=2, padding=1, output_padding=1, bias=True) 308 | #self.output_conv = nn.ConvTranspose2d(128, num_classes, kernel_size=2, stride=2, padding=0, output_padding=0, bias=True) 309 | 310 | # self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) 311 | 312 | def forward(self, input): 313 | 314 | output = self.initial_block(input) 315 | 316 | for layer in self.layers: 317 | output = layer(output) 318 | 319 | output = self.apn(output) 320 | out = F.interpolate(output, input.size()[2:], mode="bilinear", align_corners=True) 321 | # print(out.shape) 322 | 323 | return out 324 | 325 | 326 | 327 | """print layers and params of network""" 328 | if __name__ == '__main__': 329 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 330 | model = LEDNet(classes=19).to(device) 331 | summary(model,(3,360,480)) 332 | -------------------------------------------------------------------------------- /utils/losses/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | from utils.losses.lovasz_losses import lovasz_softmax 6 | from torch.nn.modules.loss import _Loss, _WeightedLoss 7 | from torch.nn import NLLLoss2d 8 | 9 | 10 | __all__ = ["CrossEntropyLoss2d", "CrossEntropyLoss2dLabelSmooth", 11 | "FocalLoss2d", "LDAMLoss", "ProbOhemCrossEntropy2d", 12 | "LovaszSoftmax"] 13 | 14 | 15 | class CrossEntropyLoss2d(_WeightedLoss): 16 | """ 17 | Standard pytorch weighted nn.CrossEntropyLoss 18 | """ 19 | 20 | def __init__(self, weight=None, ignore_label=255, reduction='mean'): 21 | super(CrossEntropyLoss2d, self).__init__() 22 | 23 | self.nll_loss = nn.CrossEntropyLoss(weight, ignore_index=ignore_label, reduction=reduction) 24 | 25 | def forward(self, output, target): 26 | """ 27 | Forward pass 28 | :param output: torch.tensor (NxC) 29 | :param target: torch.tensor (N) 30 | :return: scalar 31 | """ 32 | return self.nll_loss(output, 
target) 33 | 34 | 35 | # class CrossEntropyLoss2d(nn.Module): 36 | # ''' 37 | # This file defines a cross entropy loss for 2D images 38 | # ''' 39 | # 40 | # def __init__(self, weight=None, ignore_label=255): 41 | # ''' 42 | # :param weight: 1D weight vector to deal with the class-imbalance 43 | # Obtaining log-probabilities in a neural network is easily achieved by adding a LogSoftmax layer in the last layer of your network. 44 | # You may use CrossEntropyLoss instead, if you prefer not to add an extra layer. 45 | # ''' 46 | # super().__init__() 47 | # 48 | # # self.loss = nn.NLLLoss2d(weight, ignore_index=255) 49 | # self.loss = nn.NLLLoss(weight, ignore_index=ignore_label) 50 | # 51 | # def forward(self, outputs, targets): 52 | # return self.loss(F.log_softmax(outputs, dim=1), targets) 53 | 54 | 55 | 56 | class CrossEntropyLoss2dLabelSmooth(_WeightedLoss): 57 | """ 58 | Refer from https://arxiv.org/pdf/1512.00567.pdf 59 | :param target: N, 60 | :param n_classes: int 61 | :param eta: float 62 | :return: 63 | N x C onehot smoothed vector 64 | """ 65 | 66 | def __init__(self, weight=None, ignore_label=255, epsilon=0.1, reduction='mean'): 67 | super(CrossEntropyLoss2dLabelSmooth, self).__init__() 68 | self.epsilon = epsilon 69 | self.nll_loss = nn.CrossEntropyLoss(weight, ignore_index=ignore_label, reduction=reduction) 70 | 71 | def forward(self, output, target): 72 | """ 73 | Forward pass 74 | :param output: torch.tensor (NxC) 75 | :param target: torch.tensor (N) 76 | :return: scalar 77 | """ 78 | n_classes = output.size(1) 79 | # batchsize, num_class = input.size() 80 | # log_probs = F.log_softmax(inputs, dim=1) 81 | targets = torch.zeros_like(output).scatter_(1, target.unsqueeze(1), 1) 82 | targets = (1 - self.epsilon) * targets + self.epsilon / n_classes 83 | 84 | return self.nll_loss(output, targets) 85 | 86 | 87 | """ 88 | https://arxiv.org/abs/1708.02002 89 | # Credit to https://github.com/clcarwin/focal_loss_pytorch 90 | """ 91 | class FocalLoss2d(nn.Module): 92 | def __init__(self, alpha=0.5, gamma=2, weight=None, ignore_index=255, size_average=True): 93 | super().__init__() 94 | self.alpha = alpha 95 | self.gamma = gamma 96 | self.weight = weight 97 | self.ignore_index = ignore_index 98 | self.size_average = size_average 99 | self.ce_fn = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_index) 100 | 101 | def forward(self, output, target): 102 | 103 | if output.dim()>2: 104 | output = output.contiguous().view(output.size(0), output.size(1), -1) 105 | output = output.transpose(1,2) 106 | output = output.contiguous().view(-1, output.size(2)).squeeze() 107 | if target.dim()==4: 108 | target = target.contiguous().view(target.size(0), target.size(1), -1) 109 | target = target.transpose(1,2) 110 | target = target.contiguous().view(-1, target.size(2)).squeeze() 111 | elif target.dim()==3: 112 | target = target.view(-1) 113 | else: 114 | target = target.view(-1, 1) 115 | 116 | logpt = self.ce_fn(output, target) 117 | pt = torch.exp(-logpt) 118 | loss = ((1-pt) ** self.gamma) * self.alpha * logpt 119 | if self.size_average: 120 | return loss.mean() 121 | else: 122 | return loss.sum() 123 | 124 | 125 | """ 126 | https://arxiv.org/pdf/1906.07413.pdf 127 | """ 128 | class LDAMLoss(nn.Module): 129 | 130 | def __init__(self, cls_num_list, max_m=0.5, weight=None, s=30): 131 | super(LDAMLoss, self).__init__() 132 | m_list = 1.0 / np.sqrt(np.sqrt(cls_num_list)) 133 | m_list = m_list * (max_m / np.max(m_list)) 134 | m_list = torch.cuda.FloatTensor(m_list) 135 | self.m_list = m_list 
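        # m_list holds the per-class LDAM margins: m_j is proportional to n_j^(-1/4), rescaled so the largest margin equals max_m.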
136 |         assert s > 0
137 |         self.s = s
138 |         self.weight = weight
139 | 
140 |     def forward(self, x, target):
141 |         index = torch.zeros_like(x, dtype=torch.uint8)
142 |         index.scatter_(1, target.data.view(-1, 1), 1)
143 | 
144 |         index_float = index.type(torch.cuda.FloatTensor)
145 |         batch_m = torch.matmul(self.m_list[None, :], index_float.transpose(0, 1))
146 |         batch_m = batch_m.view((-1, 1))
147 |         x_m = x - batch_m
148 | 
149 |         output = torch.where(index.bool(), x_m, x)  # torch.where expects a bool mask
150 |         return F.cross_entropy(self.s * output, target, weight=self.weight)
151 | 
152 | 
153 | 
154 | 
155 | # Adapted from OCNet Repository (https://github.com/PkuRainBow/OCNet)
156 | class ProbOhemCrossEntropy2d(nn.Module):
157 |     def __init__(self, ignore_label=255, reduction='mean', thresh=0.6, min_kept=256,
158 |                  down_ratio=1, use_weight=False):
159 |         super(ProbOhemCrossEntropy2d, self).__init__()
160 |         self.ignore_label = ignore_label
161 |         self.thresh = float(thresh)
162 |         self.min_kept = int(min_kept)
163 |         self.down_ratio = down_ratio
164 |         if use_weight:
165 |             print("w/ class balance")
166 |             weight = torch.FloatTensor(
167 |                 [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489,
168 |                  0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955,
169 |                  1.0865, 1.1529, 1.0507])
170 |             self.criterion = nn.CrossEntropyLoss(reduction=reduction,
171 |                                                  weight=weight,
172 |                                                  ignore_index=ignore_label)
173 |         else:
174 |             print("w/o class balance")
175 |             self.criterion = nn.CrossEntropyLoss(reduction=reduction,
176 |                                                  ignore_index=ignore_label)
177 | 
178 |     def forward(self, pred, target):
179 |         b, c, h, w = pred.size()
180 |         target = target.view(-1)
181 |         valid_mask = target.ne(self.ignore_label)
182 |         target = target * valid_mask.long()
183 |         num_valid = valid_mask.sum()
184 | 
185 |         prob = F.softmax(pred, dim=1)
186 |         prob = (prob.transpose(0, 1)).reshape(c, -1)
187 | 
188 |         if self.min_kept > num_valid:
189 |             print('Labels: {}'.format(num_valid))
190 |             pass
191 |         elif num_valid > 0:
192 |             prob = prob.masked_fill_(~valid_mask, 1)  # ignored pixels get probability 1 so they are never selected
193 |             mask_prob = prob[
194 |                 target, torch.arange(len(target), dtype=torch.long)]
195 |             threshold = self.thresh
196 |             if self.min_kept > 0:
197 |                 index = mask_prob.argsort()
198 |                 threshold_index = index[min(len(index), self.min_kept) - 1]
199 |                 if mask_prob[threshold_index] > self.thresh:
200 |                     threshold = mask_prob[threshold_index]
201 |                 kept_mask = mask_prob.le(threshold)
202 |                 target = target * kept_mask.long()
203 |                 valid_mask = valid_mask * kept_mask
204 |                 print('Valid Mask: {}'.format(valid_mask.sum()))
205 | 
206 |         target = target.masked_fill_(~valid_mask, self.ignore_label)
207 |         target = target.view(b, h, w)
208 | 
209 |         return self.criterion(pred, target)
210 | 
211 | 
212 | # ==========================================================================================================================
213 | # ==========================================================================================================================
214 | # class-balanced loss
215 | class CrossEntropy2d(nn.Module):
216 | 
217 |     def __init__(self, size_average=True, ignore_label=255, use_weight=True):
218 |         super(CrossEntropy2d, self).__init__()
219 |         self.size_average = size_average
220 |         self.ignore_label = ignore_label
221 |         self.use_weight = use_weight
222 |         # if self.use_weight:
223 |         #     self.weight = torch.FloatTensor(
224 |         #         [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489,
225 |         #          0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955,
226 |         #          1.0865, 1.1529, 1.0507])
227 |         #     print('CrossEntropy2d weights : {}'.format(self.weight))
228 |         # else:
229 |         #     self.weight = None
230 | 
231 | 
232 |     def forward(self, predict, target, weight=None):
233 | 
234 |         """
235 |         Args:
236 |             predict:(n, c, h, w)
237 |             target:(n, h, w)
238 |             weight (Tensor, optional): a manual rescaling weight given to each class.
239 |                 If given, has to be a Tensor of size "nclasses"
240 |         """
241 |         # Variable(torch.randn(2,10)
242 |         if self.use_weight:
243 |             print('target size {}'.format(target.shape))
244 |             freq = np.zeros(19)
245 |             for k in range(19):
246 |                 mask = (target[:, :, :] == k)
247 |                 freq[k] = torch.sum(mask)
248 |                 print('{}th frequency {}'.format(k, freq[k]))
249 |             weight = freq / np.sum(freq)
250 |             print(weight)
251 |             self.weight = torch.FloatTensor(weight)
252 |             print('Online class weight: {}'.format(self.weight))
253 |         else:
254 |             self.weight = None
255 | 
256 | 
257 |         criterion = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_label)
258 |         # torch.FloatTensor([2.87, 13.19, 5.11, 37.98, 35.14, 30.9, 26.23, 40.24, 6.66, 32.07, 21.08, 28.14, 46.01, 10.35, 44.25, 44.9, 44.25, 47.87, 40.39])
259 |         #weight = Variable(torch.FloatTensor([1, 1.49, 1.28, 1.62, 1.62, 1.62, 1.64, 1.62, 1.49, 1.62, 1.43, 1.62, 1.64, 1.43, 1.64, 1.64, 1.64, 1.64, 1.62]), requires_grad=False).cuda()
260 |         assert not target.requires_grad
261 |         assert predict.dim() == 4
262 |         assert target.dim() == 3
263 |         assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
264 |         assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
265 |         assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))
266 |         n, c, h, w = predict.size()
267 |         target_mask = (target >= 0) * (target != self.ignore_label)
268 |         target = target[target_mask]
269 |         if not target.data.dim():
270 |             return torch.zeros(1)
271 |         predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
272 |         predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
273 |         loss = criterion(predict, target)
274 |         return loss
275 | # ==========================================================================================================================
276 | # ==========================================================================================================================
277 | 
278 | 
279 | 
280 | 
281 | class LovaszSoftmax(nn.Module):
282 |     def __init__(self, classes='present', per_image=False, ignore_index=255):
283 |         super(LovaszSoftmax, self).__init__()
284 |         self.classes = classes
285 |         self.per_image = per_image
286 |         self.ignore_index = ignore_index
287 | 
288 |     def forward(self, output, target):
289 |         probas = F.softmax(output, dim=1)
290 |         loss = lovasz_softmax(probas, target, classes=self.classes, per_image=self.per_image, ignore=self.ignore_index)
291 |         return loss
292 | 
--------------------------------------------------------------------------------
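For reference, a minimal sketch of how these criteria might be plugged into a training step. The shapes, the 19-class count, and the import path are illustrative assumptions (the repository root is assumed to be on PYTHONPATH), not a prescribed usage:

import torch
from utils.losses.loss import CrossEntropyLoss2d, LovaszSoftmax

logits = torch.randn(2, 19, 64, 128, requires_grad=True)  # N x C x H x W raw network outputs
target = torch.randint(0, 19, (2, 64, 128))               # N x H x W class indices; 255 marks ignored pixels

criterion_ce = CrossEntropyLoss2d(ignore_label=255)
criterion_lovasz = LovaszSoftmax(ignore_index=255)

loss = criterion_ce(logits, target) + criterion_lovasz(logits, target)
loss.backward()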