├── dataset ├── camvid │ ├── test │ │ └── .gitkeep │ ├── val │ │ └── .gitkeep │ ├── testannot │ │ └── .gitkeep │ ├── train │ │ └── .gitkeep │ ├── valannot │ │ └── .gitkeep │ ├── trainannot │ │ └── .gitkeep │ └── camvid_val_list.txt ├── cityscape_scripts │ ├── __init__.py │ ├── download_cityscapes.sh │ ├── print_utils.py │ └── generate_mappings.py ├── cityscapes │ ├── gtFine │ │ ├── val │ │ │ └── .gitkeep │ │ ├── test │ │ │ └── .gitkeep │ │ └── train │ │ │ └── .gitkeep │ ├── gtCoarse │ │ ├── train │ │ │ └── .gitkeep │ │ ├── val │ │ │ └── .gitkeep │ │ └── train_extra │ │ │ └── .gitkeep │ └── leftImg8bit │ │ ├── val │ │ └── .gitkeep │ │ ├── test │ │ └── .gitkeep │ │ └── train │ │ └── .gitkeep ├── __init__.py ├── inform │ ├── camvid_inform.pkl │ └── cityscapes_inform.pkl ├── create_dataset_list.py └── README.md ├── utils ├── losses │ ├── __init__.py │ ├── lovasz_losses.py │ └── loss.py ├── metric │ ├── __init__.py │ └── metric.py ├── scheduler │ ├── __init__.py │ └── lr_scheduler.py ├── optim │ ├── __init__.py │ ├── Lookahead.py │ ├── RAdam.py │ ├── AdamW.py │ └── Ranger.py ├── convert_state.py ├── debug.py ├── utils.py ├── activations.py └── colorize_mask.py ├── tools ├── flops_counter │ ├── ptflops │ │ └── __init__.py │ ├── CHANGELOG.md │ ├── setup.py │ ├── LICENSE │ ├── sample.py │ ├── .gitignore │ ├── ENet_Flops_test.py │ └── README.md ├── trainID2labelID.py └── fps_test │ └── eval_forward_time.py ├── docs ├── image-1.png ├── requirements.yml └── README.md ├── .gitignore ├── requirements.txt ├── LICENSE ├── model ├── ESPNet_v2 │ ├── SegmentationModel.py │ └── cnn_utils.py ├── UNet.py ├── ERFNet.py ├── EDANet.py ├── DABNet.py ├── ESNet.py ├── SegNet.py ├── SQNet.py ├── ContextNet.py ├── LinkNet.py ├── FastSCNN.py ├── FPENet.py └── LEDNet.py ├── predict.py ├── test.py └── README.md /dataset/camvid/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/testannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/valannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/camvid/trainannot/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/train/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtFine/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/val/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/gtCoarse/train_extra/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/cityscapes/leftImg8bit/train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .loss import * 2 | -------------------------------------------------------------------------------- /utils/metric/__init__.py: -------------------------------------------------------------------------------- 1 | from .metric import * 2 | -------------------------------------------------------------------------------- /utils/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .lr_scheduler import * 2 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .camvid import * 2 | from .cityscapes import * 3 | -------------------------------------------------------------------------------- /tools/flops_counter/ptflops/__init__.py: -------------------------------------------------------------------------------- 1 | from .flops_counter import get_model_complexity_info 2 | -------------------------------------------------------------------------------- /docs/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/docs/image-1.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *.pyc 4 | *.pyo 5 | *.pth 6 | .idea/ 7 | result/ 8 | server/ 9 | checkpoint/ -------------------------------------------------------------------------------- /utils/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .RAdam import * 2 | from .AdamW import * 3 | from .Lookahead import * 4 | from .Ranger import * 
-------------------------------------------------------------------------------- /dataset/inform/camvid_inform.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/dataset/inform/camvid_inform.pkl -------------------------------------------------------------------------------- /dataset/inform/cityscapes_inform.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoyufenfei/Efficient-Segmentation-Networks/HEAD/dataset/inform/cityscapes_inform.pkl -------------------------------------------------------------------------------- /docs/requirements.yml: -------------------------------------------------------------------------------- 1 | name: seg_requirements 2 | dependencies: 3 | - python=3 4 | - cudatoolkit=9.0 5 | - pip: 6 | - Image 7 | - tqdm 8 | - torch==1.1.0 9 | - torchvision==0.3.0 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | kiwisolver==1.1.0 3 | matplotlib==3.1.1 4 | numpy==1.15.0 5 | Pillow>=6.2.2 6 | pyparsing==2.4.2 7 | python-dateutil==2.8.1 8 | pytz==2018.4 9 | six==1.12.0 10 | torch==1.1.0 11 | torchvision==0.3.0 12 | torchsummary==1.5.1 13 | -------------------------------------------------------------------------------- /tools/flops_counter/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # ptflops versions log 2 | 3 | ## v 0.3 4 | - Add 1d operators: batch norm, poolings, convolution. 5 | - Add ability to output extended report to any output stream. 6 | 7 | ## v 0.2 8 | - Add new operations: Conv3d, BatchNorm3d, MaxPool3d, AvgPool3d, ConvTranspose2d. 9 | - Add some results on widespread models to the README. 10 | - Minor bugfixes. 11 | 12 | ## v 0.1 13 | - Initial release with basic functionality 14 | -------------------------------------------------------------------------------- /utils/convert_state.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import os 3 | import numpy as np 4 | 5 | 6 | def convert_state_dict(state_dict): 7 | """ 8 | Converts a state dict saved from a DataParallel module into a state dict for the underlying (non-parallel) module, in place. 9 | Args: 10 | state_dict: the model_state loaded from a DataParallel checkpoint 11 | """ 12 | state_dict_new = OrderedDict() 13 | # print(type(state_dict)) 14 | for k, v in state_dict.items(): 15 | # print(k) 16 | name = k[7:] # remove the 'module.' prefix added by DataParallel 17 | # PyTorch does not strip this prefix automatically, so it is removed here. 18 | state_dict_new[name] = v 19 | return state_dict_new 20 | -------------------------------------------------------------------------------- /utils/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | import pdb 4 | 5 | """ 6 | This module enables debugging without modifying your scripts. 7 | 8 | Simply add `import debug` to the script you want to debug, and the pdb debugger 9 | will start automatically at the point where an exception is raised. 10 | Once the debugger is running, `from IPython import embed; embed()` drops you into an IPython shell.
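
Example (illustrative only; `run_training` is a hypothetical stand-in for any entry point in your script):

    import debug  # importing this module installs the custom excepthook as a side effect
    run_training()  # any uncaught exception now drops into a pdb post-mortem session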
11 | """ 12 | 13 | 14 | def info(exctype, value, tb): 15 | # we are in interactive mode or we don't have a tty-like 16 | # device, so we call the default hook 17 | if hasattr(sys, 'ps1') or not sys.stderr.isatty(): 18 | sys.__excepthook__(exctype, value, tb) 19 | else: 20 | traceback.print_exception(exctype, value, tb) 21 | pdb.post_mortem(tb) 22 | 23 | 24 | sys.excepthook = info 25 | -------------------------------------------------------------------------------- /tools/flops_counter/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | from setuptools import setup, find_packages 5 | 6 | readme = open('README.md').read() 7 | 8 | VERSION = '0.3' 9 | 10 | requirements = [ 11 | 'torch', 12 | ] 13 | 14 | setup( 15 | # Metadata 16 | name='ptflops', 17 | version=VERSION, 18 | author='Vladislav Sovrasov', 19 | author_email='sovrasov.vlad@gmail.com', 20 | url='https://github.com/sovrasov/flops-counter.pytorch', 21 | description='Flops counter for convolutional networks in pytorch framework', 22 | long_description=readme, 23 | long_description_content_type='text/markdown', 24 | license='MIT', 25 | 26 | # Package info 27 | packages=find_packages(exclude=('*test*',)), 28 | 29 | # 30 | zip_safe=True, 31 | install_requires=requirements, 32 | 33 | # Classifiers 34 | classifiers=[ 35 | 'Programming Language :: Python :: 3', 36 | ], 37 | ) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yu Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tools/flops_counter/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Vladislav Sovrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/download_cityscapes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | global_path='../../../vision_datasets' 3 | data_dir=$global_path'/cityscapes' 4 | 5 | mkdir -p $data_dir 6 | cd $data_dir 7 | 8 | # enter user details 9 | uname='' # 10 | pass='' 11 | 12 | wget --keep-session-cookies --save-cookies=cookies.txt --post-data 'username='$uname'&password='$pass'&submit=Login' https://www.cityscapes-dataset.com/login/ 13 | wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=1 14 | wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=3 15 | # Uncomment if you want to download coarse 16 | #wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=4 17 | #wget --load-cookies cookies.txt --content-disposition https://www.cityscapes-dataset.com/file-handling/?packageID=2 18 | 19 | 20 | #unzip -q -o gtCoarse.zip 21 | unzip -q -o gtFine_trainvaltest.zip 22 | #unzip -q -o leftImg8bit_trainextra.zip 23 | unzip -q -o leftImg8bit_trainvaltest.zip 24 | 25 | #rm -rf gtCoarse.zip 26 | rm -rf gtFine_trainvaltest.zip 27 | #rm -rf leftImg8bit_trainextra.zip 28 | rm -rf leftImg8bit_trainvaltest.zip -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ### Model & Backbone 2 | 3 | | Model | Scratch | ResNet18 | ResNet50 | ResNet101 | 4 | | :--------: | :-----: | :------: | :------: | :-------: | 5 | | SQNet | ✓ | | | | 6 | | LinkNet | ✓ | | | | 7 | | SegNet | ✓ | | | | 8 | | UNet | ✓ | | | | 9 | | ENet | ✓ | | | | 10 | | ERFNet | ✓ | | | | 11 | | CGNet | ✓ | | | | 12 | | EDANet | ✓ | | | | 13 | | ESPNet | ✓ | | | | 14 | | ESNet | ✓ | | | | 15 | | ESPNetv2 | ✓ | | | | 16 | | LEDNet | ✓ | | | | 17 | | ContextNet | ✓ | | | | 18 | | Fast-SCNN | ✓ | | | | 19 | | DABNet | ✓ | | | | 20 | | FSSNet | ✓ | | | 
| 21 | | FPENet | ✓ | | | | 22 | | | | | | | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /dataset/create_dataset_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import glob 4 | root_path=os.path.expanduser('./cityscapes') 5 | image_path='leftImg8bit' 6 | annotation_path='gtFine' 7 | splits=['train','val','test'] 8 | 9 | #train glob images 2975 10 | #train glob annotations 2975 11 | #val glob images 500 12 | #val glob annotations 500 13 | #test glob images 1525 14 | #test glob annotations 1525 15 | 16 | for split in splits: 17 | glob_images=glob.glob(os.path.join(root_path,image_path,split,'*','*leftImg8bit.png')) 18 | glob_annotations=glob.glob(os.path.join(root_path,annotation_path,split,'*','*labelTrainIds.png')) 19 | print('%s glob images'%split,len(glob_images)) 20 | print('%s glob annotations'%split,len(glob_annotations)) 21 | 22 | write_file=open('./cityscapes/cityscapes_'+split+'_list.txt','w') 23 | for g_img in glob_images: 24 | #img_p: eg leftImg8bit/val/frankfurt/frankfurt_000001_083852_leftImg8bit.png 25 | #ann_p: eg gtFine/val/frankfurt/frankfurt_000001_083852_gtFine_labelTrainIds.png 26 | img_p=g_img.replace(root_path+'/','') 27 | #replace will not change img_p 28 | ann_p=img_p.replace('leftImg8bit/','gtFine/').replace('leftImg8bit.png','gtFine_labelTrainIds.png') 29 | assert os.path.join(root_path,img_p) in glob_images,'%s not exist'%img_p 30 | assert os.path.join(root_path,ann_p) in glob_annotations,'%s not exist'%ann_p 31 | write_file.write(img_p+' '+ann_p+'\n') 32 | write_file.close() 33 | -------------------------------------------------------------------------------- /tools/flops_counter/sample.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | 4 | import torchvision.models as models 5 | import torch 6 | 7 | from ptflops import get_model_complexity_info 8 | 9 | pt_models = {'resnet18': models.resnet18, 'resnet50': models.resnet50, 10 | 'alexnet': models.alexnet, 11 | 'vgg16': models.vgg16, 12 | 'squeezenet': models.squeezenet1_0, 13 | 'densenet': models.densenet161, 14 | 'inception': models.inception_v3} 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser(description='ptflops sample script') 18 | parser.add_argument('--device', type=int, default=0, 19 | help='Device to store the model.') 20 | parser.add_argument('--model', choices=list(pt_models.keys()), 21 | type=str, default='resnet18') 22 | parser.add_argument('--result', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | if args.result is None: 26 | ost = sys.stdout 27 | else: 28 | ost = open(args.result, 'w') 29 | 30 | with torch.cuda.device(args.device): 31 | net = pt_models[args.model]().cuda() 32 | 33 | flops, params = get_model_complexity_info(net, (3, 224, 224), 34 | as_strings=True, 35 | print_per_layer_stat=True, 36 | ost=ost) 37 | print('Flops: ' + flops) 38 | print('Params: ' + params) 39 | -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- 1 | # Supported datasets 2 | 3 | - CamVid 4 | - CityScapes 5 | 6 | Note: When referring to the number of classes, the void/unlabeled class is excluded. 
7 | 8 | ## CamVid Dataset 9 | 10 | The Cambridge-driving Labeled Video Database (CamVid) is a collection of over ten minutes of high-quality 30Hz footage with object class semantic labels at 1Hz and in part, 15Hz. Each pixel is associated with one of 32 classes. 11 | 12 | The CamVid dataset supported here is a 12 class version developed by the authors of SegNet. [Download link here](https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid). For actual training, an 11 class version is used - the "road marking" class is combined with the "road" class. 13 | 14 | More detailed information about the CamVid dataset can be found [here](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) and on the [SegNet GitHub repository](https://github.com/alexgkendall/SegNet-Tutorial). 15 | 16 | ## Cityscapes 17 | 18 | Cityscapes is a set of stereo video sequences recorded in street scenes from 50 different cities, annotated with 34 classes. There are 5000 images with fine annotations and 20000 coarsely annotated images. 19 | 20 | The version supported here is the finely annotated one with 19 classes. 21 | 22 | For more detailed information see the official [website](https://www.cityscapes-dataset.com/) and [repository](https://github.com/mcordts/cityscapesScripts). 23 | 24 | The dataset can be downloaded from https://www.cityscapes-dataset.com/downloads/. At this time, a registration is required to download the data. -------------------------------------------------------------------------------- /tools/trainID2labelID.py: -------------------------------------------------------------------------------- 1 | # Convert trainIds to labelIds for evaluating test-set segmentation results on the Cityscapes dataset 2 | 3 | import numpy as np 4 | import os 5 | from PIL import Image 6 | 7 | 8 | 9 | # index: trainId from 0 to 18 (19 semantic classes); value: the corresponding Cityscapes labelId 10 | cityscapes_trainIds2labelIds = np.array([7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], 11 | dtype=np.uint8) 12 | 13 | 14 | def trainIDs2LabelID(trainID_png_dir, save_dir): 15 | print('save_dir: ', save_dir) 16 | if not os.path.exists(save_dir): 17 | os.makedirs(save_dir) 18 | png_list = os.listdir(trainID_png_dir) 19 | for index, png_filename in enumerate(png_list): 20 | # 21 | png_path = os.path.join(trainID_png_dir, png_filename) 22 | # print(png_path) 23 | print('processing(', index, '/', len(png_list), ') ....') 24 | image = Image.open(png_path) # image is a PIL Image 25 | pngdata = np.array(image) 26 | trainID = pngdata # model prediction 27 | row, col = pngdata.shape 28 | labelID = np.zeros((row, col), dtype=np.uint8) 29 | for i in range(row): 30 | for j in range(col): 31 | labelID[i][j] = cityscapes_trainIds2labelIds[trainID[i][j]] 32 | 33 | res_path = os.path.join(save_dir, png_filename) 34 | new_im = Image.fromarray(labelID) 35 | new_im.save(res_path) 36 | 37 | 38 | if __name__ == '__main__': 39 | trainID_png_dir = '../server/cityscapes/predict/ENet' 40 | save_dir = '../server/cityscapes/predict/cityscapes_submit/' 41 | trainIDs2LabelID(trainID_png_dir, save_dir) 42 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/print_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | 4 | text_colors = { 5 | 'logs': '\033[34m', # 033 is the escape code and 34 is the color code 6 | 'info': '\033[32m', 7 | 'warning': '\033[33m', 8 | 'error': '\033[31m', 9 | 'bold': '\033[1m', 10 | 'end_color':
'\033[0m' 11 | } 12 | 13 | 14 | def get_curr_time_stamp(): 15 | return time.strftime("%Y-%m-%d %H:%M:%S") 16 | 17 | 18 | def print_error_message(message): 19 | time_stamp = get_curr_time_stamp() 20 | error_str = text_colors['error'] + text_colors['bold'] + 'ERROR ' + text_colors['end_color'] 21 | print('{} - {} - {}'.format(time_stamp, error_str, message)) 22 | print('{} - {} - {}'.format(time_stamp, error_str, 'Exiting!!!')) 23 | exit(-1) 24 | 25 | 26 | def print_log_message(message): 27 | time_stamp = get_curr_time_stamp() 28 | log_str = text_colors['logs'] + text_colors['bold'] + 'LOGS ' + text_colors['end_color'] 29 | print('{} - {} - {}'.format(time_stamp, log_str, message)) 30 | 31 | 32 | def print_warning_message(message): 33 | time_stamp = get_curr_time_stamp() 34 | warn_str = text_colors['warning'] + text_colors['bold'] + 'WARNING' + text_colors['end_color'] 35 | print('{} - {} - {}'.format(time_stamp, warn_str, message)) 36 | 37 | 38 | def print_info_message(message): 39 | time_stamp = get_curr_time_stamp() 40 | info_str = text_colors['info'] + text_colors['bold'] + 'INFO ' + text_colors['end_color'] 41 | print('{} - {} - {}'.format(time_stamp, info_str, message)) 42 | 43 | 44 | if __name__ == '__main__': 45 | print_log_message('Testing') 46 | print_warning_message('Testing') 47 | print_info_message('Testing') 48 | print_error_message('Testing') -------------------------------------------------------------------------------- /tools/flops_counter/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /tools/fps_test/eval_forward_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.backends.cudnn as cudnn 4 | 5 | from argparse import ArgumentParser 6 | from builders.model_builder import build_model 7 | 8 | 9 | def compute_speed(model, input_size, device, iteration=100): 10 | torch.cuda.set_device(device) 11 | cudnn.benchmark = True 12 | 13 | model.eval() 14 | model = model.cuda() 15 | 16 | input = torch.randn(*input_size, device=device) 17 | 18 | for _ in range(50): 19 | model(input) 20 | 21 | print('=========Eval Forward Time=========') 22 | torch.cuda.synchronize() 23 | t_start = time.time() 24 | for _ in range(iteration): 25 | model(input) 26 | torch.cuda.synchronize() 27 | elapsed_time = time.time() - t_start 28 | 29 | speed_time = elapsed_time / iteration * 1000 30 | fps = iteration / elapsed_time 31 | 32 | print('Elapsed Time: [%.2f s / %d iter]' % (elapsed_time, iteration)) 33 | print('Speed Time: %.2f ms / iter FPS: %.2f' % (speed_time, fps)) 34 | return speed_time, fps 35 | 36 | 37 | if __name__ == '__main__': 38 | parser = ArgumentParser() 39 | 40 | parser.add_argument("--size", type=str, default="512,1024", help="input size of model") 41 | parser.add_argument('--num-channels', type=int, default=3) 42 | parser.add_argument('--batch-size', type=int, default=1) 43 | parser.add_argument('--classes', type=int, default=19) 44 | parser.add_argument('--iter', type=int, default=100) 45 | parser.add_argument('--model', type=str, default='ENet') 46 | parser.add_argument("--gpus", type=str, default="0", help="gpu ids (default: 0)") 47 | args = parser.parse_args() 48 | 49 | h, w = map(int, args.size.split(',')) 50 | model = build_model(args.model, num_classes=args.classes) 51 | compute_speed(model, (args.batch_size, args.num_channels, h, w), int(args.gpus), iteration=args.iter) 52 | -------------------------------------------------------------------------------- /tools/flops_counter/ENet_Flops_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import torch 4 | 5 | 6 | from model.ENet import ENet 7 | from model.ERFNet import ERFNet 8 | from model.CGNet import CGNet 9 | from model.EDANet import EDANet 10 | from model.ESNet import 
ESNet 11 | from model.ESPNet import ESPNet 12 | from model.LEDNet import LEDNet 13 | from model.ESPNet_v2.SegmentationModel import EESPNet_Seg 14 | from model.FastSCNN import FastSCNN 15 | from model.DABNet import DABNet 16 | from model.FPENet import FPENet 17 | 18 | 19 | 20 | 21 | 22 | 23 | from tools.flops_counter.ptflops import get_model_complexity_info 24 | 25 | 26 | 27 | pt_models = { 28 | 29 | 'ENet': ENet, 30 | 'ERFNet': ERFNet, 31 | 'CGNet': CGNet, 32 | 'EDANet': EDANet, 33 | 'ESNet': ESNet, 34 | 'ESPNet': ESPNet, 35 | 'LEDNet': LEDNet, 36 | 'EESPNet_Seg': EESPNet_Seg, 37 | 'FastSCNN': FastSCNN, 38 | 'DABNet': DABNet, 39 | 'FPENet': FPENet 40 | } 41 | 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser(description='ptflops sample script') 44 | parser.add_argument('--device', type=int, default=0, 45 | help='Device to store the model.') 46 | parser.add_argument('--model', choices=list(pt_models.keys()), 47 | type=str, default='ENet') 48 | parser.add_argument('--result', type=str, default=None) 49 | args = parser.parse_args() 50 | 51 | if args.result is None: 52 | ost = sys.stdout 53 | else: 54 | ost = open(args.result, 'w') 55 | 56 | with torch.cuda.device(args.device): 57 | net = pt_models[args.model](classes=19).cuda() 58 | 59 | flops, params = get_model_complexity_info(net, (3, 512, 1024), 60 | as_strings=True, 61 | print_per_layer_stat=True, 62 | ost=ost) 63 | print('Flops: ' + flops) 64 | print('Params: ' + params) -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image 5 | import torch 6 | import torch.nn as nn 7 | from utils.colorize_mask import cityscapes_colorize_mask, camvid_colorize_mask 8 | 9 | 10 | def __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum, 11 | **kwargs): 12 | for name, m in feature.named_modules(): 13 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 14 | conv_init(m.weight, **kwargs) 15 | elif isinstance(m, norm_layer): 16 | m.eps = bn_eps 17 | m.momentum = bn_momentum 18 | nn.init.constant_(m.weight, 1) 19 | nn.init.constant_(m.bias, 0) 20 | 21 | 22 | def init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum, 23 | **kwargs): 24 | if isinstance(module_list, list): 25 | for feature in module_list: 26 | __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum, 27 | **kwargs) 28 | else: 29 | __init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum, 30 | **kwargs) 31 | 32 | 33 | def setup_seed(seed): 34 | torch.manual_seed(seed) 35 | torch.cuda.manual_seed_all(seed) 36 | np.random.seed(seed) 37 | random.seed(seed) 38 | torch.backends.cudnn.deterministic = True 39 | 40 | 41 | def save_predict(output, gt, img_name, dataset, save_path, output_grey=False, output_color=True, gt_color=False): 42 | if output_grey: 43 | output_grey = Image.fromarray(output) 44 | output_grey.save(os.path.join(save_path, img_name + '.png')) 45 | 46 | if output_color: 47 | if dataset == 'cityscapes': 48 | output_color = cityscapes_colorize_mask(output) 49 | elif dataset == 'camvid': 50 | output_color = camvid_colorize_mask(output) 51 | 52 | output_color.save(os.path.join(save_path, img_name + '_color.png')) 53 | 54 | if gt_color: 55 | if dataset == 'cityscapes': 56 | gt_color = cityscapes_colorize_mask(gt) 57 | elif dataset == 'camvid': 58 | gt_color = camvid_colorize_mask(gt) 59 | 60 | gt_color.save(os.path.join(save_path, 
img_name + '_gt.png')) 61 | 62 | 63 | def netParams(model): 64 | """ 65 | computing total network parameters 66 | args: 67 | model: model 68 | return: the number of parameters 69 | """ 70 | total_paramters = 0 71 | for parameter in model.parameters(): 72 | i = len(parameter.size()) 73 | p = 1 74 | for j in range(i): 75 | p *= parameter.size(j) 76 | total_paramters += p 77 | 78 | return total_paramters 79 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | 7 | 8 | 9 | 10 | ''' 11 | Script provides functional interface for Mish activation function. 12 | Mish - "Mish: A Self Regularized Non-Monotonic Neural Activation Function" 13 | https://arxiv.org/abs/1908.08681v1 14 | ''' 15 | class Mish(nn.Module): 16 | def __init__(self): 17 | super().__init__() 18 | # print("Mish activation loaded...") 19 | 20 | def forward(self, x): 21 | return x *( torch.tanh(F.softplus(x))) 22 | 23 | 24 | 25 | class BetaMish(nn.Module): 26 | def __init__(self): 27 | super().__init__() 28 | 29 | def forward(self, x): 30 | beta=1.5 31 | return x * torch.tanh(torch.log(torch.pow((1+torch.exp(x)),beta))) 32 | 33 | 34 | ''' 35 | Swish - https://arxiv.org/pdf/1710.05941v1.pdf 36 | ''' 37 | class Swish(nn.Module): 38 | def __init__(self): 39 | super(Swish, self).__init__() 40 | 41 | self.sigmoid = nn.Sigmoid() 42 | 43 | def forward(self, x): 44 | return x * self.sigmoid(x) 45 | 46 | 47 | class Hswish(nn.Module): 48 | def __init__(self, inplace=True): 49 | super(Hswish, self).__init__() 50 | self.inplace = inplace 51 | 52 | def forward(self, x): 53 | return x * F.relu6(x + 3., inplace=self.inplace) / 6. 54 | 55 | 56 | 57 | class Hsigmoid(nn.Module): 58 | def __init__(self, inplace=True): 59 | super(Hsigmoid, self).__init__() 60 | self.inplace = inplace 61 | 62 | def forward(self, x): 63 | return F.relu6(x + 3., inplace=self.inplace) / 6. 
64 | 65 | 66 | class SEModule(nn.Module): 67 | def __init__(self, channel, act, reduction=4): 68 | super(SEModule, self).__init__() 69 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 70 | self.conv = nn.Sequential( 71 | nn.Conv2d(channel, channel // reduction, 1, 1, 0, bias=True), 72 | act 73 | ) 74 | self.fc = nn.Sequential( 75 | nn.Conv2d(channel // reduction, channel, 1, 1, 0, bias=True), 76 | Hsigmoid() 77 | ) 78 | 79 | def forward(self, x): 80 | y = self.avg_pool(x) 81 | y = self.conv(y) 82 | y = self.fc(y) 83 | return torch.mul(x, y) 84 | 85 | 86 | 87 | 88 | 89 | 90 | NON_LINEARITY = { 91 | 'ReLU': nn.ReLU(inplace=True), 92 | 'PReLU': nn.PReLU(), 93 | 'ReLu6': nn.ReLU6(inplace=True), 94 | 'Mish': Mish(), 95 | 'BetaMish': BetaMish(), 96 | 'Swish': Swish(), 97 | 'Hswish': Hswish(), 98 | 'tanh': nn.Tanh(), 99 | 'sigmoid': nn.Sigmoid() 100 | } -------------------------------------------------------------------------------- /utils/optim/Lookahead.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import chain 3 | from torch.optim import Optimizer 4 | import torch 5 | import warnings 6 | 7 | class Lookahead(Optimizer): 8 | def __init__(self, optimizer, k=5, alpha=0.5): 9 | self.optimizer = optimizer 10 | self.k = k 11 | self.alpha = alpha 12 | self.param_groups = self.optimizer.param_groups 13 | self.state = defaultdict(dict) 14 | self.fast_state = self.optimizer.state 15 | for group in self.param_groups: 16 | group["counter"] = 0 17 | 18 | def update(self, group): 19 | for fast in group["params"]: 20 | param_state = self.state[fast] 21 | if "slow_param" not in param_state: 22 | param_state["slow_param"] = torch.zeros_like(fast.data) 23 | param_state["slow_param"].copy_(fast.data) 24 | slow = param_state["slow_param"] 25 | slow += (fast.data - slow) * self.alpha 26 | fast.data.copy_(slow) 27 | 28 | def update_lookahead(self): 29 | for group in self.param_groups: 30 | self.update(group) 31 | 32 | def step(self, closure=None): 33 | loss = self.optimizer.step(closure) 34 | for group in self.param_groups: 35 | if group["counter"] == 0: 36 | self.update(group) 37 | group["counter"] += 1 38 | if group["counter"] >= self.k: 39 | group["counter"] = 0 40 | return loss 41 | 42 | def state_dict(self): 43 | fast_state_dict = self.optimizer.state_dict() 44 | slow_state = { 45 | (id(k) if isinstance(k, torch.Tensor) else k): v 46 | for k, v in self.state.items() 47 | } 48 | fast_state = fast_state_dict["state"] 49 | param_groups = fast_state_dict["param_groups"] 50 | return { 51 | "fast_state": fast_state, 52 | "slow_state": slow_state, 53 | "param_groups": param_groups, 54 | } 55 | 56 | def load_state_dict(self, state_dict): 57 | slow_state_dict = { 58 | "state": state_dict["slow_state"], 59 | "param_groups": state_dict["param_groups"], 60 | } 61 | fast_state_dict = { 62 | "state": state_dict["fast_state"], 63 | "param_groups": state_dict["param_groups"], 64 | } 65 | super(Lookahead, self).load_state_dict(slow_state_dict) 66 | self.optimizer.load_state_dict(fast_state_dict) 67 | self.fast_state = self.optimizer.state 68 | 69 | def add_param_group(self, param_group): 70 | param_group["counter"] = 0 71 | self.optimizer.add_param_group(param_group) -------------------------------------------------------------------------------- /utils/colorize_mask.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import torch 3 | import numpy as np 4 | 5 | 
cityscapes_palette = [128, 64, 128, 244, 35, 232, 70, 70, 70, 102, 102, 156, 190, 153, 153, 153, 153, 153, 250, 170, 30, 6 | 220, 220, 0, 107, 142, 35, 152, 251, 152, 70, 130, 180, 220, 20, 60, 255, 0, 0, 0, 0, 142, 0, 0, 7 | 70, 8 | 0, 60, 100, 0, 80, 100, 0, 0, 230, 119, 11, 32] 9 | 10 | camvid_palette = [128, 128, 128, 128, 0, 0, 192, 192, 128, 128, 64, 128, 60, 40, 222, 128, 128, 0, 192, 128, 128, 64, 11 | 64, 12 | 128, 64, 0, 128, 64, 64, 0, 0, 128, 192] 13 | 14 | zero_pad = 256 * 3 - len(cityscapes_palette) 15 | for i in range(zero_pad): 16 | cityscapes_palette.append(0) 17 | 18 | 19 | # zero_pad = 256 * 3 - len(camvid_palette) 20 | # for i in range(zero_pad): 21 | # camvid_palette.append(0) 22 | 23 | def cityscapes_colorize_mask(mask): 24 | # mask: numpy array of the mask 25 | new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') 26 | new_mask.putpalette(cityscapes_palette) 27 | 28 | return new_mask 29 | 30 | 31 | def camvid_colorize_mask(mask): 32 | # mask: numpy array of the mask 33 | new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') 34 | new_mask.putpalette(camvid_palette) 35 | 36 | return new_mask 37 | 38 | 39 | class VOCColorize(object): 40 | def __init__(self, n=22): 41 | self.cmap = voc_color_map(22) 42 | self.cmap = torch.from_numpy(self.cmap[:n]) 43 | 44 | def __call__(self, gray_image): 45 | size = gray_image.shape 46 | color_image = np.zeros((3, size[0], size[1]), dtype=np.uint8) 47 | 48 | for label in range(0, len(self.cmap)): 49 | mask = (label == gray_image) 50 | color_image[0][mask] = self.cmap[label][0] 51 | color_image[1][mask] = self.cmap[label][1] 52 | color_image[2][mask] = self.cmap[label][2] 53 | 54 | # handle void 55 | mask = (255 == gray_image) 56 | color_image[0][mask] = color_image[1][mask] = color_image[2][mask] = 255 57 | 58 | return color_image 59 | 60 | 61 | def voc_color_map(N=256, normalized=False): 62 | def bitget(byteval, idx): 63 | return ((byteval & (1 << idx)) != 0) 64 | 65 | dtype = 'float32' if normalized else 'uint8' 66 | cmap = np.zeros((N, 3), dtype=dtype) 67 | for i in range(N): 68 | r = g = b = 0 69 | c = i 70 | for j in range(8): 71 | r = r | (bitget(c, 0) << 7 - j) 72 | g = g | (bitget(c, 1) << 7 - j) 73 | b = b | (bitget(c, 2) << 7 - j) 74 | c = c >> 3 75 | 76 | cmap[i] = np.array([r, g, b]) 77 | 78 | cmap = cmap / 255 if normalized else cmap 79 | return cmap 80 | -------------------------------------------------------------------------------- /dataset/cityscape_scripts/generate_mappings.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | import os 4 | from utilities.print_utils import * 5 | 6 | def get_mappings(root_dir, files, annot_name): 7 | pairs = [] 8 | for f in files: 9 | f = f.replace(root_dir, '/') 10 | img_f = f.replace(annot_name, 'leftImg8bit') 11 | img_f = img_f.replace('_labelTrainIds.png', '.png') 12 | if not os.path.isfile(root_dir + img_f): 13 | print_error_message('{} file does not exist. Please check'.format(root_dir + img_f)) 14 | exit() 15 | line = img_f + ',' + f 16 | pairs.append(line) 17 | return pairs 18 | 19 | def main(cityscapesPath, split): 20 | searchFine = os.path.join(cityscapesPath, "gtFine", split, "*", '*_labelTrainIds.png') 21 | filesFine = glob.glob(searchFine) 22 | filesFine.sort() 23 | 24 | if not filesFine: 25 | print_warning_message("Did not find any files. 
Please check root directory: {}.".format(cityscapesPath)) 26 | fine_pairs = [] 27 | else: 28 | print_info_message('{} files found for {} split'.format(len(filesFine), split)) 29 | fine_pairs = get_mappings(cityscapesPath, filesFine, 'gtFine') 30 | 31 | if not fine_pairs: 32 | print_error_message('No pairs exist. Exiting') 33 | exit() 34 | else: 35 | print_info_message('Creating train and val files.') 36 | f_name = split + '.txt' 37 | with open(os.path.join(cityscapesPath, f_name), 'w') as txtFile: 38 | for pair in fine_pairs: 39 | txtFile.write(pair + '\n') 40 | print_info_message('{} created in {} with {} pairs'.format(f_name, cityscapesPath, len(fine_pairs))) 41 | 42 | if split == 'train': 43 | split_orig = split 44 | split = split + '_extra' 45 | searchCoarse = os.path.join(cityscapesPath, "gtCoarse", split, "*", '*_labelTrainIds.png') 46 | filesCoarse = glob.glob(searchCoarse) 47 | filesCoarse.sort() 48 | if not filesCoarse: 49 | print_warning_message("Did not find any files. Please check root directory: {}.".format(cityscapesPath)) 50 | course_pairs = [] 51 | else: 52 | print_info_message('{} files found for {} split'.format(len(filesCoarse), split)) 53 | course_pairs = get_mappings(cityscapesPath, filesCoarse, 'gtCoarse') 54 | if not course_pairs: 55 | print_warning_message('No pairs exist for coarse data') 56 | return 57 | else: 58 | print_info_message('Creating train and val files.') 59 | f_name = split_orig + '_coarse.txt' 60 | with open(os.path.join(cityscapesPath, f_name), 'w') as txtFile: 61 | for pair in course_pairs: 62 | txtFile.write(pair + '\n') 63 | print_info_message('{} created in {} with {} pairs'.format(f_name, cityscapesPath, len(course_pairs))) 64 | 65 | if __name__ == '__main__': 66 | cityscapes_path = '../../../vision_datasets/cityscapes/' 67 | main(cityscapes_path, "train") 68 | main(cityscapes_path, "val") -------------------------------------------------------------------------------- /utils/metric/metric.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import cv2 3 | import numpy as np 4 | 5 | from multiprocessing import Pool 6 | # import copy_reg 7 | import copyreg 8 | import types 9 | 10 | 11 | def _pickle_method(m): 12 | if m.im_self is None: 13 | return getattr, (m.im_class, m.im_func.func_name) 14 | else: 15 | return getattr, (m.im_self, m.im_func.func_name) 16 | 17 | 18 | copyreg.pickle(types.MethodType, _pickle_method) 19 | 20 | 21 | class ConfusionMatrix(object): 22 | 23 | def __init__(self, nclass, classes=None, ignore_label=255): 24 | self.nclass = nclass 25 | self.classes = classes 26 | self.M = np.zeros((nclass, nclass)) 27 | self.ignore_label = ignore_label 28 | 29 | def add(self, gt, pred): 30 | assert (np.max(pred) <= self.nclass) 31 | assert (len(gt) == len(pred)) 32 | for i in range(len(gt)): 33 | if not gt[i] == self.ignore_label: 34 | self.M[gt[i], pred[i]] += 1.0 35 | 36 | def addM(self, matrix): 37 | assert (matrix.shape == self.M.shape) 38 | self.M += matrix 39 | 40 | def __str__(self): 41 | pass 42 | 43 | # M[i, i] is the number of correctly predicted pixels of class i; M[i, j] and M[j, i] are interpreted as false positives and false negatives, although each is itself a sum of false positives and false negatives. 44 | def recall(self): # of the pixels predicted as each class, the fraction confirmed to belong to that class, averaged over classes 45 | recall = 0.0 46 | for i in range(self.nclass): 47 | recall += self.M[i, i] / np.sum(self.M[:, i]) 48 | 49 | return recall / self.nclass 50 | 51 | def accuracy(self): # correctly segmented pixels divided by the total pixels of each class, averaged over classes 52 | accuracy = 0.0 53 | for i in range(self.nclass): 54 | accuracy += self.M[i, i] / np.sum(self.M[i, :]) 55 | 56 | return accuracy / self.nclass 57 | 58 | # Jaccard index, also known as Intersection over Union (IoU) 59 | def jaccard(self): 60 |
jaccard = 0.0 61 | jaccard_perclass = [] 62 | for i in range(self.nclass): 63 | if not self.M[i, i] == 0: 64 | jaccard_perclass.append(self.M[i, i] / (np.sum(self.M[i, :]) + np.sum(self.M[:, i]) - self.M[i, i])) 65 | 66 | return np.sum(jaccard_perclass) / len(jaccard_perclass), jaccard_perclass, self.M 67 | 68 | def generateM(self, item): 69 | gt, pred = item 70 | m = np.zeros((self.nclass, self.nclass)) 71 | assert (len(gt) == len(pred)) 72 | for i in range(len(gt)): 73 | if gt[i] < self.nclass: # and pred[i] < self.nclass: 74 | m[gt[i], pred[i]] += 1.0 75 | return m 76 | 77 | 78 | def get_iou(data_list, class_num, save_path=None): 79 | """ 80 | Args: 81 | data_list: a list, its elements [gt, output] 82 | class_num: the number of label 83 | """ 84 | from multiprocessing import Pool 85 | 86 | ConfM = ConfusionMatrix(class_num) 87 | f = ConfM.generateM 88 | pool = Pool() 89 | m_list = pool.map(f, data_list) 90 | pool.close() 91 | pool.join() 92 | 93 | for m in m_list: 94 | ConfM.addM(m) 95 | 96 | aveJ, j_list, M = ConfM.jaccard() 97 | # print(j_list) 98 | # print(M) 99 | # print('meanIOU: ' + str(aveJ) + '\n') 100 | 101 | if save_path: 102 | with open(save_path, 'w') as f: 103 | f.write('meanIOU: ' + str(aveJ) + '\n') 104 | f.write(str(j_list) + '\n') 105 | f.write(str(M) + '\n') 106 | return aveJ, j_list 107 | -------------------------------------------------------------------------------- /utils/optim/RAdam.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | class RAdam(Optimizer): 6 | 7 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): 8 | defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) 9 | self.buffer = [[None, None, None] for ind in range(10)] 10 | super(RAdam, self).__init__(params, defaults) 11 | 12 | def __setstate__(self, state): 13 | super(RAdam, self).__setstate__(state) 14 | 15 | def step(self, closure=None): 16 | 17 | loss = None 18 | if closure is not None: 19 | loss = closure() 20 | 21 | for group in self.param_groups: 22 | 23 | for p in group['params']: 24 | if p.grad is None: 25 | continue 26 | grad = p.grad.data.float() 27 | if grad.is_sparse: 28 | raise RuntimeError('RAdam does not support sparse gradients') 29 | 30 | p_data_fp32 = p.data.float() 31 | 32 | state = self.state[p] 33 | 34 | if len(state) == 0: 35 | state['step'] = 0 36 | state['exp_avg'] = torch.zeros_like(p_data_fp32) 37 | state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) 38 | else: 39 | state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) 40 | state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) 41 | 42 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 43 | beta1, beta2 = group['betas'] 44 | 45 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 46 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 47 | 48 | state['step'] += 1 49 | buffered = self.buffer[int(state['step'] % 10)] 50 | if state['step'] == buffered[0]: 51 | N_sma, step_size = buffered[1], buffered[2] 52 | else: 53 | buffered[0] = state['step'] 54 | beta2_t = beta2 ** state['step'] 55 | N_sma_max = 2 / (1 - beta2) - 1 56 | N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) 57 | buffered[1] = N_sma 58 | 59 | # more conservative since it's an approximated value 60 | if N_sma >= 5: 61 | step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - 
beta1 ** state['step']) 62 | else: 63 | step_size = group['lr'] / (1 - beta1 ** state['step']) 64 | buffered[2] = step_size 65 | 66 | if group['weight_decay'] != 0: 67 | p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) 68 | 69 | # more conservative since it's an approximated value 70 | if N_sma >= 5: 71 | denom = exp_avg_sq.sqrt().add_(group['eps']) 72 | p_data_fp32.addcdiv_(-step_size, exp_avg, denom) 73 | else: 74 | p_data_fp32.add_(-step_size, exp_avg) 75 | 76 | p.data.copy_(p_data_fp32) 77 | 78 | return loss -------------------------------------------------------------------------------- /model/ESPNet_v2/SegmentationModel.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #ESPNetv2: A Light-weight, Power Efficient, and General Purpose Convolutional Neural Network 3 | #Paper-Link: https://arxiv.org/pdf/1811.11431.pdf 4 | ################################################################################################### 5 | 6 | 7 | import os 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchsummary import summary 12 | 13 | from model.ESPNet_v2.Model import EESPNet, EESP 14 | from model.ESPNet_v2.cnn_utils import * 15 | 16 | 17 | __all__ = ["EESPNet_Seg"] 18 | 19 | 20 | class EESPNet_Seg(nn.Module): 21 | def __init__(self, classes=19, s=2, pretrained=None, gpus=1): 22 | super().__init__() 23 | classificationNet = EESPNet(classes=1000, s=s) 24 | if gpus >=1: 25 | classificationNet = nn.DataParallel(classificationNet) 26 | # print(classificationNet) 27 | # load the pretrained weights 28 | if pretrained: 29 | if not os.path.isfile(pretrained): 30 | print('Weight file does not exist. 
Training without pre-trained weights') 31 | print('Model initialized with pretrained weights') 32 | classificationNet.load_state_dict(torch.load(pretrained)) 33 | 34 | self.net = classificationNet.module 35 | 36 | del classificationNet 37 | # delete last few layers 38 | del self.net.classifier 39 | del self.net.level5 40 | del self.net.level5_0 41 | if s <=0.5: 42 | p = 0.1 43 | else: 44 | p=0.2 45 | 46 | self.proj_L4_C = CBR(self.net.level4[-1].module_act.num_parameters, self.net.level3[-1].module_act.num_parameters, 1, 1) 47 | pspSize = 2*self.net.level3[-1].module_act.num_parameters 48 | self.pspMod = nn.Sequential(EESP(pspSize, pspSize //2, stride=1, k=4, r_lim=7), 49 | PSPModule(pspSize // 2, pspSize //2)) 50 | self.project_l3 = nn.Sequential(nn.Dropout2d(p=p), C(pspSize // 2, classes, 1, 1)) 51 | self.act_l3 = BR(classes) 52 | self.project_l2 = CBR(self.net.level2_0.act.num_parameters + classes, classes, 1, 1) 53 | self.project_l1 = nn.Sequential(nn.Dropout2d(p=p), C(self.net.level1.act.num_parameters + classes, classes, 1, 1)) 54 | 55 | def hierarchicalUpsample(self, x, factor=3): 56 | for i in range(factor): 57 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 58 | return x 59 | 60 | 61 | def forward(self, input): 62 | out_l1, out_l2, out_l3, out_l4 = self.net(input, seg=True) 63 | out_l4_proj = self.proj_L4_C(out_l4) 64 | up_l4_to_l3 = F.interpolate(out_l4_proj, size=out_l3.size()[2:], mode='bilinear', align_corners=True) 65 | merged_l3_upl4 = self.pspMod(torch.cat([out_l3, up_l4_to_l3], 1)) 66 | proj_merge_l3_bef_act = self.project_l3(merged_l3_upl4) 67 | proj_merge_l3 = self.act_l3(proj_merge_l3_bef_act) 68 | out_up_l3 = F.interpolate(proj_merge_l3, scale_factor=2, mode='bilinear', align_corners=True) 69 | merge_l2 = self.project_l2(torch.cat([out_l2, out_up_l3], 1)) 70 | out_up_l2 = F.interpolate(merge_l2, scale_factor=2, mode='bilinear', align_corners=True) 71 | merge_l1 = self.project_l1(torch.cat([out_l1, out_up_l2], 1)) 72 | # if self.training: 73 | # return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True), self.hierarchicalUpsample(proj_merge_l3_bef_act) 74 | # else: 75 | # return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True) 76 | output = F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True) 77 | return output 78 | 79 | if __name__ == '__main__': 80 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 81 | model = EESPNet_Seg(classes=19, s=2).to(device) 82 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/UNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #U-Net: Convolutional Networks for BiomedicalImage Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1505.04597.pdf 4 | ###################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | 11 | 12 | 13 | __all__ = ["UNet"] 14 | 15 | 16 | class double_conv(nn.Module): 17 | '''(conv => BN => ReLU) * 2''' 18 | 19 | def __init__(self, in_ch, out_ch): 20 | super(double_conv, self).__init__() 21 | self.conv = nn.Sequential( 22 | nn.Conv2d(in_ch, out_ch, 3, padding=1), 23 | nn.BatchNorm2d(out_ch), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(out_ch, out_ch, 3, 
padding=1), 26 | nn.BatchNorm2d(out_ch), 27 | nn.ReLU(inplace=True) 28 | ) 29 | 30 | def forward(self, x): 31 | x = self.conv(x) 32 | return x 33 | 34 | 35 | class inconv(nn.Module): 36 | def __init__(self, in_ch, out_ch): 37 | super(inconv, self).__init__() 38 | self.conv = double_conv(in_ch, out_ch) 39 | 40 | def forward(self, x): 41 | x = self.conv(x) 42 | return x 43 | 44 | 45 | class down(nn.Module): 46 | def __init__(self, in_ch, out_ch): 47 | super(down, self).__init__() 48 | self.mpconv = nn.Sequential( 49 | nn.MaxPool2d(2), 50 | double_conv(in_ch, out_ch) 51 | ) 52 | 53 | def forward(self, x): 54 | x = self.mpconv(x) 55 | return x 56 | 57 | 58 | class up(nn.Module): 59 | def __init__(self, in_ch, out_ch, bilinear=True): 60 | super(up, self).__init__() 61 | self.bilinear = bilinear 62 | 63 | self.up = nn.ConvTranspose2d(in_ch // 2, in_ch // 2, 2, stride=2) 64 | 65 | self.conv = double_conv(in_ch, out_ch) 66 | 67 | def forward(self, x1, x2): 68 | if self.bilinear: 69 | x1 = F.interpolate(x1, scale_factor=2, mode='bilinear', align_corners=True) 70 | else: 71 | x1 = self.up(x1) 72 | 73 | # input is CHW 74 | diffY = x2.size()[2] - x1.size()[2] 75 | diffX = x2.size()[3] - x1.size()[3] 76 | 77 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 78 | diffY // 2, diffY - diffY // 2]) 79 | 80 | # for padding issues, see 81 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 82 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 83 | 84 | x = torch.cat([x2, x1], dim=1) 85 | x = self.conv(x) 86 | return x 87 | 88 | 89 | class outconv(nn.Module): 90 | def __init__(self, in_ch, out_ch): 91 | super(outconv, self).__init__() 92 | self.conv = nn.Conv2d(in_ch, out_ch, 1) 93 | 94 | def forward(self, x): 95 | x = self.conv(x) 96 | return x 97 | 98 | 99 | 100 | class UNet(nn.Module): 101 | def __init__(self, classes): 102 | super(UNet, self).__init__() 103 | self.inc = inconv(3, 64) 104 | self.down1 = down(64, 128) 105 | self.down2 = down(128, 256) 106 | self.down3 = down(256, 512) 107 | self.down4 = down(512, 512) 108 | self.up1 = up(1024, 256) 109 | self.up2 = up(512, 128) 110 | self.up3 = up(256, 64) 111 | self.up4 = up(128, 64) 112 | self.outc = outconv(64, classes) 113 | 114 | def forward(self, x): 115 | x1 = self.inc(x) 116 | x2 = self.down1(x1) 117 | x3 = self.down2(x2) 118 | x4 = self.down3(x3) 119 | x5 = self.down4(x4) 120 | x = self.up1(x5, x4) 121 | x = self.up2(x, x3) 122 | x = self.up3(x, x2) 123 | x = self.up4(x, x1) 124 | x = self.outc(x) 125 | #return F.sigmoid(x) 126 | 127 | return x 128 | 129 | 130 | 131 | 132 | """print layers and params of network""" 133 | if __name__ == '__main__': 134 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 135 | model = UNet(classes=19).to(device) 136 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | import torch.backends.cudnn as cudnn 6 | from argparse import ArgumentParser 7 | # user 8 | from builders.model_builder import build_model 9 | from builders.dataset_builder import build_dataset_test 10 | from utils.utils import save_predict 11 | from utils.convert_state import convert_state_dict 12 | 13 | 14 | def parse_args(): 15 | parser = ArgumentParser(description='Efficient semantic segmentation') 
16 | # model and dataset 17 | parser.add_argument('--model', default="ENet", help="model name: (default ENet)") 18 | parser.add_argument('--dataset', default="camvid", help="dataset: cityscapes or camvid") 19 | parser.add_argument('--num_workers', type=int, default=2, help="the number of parallel threads") 20 | parser.add_argument('--batch_size', type=int, default=1, 21 | help=" the batch_size is set to 1 when evaluating or testing") 22 | parser.add_argument('--checkpoint', type=str,default="", 23 | help="use the file to load the checkpoint for evaluating or testing ") 24 | parser.add_argument('--save_seg_dir', type=str, default="./server/", 25 | help="saving path of prediction result") 26 | parser.add_argument('--cuda', default=True, help="run on CPU or GPU") 27 | parser.add_argument("--gpus", default="0", type=str, help="gpu ids (default: 0)") 28 | args = parser.parse_args() 29 | 30 | return args 31 | 32 | 33 | 34 | def predict(args, test_loader, model): 35 | """ 36 | args: 37 | test_loader: loaded for test dataset, for those that do not provide label on the test set 38 | model: model 39 | return: class IoU and mean IoU 40 | """ 41 | # evaluation or test mode 42 | model.eval() 43 | total_batches = len(test_loader) 44 | for i, (input, size, name) in enumerate(test_loader): 45 | with torch.no_grad(): 46 | input_var = input.cuda() 47 | start_time = time.time() 48 | output = model(input_var) 49 | torch.cuda.synchronize() 50 | time_taken = time.time() - start_time 51 | print('[%d/%d] time: %.2f' % (i + 1, total_batches, time_taken)) 52 | output = output.cpu().data[0].numpy() 53 | output = output.transpose(1, 2, 0) 54 | output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 55 | 56 | # Save the predict greyscale output for Cityscapes official evaluation 57 | # Modify image name to meet official requirement 58 | name[0] = name[0].rsplit('_', 1)[0] + '*' 59 | save_predict(output, None, name[0], args.dataset, args.save_seg_dir, 60 | output_grey=True, output_color=False, gt_color=False) 61 | 62 | 63 | def test_model(args): 64 | """ 65 | main function for testing 66 | param args: global arguments 67 | return: None 68 | """ 69 | print(args) 70 | 71 | if args.cuda: 72 | print("=====> use gpu id: '{}'".format(args.gpus)) 73 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 74 | if not torch.cuda.is_available(): 75 | raise Exception("no GPU found or wrong gpu id, please run without --cuda") 76 | 77 | # build the model 78 | model = build_model(args.model, num_classes=args.classes) 79 | 80 | if args.cuda: 81 | model = model.cuda() # using GPU for inference 82 | cudnn.benchmark = True 83 | 84 | if not os.path.exists(args.save_seg_dir): 85 | os.makedirs(args.save_seg_dir) 86 | 87 | # load the test set 88 | datas, testLoader = build_dataset_test(args.dataset, args.num_workers, none_gt=True) 89 | 90 | if args.checkpoint: 91 | if os.path.isfile(args.checkpoint): 92 | print("=====> loading checkpoint '{}'".format(args.checkpoint)) 93 | checkpoint = torch.load(args.checkpoint) 94 | model.load_state_dict(checkpoint['model']) 95 | # model.load_state_dict(convert_state_dict(checkpoint['model'])) 96 | else: 97 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 98 | raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) 99 | 100 | print("=====> beginning testing") 101 | print("test set length: ", len(testLoader)) 102 | predict(args, testLoader, model) 103 | 104 | 105 | if __name__ == '__main__': 106 | 107 | args = parse_args() 108 | 109 | args.save_seg_dir = 
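The per-image post-processing inside predict() above turns C x H x W logits into the uint8 label map that save_predict writes out; the same reduction in isolation, with a random tensor standing in for model(input_var) (shapes are illustrative):

import numpy as np
import torch

logits = torch.randn(1, 19, 512, 1024)               # stand-in for model(input_var)
scores = logits.cpu().data[0].numpy()                 # C x H x W
scores = scores.transpose(1, 2, 0)                    # H x W x C
label_map = np.asarray(np.argmax(scores, axis=2), dtype=np.uint8)
print(label_map.shape, label_map.dtype)               # (512, 1024) uint8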
os.path.join(args.save_seg_dir, args.dataset, 'predict', args.model) 110 | 111 | if args.dataset == 'cityscapes': 112 | args.classes = 19 113 | elif args.dataset == 'camvid': 114 | args.classes = 11 115 | else: 116 | raise NotImplementedError( 117 | "This repository now supports two datasets: cityscapes and camvid, %s is not included" % args.dataset) 118 | 119 | test_model(args) 120 | -------------------------------------------------------------------------------- /utils/optim/AdamW.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | class AdamW(Optimizer): 6 | """Implements Adam algorithm. 7 | It has been proposed in `Adam: A Method for Stochastic Optimization`_. 8 | Arguments: 9 | params (iterable): iterable of parameters to optimize or dicts defining 10 | parameter groups 11 | lr (float, optional): learning rate (default: 1e-3) 12 | betas (Tuple[float, float], optional): coefficients used for computing 13 | running averages of gradient and its square (default: (0.9, 0.999)) 14 | eps (float, optional): term added to the denominator to improve 15 | numerical stability (default: 1e-8) 16 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 17 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 18 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 19 | .. _Adam\: A Method for Stochastic Optimization: 20 | https://arxiv.org/abs/1412.6980 21 | .. _On the Convergence of Adam and Beyond: 22 | https://openreview.net/forum?id=ryQu7f-RZ 23 | """ 24 | 25 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 26 | weight_decay=0, amsgrad=False): 27 | if not 0.0 <= lr: 28 | raise ValueError("Invalid learning rate: {}".format(lr)) 29 | if not 0.0 <= eps: 30 | raise ValueError("Invalid epsilon value: {}".format(eps)) 31 | if not 0.0 <= betas[0] < 1.0: 32 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 33 | if not 0.0 <= betas[1] < 1.0: 34 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 35 | defaults = dict(lr=lr, betas=betas, eps=eps, 36 | weight_decay=weight_decay, amsgrad=amsgrad) 37 | super(AdamW, self).__init__(params, defaults) 38 | 39 | def __setstate__(self, state): 40 | super(AdamW, self).__setstate__(state) 41 | for group in self.param_groups: 42 | group.setdefault('amsgrad', False) 43 | 44 | def step(self, closure=None): 45 | """Performs a single optimization step. 46 | Arguments: 47 | closure (callable, optional): A closure that reevaluates the model 48 | and returns the loss. 49 | """ 50 | loss = None 51 | if closure is not None: 52 | loss = closure() 53 | 54 | for group in self.param_groups: 55 | for p in group['params']: 56 | if p.grad is None: 57 | continue 58 | grad = p.grad.data 59 | if grad.is_sparse: 60 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 61 | amsgrad = group['amsgrad'] 62 | 63 | state = self.state[p] 64 | 65 | # State initialization 66 | if len(state) == 0: 67 | state['step'] = 0 68 | # Exponential moving average of gradient values 69 | state['exp_avg'] = torch.zeros_like(p.data) 70 | # Exponential moving average of squared gradient values 71 | state['exp_avg_sq'] = torch.zeros_like(p.data) 72 | if amsgrad: 73 | # Maintains max of all exp. moving avg. of sq. grad. 
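A minimal usage sketch for utils/optim/AdamW.py — assuming the import path utils.optim.AdamW and one of the older PyTorch releases this repository targets (step() still uses the legacy add_/addcmul_ overloads):

import torch
from utils.optim.AdamW import AdamW  # assumed import path

layer = torch.nn.Linear(10, 2)
opt = AdamW(layer.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=1e-4)

loss = layer(torch.randn(4, 10)).pow(2).mean()
loss.backward()
opt.step()        # decoupled decay: p <- p - step_size * (weight_decay * p + exp_avg / denom)
opt.zero_grad()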
values 74 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 75 | 76 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 77 | if amsgrad: 78 | max_exp_avg_sq = state['max_exp_avg_sq'] 79 | beta1, beta2 = group['betas'] 80 | 81 | state['step'] += 1 82 | 83 | # if group['weight_decay'] != 0: 84 | # grad = grad.add(group['weight_decay'], p.data) 85 | 86 | # Decay the first and second moment running average coefficient 87 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 88 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 89 | if amsgrad: 90 | # Maintains the maximum of all 2nd moment running avg. till now 91 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 92 | # Use the max. for normalizing running avg. of gradient 93 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 94 | else: 95 | denom = exp_avg_sq.sqrt().add_(group['eps']) 96 | 97 | bias_correction1 = 1 - beta1 ** state['step'] 98 | bias_correction2 = 1 - beta2 ** state['step'] 99 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 100 | 101 | # p.data.addcdiv_(-step_size, exp_avg, denom) 102 | p.data.add_(-step_size, torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) ) 103 | 104 | return loss -------------------------------------------------------------------------------- /dataset/camvid/camvid_val_list.txt: -------------------------------------------------------------------------------- 1 | val/0016E5_07959.png valannot/0016E5_07959.png 2 | val/0016E5_07961.png valannot/0016E5_07961.png 3 | val/0016E5_07963.png valannot/0016E5_07963.png 4 | val/0016E5_07965.png valannot/0016E5_07965.png 5 | val/0016E5_07967.png valannot/0016E5_07967.png 6 | val/0016E5_07969.png valannot/0016E5_07969.png 7 | val/0016E5_07971.png valannot/0016E5_07971.png 8 | val/0016E5_07973.png valannot/0016E5_07973.png 9 | val/0016E5_07975.png valannot/0016E5_07975.png 10 | val/0016E5_07977.png valannot/0016E5_07977.png 11 | val/0016E5_07979.png valannot/0016E5_07979.png 12 | val/0016E5_07981.png valannot/0016E5_07981.png 13 | val/0016E5_07983.png valannot/0016E5_07983.png 14 | val/0016E5_07985.png valannot/0016E5_07985.png 15 | val/0016E5_07987.png valannot/0016E5_07987.png 16 | val/0016E5_07989.png valannot/0016E5_07989.png 17 | val/0016E5_07991.png valannot/0016E5_07991.png 18 | val/0016E5_07993.png valannot/0016E5_07993.png 19 | val/0016E5_07995.png valannot/0016E5_07995.png 20 | val/0016E5_07997.png valannot/0016E5_07997.png 21 | val/0016E5_07999.png valannot/0016E5_07999.png 22 | val/0016E5_08001.png valannot/0016E5_08001.png 23 | val/0016E5_08003.png valannot/0016E5_08003.png 24 | val/0016E5_08005.png valannot/0016E5_08005.png 25 | val/0016E5_08007.png valannot/0016E5_08007.png 26 | val/0016E5_08009.png valannot/0016E5_08009.png 27 | val/0016E5_08011.png valannot/0016E5_08011.png 28 | val/0016E5_08013.png valannot/0016E5_08013.png 29 | val/0016E5_08015.png valannot/0016E5_08015.png 30 | val/0016E5_08017.png valannot/0016E5_08017.png 31 | val/0016E5_08019.png valannot/0016E5_08019.png 32 | val/0016E5_08021.png valannot/0016E5_08021.png 33 | val/0016E5_08023.png valannot/0016E5_08023.png 34 | val/0016E5_08025.png valannot/0016E5_08025.png 35 | val/0016E5_08027.png valannot/0016E5_08027.png 36 | val/0016E5_08029.png valannot/0016E5_08029.png 37 | val/0016E5_08031.png valannot/0016E5_08031.png 38 | val/0016E5_08033.png valannot/0016E5_08033.png 39 | val/0016E5_08035.png valannot/0016E5_08035.png 40 | val/0016E5_08037.png valannot/0016E5_08037.png 41 | val/0016E5_08039.png valannot/0016E5_08039.png 
42 | val/0016E5_08041.png valannot/0016E5_08041.png 43 | val/0016E5_08043.png valannot/0016E5_08043.png 44 | val/0016E5_08045.png valannot/0016E5_08045.png 45 | val/0016E5_08047.png valannot/0016E5_08047.png 46 | val/0016E5_08049.png valannot/0016E5_08049.png 47 | val/0016E5_08051.png valannot/0016E5_08051.png 48 | val/0016E5_08053.png valannot/0016E5_08053.png 49 | val/0016E5_08055.png valannot/0016E5_08055.png 50 | val/0016E5_08057.png valannot/0016E5_08057.png 51 | val/0016E5_08059.png valannot/0016E5_08059.png 52 | val/0016E5_08061.png valannot/0016E5_08061.png 53 | val/0016E5_08063.png valannot/0016E5_08063.png 54 | val/0016E5_08065.png valannot/0016E5_08065.png 55 | val/0016E5_08067.png valannot/0016E5_08067.png 56 | val/0016E5_08069.png valannot/0016E5_08069.png 57 | val/0016E5_08071.png valannot/0016E5_08071.png 58 | val/0016E5_08073.png valannot/0016E5_08073.png 59 | val/0016E5_08075.png valannot/0016E5_08075.png 60 | val/0016E5_08077.png valannot/0016E5_08077.png 61 | val/0016E5_08079.png valannot/0016E5_08079.png 62 | val/0016E5_08081.png valannot/0016E5_08081.png 63 | val/0016E5_08083.png valannot/0016E5_08083.png 64 | val/0016E5_08085.png valannot/0016E5_08085.png 65 | val/0016E5_08087.png valannot/0016E5_08087.png 66 | val/0016E5_08089.png valannot/0016E5_08089.png 67 | val/0016E5_08091.png valannot/0016E5_08091.png 68 | val/0016E5_08093.png valannot/0016E5_08093.png 69 | val/0016E5_08095.png valannot/0016E5_08095.png 70 | val/0016E5_08097.png valannot/0016E5_08097.png 71 | val/0016E5_08099.png valannot/0016E5_08099.png 72 | val/0016E5_08101.png valannot/0016E5_08101.png 73 | val/0016E5_08103.png valannot/0016E5_08103.png 74 | val/0016E5_08105.png valannot/0016E5_08105.png 75 | val/0016E5_08107.png valannot/0016E5_08107.png 76 | val/0016E5_08109.png valannot/0016E5_08109.png 77 | val/0016E5_08111.png valannot/0016E5_08111.png 78 | val/0016E5_08113.png valannot/0016E5_08113.png 79 | val/0016E5_08115.png valannot/0016E5_08115.png 80 | val/0016E5_08117.png valannot/0016E5_08117.png 81 | val/0016E5_08119.png valannot/0016E5_08119.png 82 | val/0016E5_08121.png valannot/0016E5_08121.png 83 | val/0016E5_08123.png valannot/0016E5_08123.png 84 | val/0016E5_08125.png valannot/0016E5_08125.png 85 | val/0016E5_08127.png valannot/0016E5_08127.png 86 | val/0016E5_08129.png valannot/0016E5_08129.png 87 | val/0016E5_08131.png valannot/0016E5_08131.png 88 | val/0016E5_08133.png valannot/0016E5_08133.png 89 | val/0016E5_08135.png valannot/0016E5_08135.png 90 | val/0016E5_08137.png valannot/0016E5_08137.png 91 | val/0016E5_08139.png valannot/0016E5_08139.png 92 | val/0016E5_08141.png valannot/0016E5_08141.png 93 | val/0016E5_08143.png valannot/0016E5_08143.png 94 | val/0016E5_08145.png valannot/0016E5_08145.png 95 | val/0016E5_08147.png valannot/0016E5_08147.png 96 | val/0016E5_08149.png valannot/0016E5_08149.png 97 | val/0016E5_08151.png valannot/0016E5_08151.png 98 | val/0016E5_08153.png valannot/0016E5_08153.png 99 | val/0016E5_08155.png valannot/0016E5_08155.png 100 | val/0016E5_08157.png valannot/0016E5_08157.png 101 | val/0016E5_08159.png valannot/0016E5_08159.png 102 | -------------------------------------------------------------------------------- /utils/scheduler/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | from torch.optim.lr_scheduler import MultiStepLR, _LRScheduler 3 | 4 | 5 | class WarmupMultiStepLR(MultiStepLR): 6 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3, 7 | 
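Both warmup schedulers in utils/scheduler/lr_scheduler.py scale the base LR by warmup_factor * (1 - alpha) + alpha with alpha = last_epoch / warmup_iters; a small numeric sketch of that ramp (base LR and iteration counts are illustrative):

def warmup_scale(it, warmup_iters=500, warmup_factor=1.0 / 3):
    alpha = it / warmup_iters
    return warmup_factor * (1 - alpha) + alpha

base_lr = 0.01
for it in (0, 250, 500):
    print(it, base_lr * warmup_scale(it))
# 0   -> 0.0033...  (starts at base_lr / 3)
# 250 -> 0.0066...
# 500 -> 0.01       (ramp ends at the base LR; the main schedule takes over afterwards)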
warmup_iters=500, last_epoch=-1): 8 | self.warmup_factor = warmup_factor 9 | self.warmup_iters = warmup_iters 10 | super().__init__(optimizer, milestones, gamma, last_epoch) 11 | 12 | def get_lr(self): 13 | if self.last_epoch <= self.warmup_iters: 14 | alpha = self.last_epoch / self.warmup_iters 15 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 16 | # print(self.base_lrs[0]*warmup_factor) 17 | return [lr * warmup_factor for lr in self.base_lrs] 18 | else: 19 | lr = super().get_lr() 20 | return lr 21 | 22 | 23 | class WarmupCosineLR(_LRScheduler): 24 | def __init__(self, optimizer, T_max, warmup_factor=1.0 / 3, warmup_iters=500, 25 | eta_min=0, last_epoch=-1): 26 | self.warmup_factor = warmup_factor 27 | self.warmup_iters = warmup_iters 28 | self.T_max, self.eta_min = T_max, eta_min 29 | super().__init__(optimizer, last_epoch) 30 | 31 | def get_lr(self): 32 | if self.last_epoch <= self.warmup_iters: 33 | alpha = self.last_epoch / self.warmup_iters 34 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 35 | # print(self.base_lrs[0]*warmup_factor) 36 | return [lr * warmup_factor for lr in self.base_lrs] 37 | else: 38 | return [self.eta_min + (base_lr - self.eta_min) * 39 | (1 + math.cos( 40 | math.pi * (self.last_epoch - self.warmup_iters) / (self.T_max - self.warmup_iters))) / 2 41 | for base_lr in self.base_lrs] 42 | 43 | 44 | 45 | class WarmupPolyLR(_LRScheduler): 46 | def __init__(self, optimizer, T_max, cur_iter, warmup_factor=1.0 / 3, warmup_iters=500, 47 | eta_min=0, power=0.9): 48 | self.warmup_factor = warmup_factor 49 | self.warmup_iters = warmup_iters 50 | self.power = power 51 | self.T_max, self.eta_min = T_max, eta_min 52 | self.cur_iter = cur_iter 53 | super().__init__(optimizer) 54 | 55 | def get_lr(self): 56 | if self.cur_iter <= self.warmup_iters: 57 | alpha = self.cur_iter / self.warmup_iters 58 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 59 | # print(self.base_lrs[0]*warmup_factor) 60 | return [lr * warmup_factor for lr in self.base_lrs] 61 | else: 62 | return [self.eta_min + (base_lr - self.eta_min) * 63 | math.pow(1 - (self.cur_iter - self.warmup_iters) / (self.T_max - self.warmup_iters), 64 | self.power) for base_lr in self.base_lrs] 65 | 66 | 67 | def poly_learning_rate(cur_epoch, max_epoch, curEpoch_iter, perEpoch_iter, baselr): 68 | cur_iter = cur_epoch * perEpoch_iter + curEpoch_iter 69 | max_iter = max_epoch * perEpoch_iter 70 | lr = baselr * pow((1 - 1.0 * cur_iter / max_iter), 0.9) 71 | 72 | return lr 73 | 74 | 75 | 76 | class GradualWarmupScheduler(_LRScheduler): 77 | """ Gradually warm-up(increasing) learning rate in optimizer. 78 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 79 | Args: 80 | optimizer (Optimizer): Wrapped optimizer. 81 | min_lr_mul: target learning rate = base lr * min_lr_mul 82 | total_epoch: target learning rate is reached at total_epoch, gradually 83 | after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) 84 | """ 85 | 86 | def __init__(self, optimizer, total_epoch, min_lr_mul=0.1, after_scheduler=None): 87 | self.min_lr_mul = min_lr_mul 88 | if self.min_lr_mul > 1. 
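poly_learning_rate above is the usual per-iteration "poly" decay, lr = base_lr * (1 - cur_iter / max_iter)^0.9; a quick numeric check (epoch counts and base LR are illustrative):

base_lr, max_epoch, per_epoch_iter = 4.5e-2, 300, 500
cur_iter = 150 * per_epoch_iter             # halfway through training
max_iter = max_epoch * per_epoch_iter
lr = base_lr * (1 - cur_iter / max_iter) ** 0.9
print(lr)                                   # ~0.0241  (= 0.045 * 0.5**0.9)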
or self.min_lr_mul < 0.: 89 | raise ValueError('min_lr_mul should be [0., 1.]') 90 | self.total_epoch = total_epoch 91 | self.after_scheduler = after_scheduler 92 | self.finished = False 93 | super(GradualWarmupScheduler, self).__init__(optimizer) 94 | 95 | def get_lr(self): 96 | if self.last_epoch > self.total_epoch: 97 | if self.after_scheduler: 98 | if not self.finished: 99 | self.after_scheduler.base_lrs = self.base_lrs 100 | self.finished = True 101 | return self.after_scheduler.get_lr() 102 | else: 103 | return self.base_lrs 104 | else: 105 | return [base_lr * (self.min_lr_mul + (1. - self.min_lr_mul) * (self.last_epoch / float(self.total_epoch))) for base_lr in self.base_lrs] 106 | 107 | def step(self, epoch=None): 108 | if self.finished and self.after_scheduler: 109 | return self.after_scheduler.step(epoch - self.total_epoch) 110 | else: 111 | return super(GradualWarmupScheduler, self).step(epoch) 112 | 113 | 114 | 115 | 116 | if __name__ == '__main__': 117 | optim = WarmupPolyLR() 118 | -------------------------------------------------------------------------------- /model/ERFNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation 3 | #Paper-Link: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf 4 | ###################################################################################### 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | __all__ = ["ERFNet"] 14 | 15 | 16 | class DownsamplerBlock (nn.Module): 17 | def __init__(self, ninput, noutput): 18 | super().__init__() 19 | 20 | self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True) 21 | self.pool = nn.MaxPool2d(2, stride=2) 22 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 23 | 24 | def forward(self, input): 25 | output = torch.cat([self.conv(input), self.pool(input)], 1) 26 | output = self.bn(output) 27 | return F.relu(output) 28 | 29 | 30 | class non_bottleneck_1d (nn.Module): 31 | def __init__(self, chann, dropprob, dilated): 32 | super().__init__() 33 | 34 | self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1,0), bias=True) 35 | 36 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True) 37 | 38 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 39 | 40 | self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1*dilated,0), bias=True, dilation = (dilated,1)) 41 | 42 | self.conv1x3_2 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1*dilated), bias=True, dilation = (1, dilated)) 43 | 44 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 45 | 46 | self.dropout = nn.Dropout2d(dropprob) 47 | 48 | 49 | def forward(self, input): 50 | 51 | output = self.conv3x1_1(input) 52 | output = F.relu(output) 53 | output = self.conv1x3_1(output) 54 | output = self.bn1(output) 55 | output = F.relu(output) 56 | 57 | output = self.conv3x1_2(output) 58 | output = F.relu(output) 59 | output = self.conv1x3_2(output) 60 | output = self.bn2(output) 61 | 62 | if (self.dropout.p != 0): 63 | output = self.dropout(output) 64 | 65 | return F.relu(output+input) #+input = identity (residual connection) 66 | 67 | 68 | class Encoder(nn.Module): 69 | def __init__(self, num_classes): 70 | super().__init__() 71 | self.initial_block = DownsamplerBlock(3,16) 72 | 
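The non_bottleneck_1d block above factorizes every 3x3 convolution into a 3x1/1x3 pair (the second pair dilated), which preserves the residual shape and cuts the per-pair weight count from 128*128*9 = 147,456 to 2*128*128*3 = 98,304 at 128 channels. A small shape check — a sketch assuming the class can be imported from model.ERFNet (it is defined there even though __all__ only lists ERFNet):

import torch
from model.ERFNet import non_bottleneck_1d  # assumed import; not listed in __all__

block = non_bottleneck_1d(chann=128, dropprob=0.3, dilated=2).eval()
x = torch.randn(1, 128, 64, 128)
print(block(x).shape)  # torch.Size([1, 128, 64, 128]) -- the residual branch preserves the shape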
73 | self.layers = nn.ModuleList() 74 | 75 | self.layers.append(DownsamplerBlock(16,64)) 76 | 77 | for x in range(0, 5): #5 times 78 | self.layers.append(non_bottleneck_1d(64, 0.03, 1)) 79 | 80 | self.layers.append(DownsamplerBlock(64,128)) 81 | 82 | for x in range(0, 2): #2 times 83 | self.layers.append(non_bottleneck_1d(128, 0.3, 2)) 84 | self.layers.append(non_bottleneck_1d(128, 0.3, 4)) 85 | self.layers.append(non_bottleneck_1d(128, 0.3, 8)) 86 | self.layers.append(non_bottleneck_1d(128, 0.3, 16)) 87 | 88 | #Only in encoder mode: 89 | self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) 90 | 91 | def forward(self, input, predict=False): 92 | output = self.initial_block(input) 93 | 94 | for layer in self.layers: 95 | output = layer(output) 96 | 97 | if predict: 98 | output = self.output_conv(output) 99 | 100 | return output 101 | 102 | 103 | class UpsamplerBlock (nn.Module): 104 | def __init__(self, ninput, noutput): 105 | super().__init__() 106 | self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) 107 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 108 | 109 | def forward(self, input): 110 | output = self.conv(input) 111 | output = self.bn(output) 112 | return F.relu(output) 113 | 114 | class Decoder (nn.Module): 115 | def __init__(self, num_classes): 116 | super().__init__() 117 | 118 | self.layers = nn.ModuleList() 119 | 120 | self.layers.append(UpsamplerBlock(128,64)) 121 | self.layers.append(non_bottleneck_1d(64, 0, 1)) 122 | self.layers.append(non_bottleneck_1d(64, 0, 1)) 123 | 124 | self.layers.append(UpsamplerBlock(64,16)) 125 | self.layers.append(non_bottleneck_1d(16, 0, 1)) 126 | self.layers.append(non_bottleneck_1d(16, 0, 1)) 127 | 128 | self.output_conv = nn.ConvTranspose2d( 16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) 129 | 130 | def forward(self, input): 131 | output = input 132 | 133 | for layer in self.layers: 134 | output = layer(output) 135 | 136 | output = self.output_conv(output) 137 | 138 | return output 139 | 140 | #ERFNet 141 | class ERFNet(nn.Module): 142 | def __init__(self, classes, encoder=None): #use encoder to pass pretrained encoder 143 | super().__init__() 144 | 145 | if (encoder == None): 146 | self.encoder = Encoder(classes) 147 | else: 148 | self.encoder = encoder 149 | self.decoder = Decoder(classes) 150 | 151 | def forward(self, input, only_encode=False): 152 | if only_encode: 153 | return self.encoder.forward(input, predict=True) 154 | else: 155 | output = self.encoder(input) #predict=False by default 156 | return self.decoder.forward(output) 157 | 158 | """print layers and params of network""" 159 | if __name__ == '__main__': 160 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 161 | model = ERFNet(classes=19).to(device) 162 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/EDANet.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #EDANet:Efficient Dense Modules of Asymmetric Convolution for Real-Time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/ftp/arxiv/papers/1809/1809.06323.pdf 4 | ################################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | 11 
| 12 | __all__ = ["EDANet"] 13 | 14 | class DownsamplerBlock(nn.Module): 15 | def __init__(self, ninput, noutput): 16 | super(DownsamplerBlock,self).__init__() 17 | 18 | self.ninput = ninput 19 | self.noutput = noutput 20 | 21 | if self.ninput < self.noutput: 22 | # Wout > Win 23 | self.conv = nn.Conv2d(ninput, noutput-ninput, kernel_size=3, stride=2, padding=1) 24 | self.pool = nn.MaxPool2d(2, stride=2) 25 | else: 26 | # Wout < Win 27 | self.conv = nn.Conv2d(ninput, noutput, kernel_size=3, stride=2, padding=1) 28 | 29 | self.bn = nn.BatchNorm2d(noutput) 30 | 31 | def forward(self, x): 32 | if self.ninput < self.noutput: 33 | output = torch.cat([self.conv(x), self.pool(x)], 1) 34 | else: 35 | output = self.conv(x) 36 | 37 | output = self.bn(output) 38 | return F.relu(output) 39 | 40 | # --- Build the EDANet Module --- # 41 | class EDAModule(nn.Module): 42 | def __init__(self, ninput, dilated, k = 40, dropprob = 0.02): 43 | super().__init__() 44 | 45 | # k: growthrate 46 | # dropprob:a dropout layer between the last ReLU and the concatenation of each module 47 | 48 | self.conv1x1 = nn.Conv2d(ninput, k, kernel_size=1) 49 | self.bn0 = nn.BatchNorm2d(k) 50 | 51 | self.conv3x1_1 = nn.Conv2d(k, k, kernel_size=(3, 1),padding=(1,0)) 52 | self.conv1x3_1 = nn.Conv2d(k, k, kernel_size=(1, 3),padding=(0,1)) 53 | self.bn1 = nn.BatchNorm2d(k) 54 | 55 | self.conv3x1_2 = nn.Conv2d(k, k, (3,1), stride=1, padding=(dilated,0), dilation = dilated) 56 | self.conv1x3_2 = nn.Conv2d(k, k, (1,3), stride=1, padding=(0,dilated), dilation = dilated) 57 | self.bn2 = nn.BatchNorm2d(k) 58 | 59 | self.dropout = nn.Dropout2d(dropprob) 60 | 61 | 62 | def forward(self, x): 63 | input = x 64 | 65 | output = self.conv1x1(x) 66 | output = self.bn0(output) 67 | output = F.relu(output) 68 | 69 | output = self.conv3x1_1(output) 70 | output = self.conv1x3_1(output) 71 | output = self.bn1(output) 72 | output = F.relu(output) 73 | 74 | output = self.conv3x1_2(output) 75 | output = self.conv1x3_2(output) 76 | output = self.bn2(output) 77 | output = F.relu(output) 78 | 79 | if (self.dropout.p != 0): 80 | output = self.dropout(output) 81 | 82 | output = torch.cat([output,input],1) 83 | # print output.size() #check the output 84 | return output 85 | 86 | 87 | # --- Build the EDANet Block --- # 88 | class EDANetBlock(nn.Module): 89 | def __init__(self, in_channels, num_dense_layer, dilated, growth_rate): 90 | """ 91 | :param in_channels: input channel size 92 | :param num_dense_layer: the number of RDB layers 93 | :param growth_rate: growth_rate 94 | """ 95 | super().__init__() 96 | _in_channels = in_channels 97 | modules = [] 98 | for i in range(num_dense_layer): 99 | modules.append(EDAModule(_in_channels, dilated[i], growth_rate)) 100 | _in_channels += growth_rate 101 | self.residual_dense_layers = nn.Sequential(*modules) 102 | #self.conv_1x1 = nn.Conv2d(_in_channels, in_channels, kernel_size=1, padding=0) 103 | 104 | def forward(self, x): 105 | out = self.residual_dense_layers(x) 106 | #out = self.conv_1x1(out) 107 | # out = out + x 108 | return out 109 | 110 | 111 | class EDANet(nn.Module): 112 | def __init__(self, classes=19): 113 | super(EDANet,self).__init__() 114 | 115 | self.layers = nn.ModuleList() 116 | 117 | # DownsamplerBlock1 118 | self.layers.append(DownsamplerBlock(3, 15)) 119 | 120 | # DownsamplerBlock2 121 | self.layers.append(DownsamplerBlock(15, 60)) 122 | 123 | # EDA Block1 124 | self.layers.append(EDANetBlock(60, 5, [1,1,1,2,2], 40)) 125 | 126 | # DownsamplerBlock3 127 | self.layers.append(DownsamplerBlock(260, 
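The channel widths in EDANet follow from the dense growth rate k = 40: every EDAModule concatenates its 40-channel output onto its input, which is where the 260, 130 and 450 used in the network definition come from. Quick bookkeeping:

k = 40
block1_out = 60 + 5 * k       # 5 modules after DownsamplerBlock(15, 60) -> 260, hence DownsamplerBlock(260, 130)
block2_out = 130 + 8 * k      # 8 modules after that downsampler         -> 450, hence Conv2d(450, classes, 1)
print(block1_out, block2_out)  # 260 450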
130)) 128 | 129 | # # EDA Block2 130 | self.layers.append(EDANetBlock(130, 8, [2,2,4,4,8,8,16,16], 40)) 131 | 132 | # Projection layer 133 | self.project_layer = nn.Conv2d(450,classes,kernel_size = 1) 134 | 135 | self.weights_init() 136 | 137 | def weights_init(self): 138 | for idx, m in enumerate(self.modules()): 139 | classname = m.__class__.__name__ 140 | if classname.find('Conv') != -1: 141 | m.weight.data.normal_(0.0, 0.02) 142 | elif classname.find('BatchNorm') != -1: 143 | m.weight.data.normal_(1.0, 0.02) 144 | m.bias.data.fill_(0) 145 | 146 | def forward(self, x): 147 | 148 | output = x 149 | 150 | for layer in self.layers: 151 | output = layer(output) 152 | 153 | output = self.project_layer(output) 154 | 155 | # Bilinear interpolation x8 156 | output = F.interpolate(output,scale_factor = 8,mode = 'bilinear',align_corners=True) 157 | 158 | return output 159 | 160 | """print layers and params of network""" 161 | if __name__ == '__main__': 162 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 163 | model = EDANet(classes=19).to(device) 164 | summary(model,(3,512,1024)) 165 | -------------------------------------------------------------------------------- /model/ESPNet_v2/cnn_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | __author__ = "Sachin Mehta" 7 | __version__ = "1.0.1" 8 | __maintainer__ = "Sachin Mehta" 9 | 10 | 11 | class PSPModule(nn.Module): 12 | def __init__(self, features, out_features=1024, sizes=(1, 2, 4, 8)): 13 | super().__init__() 14 | self.stages = [] 15 | self.stages = nn.ModuleList([C(features, features, 3, 1, groups=features) for size in sizes]) 16 | self.project = CBR(features * (len(sizes) + 1), out_features, 1, 1) 17 | 18 | def forward(self, feats): 19 | h, w = feats.size(2), feats.size(3) 20 | out = [feats] 21 | for stage in self.stages: 22 | feats = F.avg_pool2d(feats, kernel_size=3, stride=2, padding=1) 23 | upsampled = F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) 24 | out.append(upsampled) 25 | return self.project(torch.cat(out, dim=1)) 26 | 27 | class CBR(nn.Module): 28 | ''' 29 | This class defines the convolution layer with batch normalization and PReLU activation 30 | ''' 31 | 32 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 33 | ''' 34 | 35 | :param nIn: number of input channels 36 | :param nOut: number of output channels 37 | :param kSize: kernel size 38 | :param stride: stride rate for down-sampling. 
Default is 1 39 | ''' 40 | super().__init__() 41 | padding = int((kSize - 1) / 2) 42 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, groups=groups) 43 | self.bn = nn.BatchNorm2d(nOut) 44 | self.act = nn.PReLU(nOut) 45 | 46 | def forward(self, input): 47 | ''' 48 | :param input: input feature map 49 | :return: transformed feature map 50 | ''' 51 | output = self.conv(input) 52 | # output = self.conv1(output) 53 | output = self.bn(output) 54 | output = self.act(output) 55 | return output 56 | 57 | 58 | class BR(nn.Module): 59 | ''' 60 | This class groups the batch normalization and PReLU activation 61 | ''' 62 | 63 | def __init__(self, nOut): 64 | ''' 65 | :param nOut: output feature maps 66 | ''' 67 | super().__init__() 68 | self.bn = nn.BatchNorm2d(nOut) 69 | self.act = nn.PReLU(nOut) 70 | 71 | def forward(self, input): 72 | ''' 73 | :param input: input feature map 74 | :return: normalized and thresholded feature map 75 | ''' 76 | output = self.bn(input) 77 | output = self.act(output) 78 | return output 79 | 80 | 81 | class CB(nn.Module): 82 | ''' 83 | This class groups the convolution and batch normalization 84 | ''' 85 | 86 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 87 | ''' 88 | :param nIn: number of input channels 89 | :param nOut: number of output channels 90 | :param kSize: kernel size 91 | :param stride: optinal stide for down-sampling 92 | ''' 93 | super().__init__() 94 | padding = int((kSize - 1) / 2) 95 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 96 | groups=groups) 97 | self.bn = nn.BatchNorm2d(nOut) 98 | 99 | def forward(self, input): 100 | ''' 101 | 102 | :param input: input feature map 103 | :return: transformed feature map 104 | ''' 105 | output = self.conv(input) 106 | output = self.bn(output) 107 | return output 108 | 109 | 110 | class C(nn.Module): 111 | ''' 112 | This class is for a convolutional layer. 113 | ''' 114 | 115 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 116 | ''' 117 | 118 | :param nIn: number of input channels 119 | :param nOut: number of output channels 120 | :param kSize: kernel size 121 | :param stride: optional stride rate for down-sampling 122 | ''' 123 | super().__init__() 124 | padding = int((kSize - 1) / 2) 125 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 126 | groups=groups) 127 | 128 | def forward(self, input): 129 | ''' 130 | :param input: input feature map 131 | :return: transformed feature map 132 | ''' 133 | output = self.conv(input) 134 | return output 135 | 136 | 137 | class CDilated(nn.Module): 138 | ''' 139 | This class defines the dilated convolution. 140 | ''' 141 | 142 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 143 | ''' 144 | :param nIn: number of input channels 145 | :param nOut: number of output channels 146 | :param kSize: kernel size 147 | :param stride: optional stride rate for down-sampling 148 | :param d: optional dilation rate 149 | ''' 150 | super().__init__() 151 | padding = int((kSize - 1) / 2) * d 152 | self.conv = nn.Conv2d(nIn, nOut,kSize, stride=stride, padding=padding, bias=False, 153 | dilation=d, groups=groups) 154 | 155 | def forward(self, input): 156 | ''' 157 | :param input: input feature map 158 | :return: transformed feature map 159 | ''' 160 | output = self.conv(input) 161 | return output 162 | 163 | class CDilatedB(nn.Module): 164 | ''' 165 | This class defines the dilated convolution with batch normalization. 
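The padding rule in CDilated/CDilatedB above, padding = (kSize - 1) // 2 * d, keeps stride-1 outputs the same size as their inputs for any dilation; a short check with plain torch.nn (channel counts and sizes are illustrative):

import torch
import torch.nn as nn

k, d = 3, 4
padding = (k - 1) // 2 * d                         # 4; the dilated kernel spans d*(k-1)+1 = 9 pixels
conv = nn.Conv2d(32, 32, k, stride=1, padding=padding, dilation=d, bias=False)
print(conv(torch.randn(1, 32, 64, 64)).shape)      # torch.Size([1, 32, 64, 64])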
166 | ''' 167 | 168 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 169 | ''' 170 | :param nIn: number of input channels 171 | :param nOut: number of output channels 172 | :param kSize: kernel size 173 | :param stride: optional stride rate for down-sampling 174 | :param d: optional dilation rate 175 | ''' 176 | super().__init__() 177 | padding = int((kSize - 1) / 2) * d 178 | self.conv = nn.Conv2d(nIn, nOut,kSize, stride=stride, padding=padding, bias=False, 179 | dilation=d, groups=groups) 180 | self.bn = nn.BatchNorm2d(nOut) 181 | 182 | def forward(self, input): 183 | ''' 184 | :param input: input feature map 185 | :return: transformed feature map 186 | ''' 187 | return self.bn(self.conv(input)) 188 | -------------------------------------------------------------------------------- /utils/optim/Ranger.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | import itertools as it 5 | 6 | 7 | 8 | class Ranger(Optimizer): 9 | 10 | def __init__(self, params, lr=1e-3, alpha=0.5, k=6, N_sma_threshhold=5, betas=(.95,0.999), eps=1e-5, weight_decay=0): 11 | #parameter checks 12 | if not 0.0 <= alpha <= 1.0: 13 | raise ValueError(f'Invalid slow update rate: {alpha}') 14 | if not 1 <= k: 15 | raise ValueError(f'Invalid lookahead steps: {k}') 16 | if not lr > 0: 17 | raise ValueError(f'Invalid Learning Rate: {lr}') 18 | if not eps > 0: 19 | raise ValueError(f'Invalid eps: {eps}') 20 | 21 | #parameter comments: 22 | # beta1 (momentum) of .95 seems to work better than .90... 23 | #N_sma_threshold of 5 seems better in testing than 4. 24 | #In both cases, worth testing on your dataset (.90 vs .95, 4 vs 5) to make sure which works best for you. 25 | 26 | #prep defaults and init torch.optim base 27 | defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas, N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay) 28 | super().__init__(params,defaults) 29 | 30 | #adjustable threshold 31 | self.N_sma_threshhold = N_sma_threshhold 32 | 33 | #now we can get to work... 34 | #removed as we now use step from RAdam...no need for duplicate step counting 35 | #for group in self.param_groups: 36 | # group["step_counter"] = 0 37 | #print("group step counter init") 38 | 39 | #look ahead params 40 | self.alpha = alpha 41 | self.k = k 42 | 43 | #radam buffer for state 44 | self.radam_buffer = [[None,None,None] for ind in range(10)] 45 | 46 | #self.first_run_check=0 47 | 48 | #lookahead weights 49 | #9/2/19 - lookahead param tensors have been moved to state storage. 50 | #This should resolve issues with load/save where weights were left in GPU memory from first load, slowing down future runs. 51 | 52 | #self.slow_weights = [[p.clone().detach() for p in group['params']] 53 | # for group in self.param_groups] 54 | 55 | #don't use grad for lookahead weights 56 | #for w in it.chain(*self.slow_weights): 57 | # w.requires_grad = False 58 | 59 | def __setstate__(self, state): 60 | print("set state called") 61 | super(Ranger, self).__setstate__(state) 62 | 63 | 64 | def step(self, closure=None): 65 | loss = None 66 | #note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure. 67 | #Uncomment if you need to use the actual closure... 
68 | 69 | #if closure is not None: 70 | #loss = closure() 71 | 72 | #Evaluate averages and grad, update param tensors 73 | for group in self.param_groups: 74 | 75 | for p in group['params']: 76 | if p.grad is None: 77 | continue 78 | grad = p.grad.data.float() 79 | if grad.is_sparse: 80 | raise RuntimeError('Ranger optimizer does not support sparse gradients') 81 | 82 | p_data_fp32 = p.data.float() 83 | 84 | state = self.state[p] #get state dict for this param 85 | 86 | if len(state) == 0: #if first time to run...init dictionary with our desired entries 87 | #if self.first_run_check==0: 88 | #self.first_run_check=1 89 | #print("Initializing slow buffer...should not see this at load from saved model!") 90 | state['step'] = 0 91 | state['exp_avg'] = torch.zeros_like(p_data_fp32) 92 | state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) 93 | 94 | #look ahead weight storage now in state dict 95 | state['slow_buffer'] = torch.empty_like(p.data) 96 | state['slow_buffer'].copy_(p.data) 97 | 98 | else: 99 | state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) 100 | state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) 101 | 102 | #begin computations 103 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 104 | beta1, beta2 = group['betas'] 105 | 106 | #compute variance mov avg 107 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 108 | #compute mean moving avg 109 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 110 | 111 | state['step'] += 1 112 | 113 | 114 | buffered = self.radam_buffer[int(state['step'] % 10)] 115 | if state['step'] == buffered[0]: 116 | N_sma, step_size = buffered[1], buffered[2] 117 | else: 118 | buffered[0] = state['step'] 119 | beta2_t = beta2 ** state['step'] 120 | N_sma_max = 2 / (1 - beta2) - 1 121 | N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) 122 | buffered[1] = N_sma 123 | if N_sma > self.N_sma_threshhold: 124 | step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step']) 125 | else: 126 | step_size = 1.0 / (1 - beta1 ** state['step']) 127 | buffered[2] = step_size 128 | 129 | if group['weight_decay'] != 0: 130 | p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) 131 | 132 | if N_sma > self.N_sma_threshhold: 133 | denom = exp_avg_sq.sqrt().add_(group['eps']) 134 | p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom) 135 | else: 136 | p_data_fp32.add_(-step_size * group['lr'], exp_avg) 137 | 138 | p.data.copy_(p_data_fp32) 139 | 140 | #integrated look ahead... 
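utils/optim/Ranger.py couples a RAdam inner step with Lookahead: every k steps the fast weights are pulled a fraction alpha toward a slow buffer kept in optimizer state. A minimal usage sketch — assuming the import path utils.optim.Ranger and, as with the other optimizers here, an older PyTorch release that still accepts the legacy add_/addcmul_ overloads used in step():

import torch
from utils.optim.Ranger import Ranger  # assumed import path

layer = torch.nn.Linear(10, 2)
opt = Ranger(layer.parameters(), lr=1e-3, alpha=0.5, k=6)   # lookahead sync every 6 steps

for _ in range(12):                                         # two full lookahead cycles
    loss = layer(torch.randn(4, 10)).pow(2).mean()
    loss.backward()
    opt.step()
    opt.zero_grad()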
141 | #we do it at the param level instead of group level 142 | if state['step'] % group['k'] == 0: 143 | slow_p = state['slow_buffer'] #get access to slow param tensor 144 | slow_p.add_(self.alpha, p.data - slow_p) #(fast weights - slow weights) * alpha 145 | p.data.copy_(slow_p) #copy interpolated weights to RAdam param tensor 146 | 147 | return loss -------------------------------------------------------------------------------- /model/DABNet.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | #DABNet: Depth-wise Asymmetric Bottleneck for Real-time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1907.11357.pdf 4 | ###################################################################################### 5 | 6 | 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchsummary import summary 12 | 13 | __all__ = ["DABNet"] 14 | 15 | 16 | class Conv(nn.Module): 17 | def __init__(self, nIn, nOut, kSize, stride, padding, dilation=(1, 1), groups=1, bn_acti=False, bias=False): 18 | super().__init__() 19 | 20 | self.bn_acti = bn_acti 21 | 22 | self.conv = nn.Conv2d(nIn, nOut, kernel_size=kSize, 23 | stride=stride, padding=padding, 24 | dilation=dilation, groups=groups, bias=bias) 25 | 26 | if self.bn_acti: 27 | self.bn_prelu = BNPReLU(nOut) 28 | 29 | def forward(self, input): 30 | output = self.conv(input) 31 | 32 | if self.bn_acti: 33 | output = self.bn_prelu(output) 34 | 35 | return output 36 | 37 | 38 | class BNPReLU(nn.Module): 39 | def __init__(self, nIn): 40 | super().__init__() 41 | self.bn = nn.BatchNorm2d(nIn, eps=1e-3) 42 | self.acti = nn.PReLU(nIn) 43 | 44 | def forward(self, input): 45 | output = self.bn(input) 46 | output = self.acti(output) 47 | 48 | return output 49 | 50 | 51 | class DABModule(nn.Module): 52 | def __init__(self, nIn, d=1, kSize=3, dkSize=3): 53 | super().__init__() 54 | 55 | self.bn_relu_1 = BNPReLU(nIn) 56 | self.conv3x3 = Conv(nIn, nIn // 2, kSize, 1, padding=1, bn_acti=True) 57 | 58 | self.dconv3x1 = Conv(nIn // 2, nIn // 2, (dkSize, 1), 1, 59 | padding=(1, 0), groups=nIn // 2, bn_acti=True) 60 | self.dconv1x3 = Conv(nIn // 2, nIn // 2, (1, dkSize), 1, 61 | padding=(0, 1), groups=nIn // 2, bn_acti=True) 62 | self.ddconv3x1 = Conv(nIn // 2, nIn // 2, (dkSize, 1), 1, 63 | padding=(1 * d, 0), dilation=(d, 1), groups=nIn // 2, bn_acti=True) 64 | self.ddconv1x3 = Conv(nIn // 2, nIn // 2, (1, dkSize), 1, 65 | padding=(0, 1 * d), dilation=(1, d), groups=nIn // 2, bn_acti=True) 66 | 67 | self.bn_relu_2 = BNPReLU(nIn // 2) 68 | self.conv1x1 = Conv(nIn // 2, nIn, 1, 1, padding=0, bn_acti=False) 69 | 70 | def forward(self, input): 71 | output = self.bn_relu_1(input) 72 | output = self.conv3x3(output) 73 | 74 | br1 = self.dconv3x1(output) 75 | br1 = self.dconv1x3(br1) 76 | br2 = self.ddconv3x1(output) 77 | br2 = self.ddconv1x3(br2) 78 | 79 | output = br1 + br2 80 | output = self.bn_relu_2(output) 81 | output = self.conv1x1(output) 82 | 83 | return output + input 84 | 85 | 86 | class DownSamplingBlock(nn.Module): 87 | def __init__(self, nIn, nOut): 88 | super().__init__() 89 | self.nIn = nIn 90 | self.nOut = nOut 91 | 92 | if self.nIn < self.nOut: 93 | nConv = nOut - nIn 94 | else: 95 | nConv = nOut 96 | 97 | self.conv3x3 = Conv(nIn, nConv, kSize=3, stride=2, padding=1) 98 | self.max_pool = nn.MaxPool2d(2, stride=2) 99 | self.bn_prelu = BNPReLU(nOut) 100 | 101 | def forward(self, input): 102 | 
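The "+ 3" channel counts in DABNet come from re-injecting the average-pooled RGB image at each scale; quick bookkeeping for the 35, 131 and 259 widths used in the network definition:

stage0 = 32 + 3                  # init_conv output cat image/2                  -> 35
stage1 = 64 + 64 + 3             # DAB block 1 output cat its input cat image/4  -> 131 (i.e. 128 + 3)
stage2 = 128 + 128 + 3           # DAB block 2 output cat its input cat image/8  -> 259 (i.e. 256 + 3)
print(stage0, stage1, stage2)    # 35 131 259 -> classifier = Conv(259, classes, 1)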
output = self.conv3x3(input) 103 | 104 | if self.nIn < self.nOut: 105 | max_pool = self.max_pool(input) 106 | output = torch.cat([output, max_pool], 1) 107 | 108 | output = self.bn_prelu(output) 109 | 110 | return output 111 | 112 | 113 | class InputInjection(nn.Module): 114 | def __init__(self, ratio): 115 | super().__init__() 116 | self.pool = nn.ModuleList() 117 | for i in range(0, ratio): 118 | self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) 119 | 120 | def forward(self, input): 121 | for pool in self.pool: 122 | input = pool(input) 123 | 124 | return input 125 | 126 | 127 | class DABNet(nn.Module): 128 | def __init__(self, classes=19, block_1=3, block_2=6): 129 | super().__init__() 130 | self.init_conv = nn.Sequential( 131 | Conv(3, 32, 3, 2, padding=1, bn_acti=True), 132 | Conv(32, 32, 3, 1, padding=1, bn_acti=True), 133 | Conv(32, 32, 3, 1, padding=1, bn_acti=True), 134 | ) 135 | 136 | self.down_1 = InputInjection(1) # down-sample the image 1 times 137 | self.down_2 = InputInjection(2) # down-sample the image 2 times 138 | self.down_3 = InputInjection(3) # down-sample the image 3 times 139 | 140 | self.bn_prelu_1 = BNPReLU(32 + 3) 141 | 142 | # DAB Block 1 143 | self.downsample_1 = DownSamplingBlock(32 + 3, 64) 144 | self.DAB_Block_1 = nn.Sequential() 145 | for i in range(0, block_1): 146 | self.DAB_Block_1.add_module("DAB_Module_1_" + str(i), DABModule(64, d=2)) 147 | self.bn_prelu_2 = BNPReLU(128 + 3) 148 | 149 | # DAB Block 2 150 | dilation_block_2 = [4, 4, 8, 8, 16, 16] 151 | self.downsample_2 = DownSamplingBlock(128 + 3, 128) 152 | self.DAB_Block_2 = nn.Sequential() 153 | for i in range(0, block_2): 154 | self.DAB_Block_2.add_module("DAB_Module_2_" + str(i), 155 | DABModule(128, d=dilation_block_2[i])) 156 | self.bn_prelu_3 = BNPReLU(256 + 3) 157 | 158 | self.classifier = nn.Sequential(Conv(259, classes, 1, 1, padding=0)) 159 | 160 | def forward(self, input): 161 | 162 | output0 = self.init_conv(input) 163 | 164 | down_1 = self.down_1(input) 165 | down_2 = self.down_2(input) 166 | down_3 = self.down_3(input) 167 | 168 | output0_cat = self.bn_prelu_1(torch.cat([output0, down_1], 1)) 169 | 170 | # DAB Block 1 171 | output1_0 = self.downsample_1(output0_cat) 172 | output1 = self.DAB_Block_1(output1_0) 173 | output1_cat = self.bn_prelu_2(torch.cat([output1, output1_0, down_2], 1)) 174 | 175 | # DAB Block 2 176 | output2_0 = self.downsample_2(output1_cat) 177 | output2 = self.DAB_Block_2(output2_0) 178 | output2_cat = self.bn_prelu_3(torch.cat([output2, output2_0, down_3], 1)) 179 | 180 | out = self.classifier(output2_cat) 181 | out = F.interpolate(out, input.size()[2:], mode='bilinear', align_corners=False) 182 | 183 | return out 184 | 185 | """print layers and params of network""" 186 | if __name__ == '__main__': 187 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 188 | model = DABNet(classes=19).to(device) 189 | summary(model,(3,512,1024)) 190 | -------------------------------------------------------------------------------- /model/ESNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | #ESNet: An Efficient Symmetric Network for Real-time Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1906.09826.pdf 4 | ################################################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.init as init 9 | import 
torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | class DownsamplerBlock(nn.Module): 13 | def __init__(self, ninput, noutput): 14 | super().__init__() 15 | 16 | self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True) 17 | self.pool = nn.MaxPool2d(2, stride=2) 18 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 19 | self.relu = nn.ReLU(inplace=True) 20 | 21 | def forward(self, input): 22 | x1 = self.pool(input) 23 | x2 = self.conv(input) 24 | 25 | diffY = x2.size()[2] - x1.size()[2] 26 | diffX = x2.size()[3] - x1.size()[3] 27 | 28 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 29 | diffY // 2, diffY - diffY // 2]) 30 | 31 | output = torch.cat([x2, x1], 1) 32 | output = self.bn(output) 33 | output = self.relu(output) 34 | return output 35 | 36 | class UpsamplerBlock (nn.Module): 37 | def __init__(self, ninput, noutput): 38 | super().__init__() 39 | 40 | self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) 41 | self.bn = nn.BatchNorm2d(noutput, eps=1e-3) 42 | 43 | def forward(self, input): 44 | 45 | output = self.conv(input) 46 | output = self.bn(output) 47 | 48 | return F.relu(output) 49 | 50 | class FCU(nn.Module): 51 | def __init__(self, chann, kernel_size,dropprob, dilated): 52 | """ 53 | Factorized Convolution Unit 54 | 55 | """ 56 | super(FCU,self).__init__() 57 | 58 | padding = int((kernel_size-1)//2) * dilated 59 | 60 | self.conv3x1_1 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(int((kernel_size-1)//2)*1,0), bias=True) 61 | 62 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,int((kernel_size-1)//2)*1), bias=True) 63 | 64 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 65 | 66 | self.conv3x1_2 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(padding,0), bias=True, dilation = (dilated,1)) 67 | 68 | self.conv1x3_2 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,padding), bias=True, dilation = (1, dilated)) 69 | 70 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 71 | 72 | self.relu = nn.ReLU(inplace = True) 73 | self.dropout = nn.Dropout2d(dropprob) 74 | 75 | def forward(self, input): 76 | residual = input 77 | output = self.conv3x1_1(input) 78 | output = self.relu(output) 79 | output = self.conv1x3_1(output) 80 | output = self.bn1(output) 81 | output = self.relu(output) 82 | 83 | output = self.conv3x1_2(output) 84 | output = self.relu(output) 85 | output = self.conv1x3_2(output) 86 | output = self.bn2(output) 87 | 88 | if (self.dropout.p != 0): 89 | output = self.dropout(output) 90 | 91 | return F.relu(residual+output,inplace=True) 92 | 93 | 94 | class PFCU(nn.Module): 95 | def __init__(self,chann): 96 | """ 97 | Parallel Factorized Convolution Unit 98 | 99 | """ 100 | 101 | super(PFCU,self).__init__() 102 | 103 | self.conv3x1_1 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(1,0), bias=True) 104 | 105 | self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True) 106 | 107 | self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) 108 | 109 | self.conv3x1_22 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(2,0), bias=True, dilation = (2,1)) 110 | self.conv1x3_22 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,2), bias=True, dilation = (1,2)) 111 | 112 | self.conv3x1_25 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(5,0), bias=True, dilation = (5,1)) 113 | self.conv1x3_25 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,5), bias=True, dilation = (1,5)) 114 | 115 | self.conv3x1_29 = 
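A minimal smoke test for model/ESNet.py at the CamVid resolution its __main__ block uses — a sketch assuming the import path model.ESNet. Three downsamplers take 360x480 to 45x60, and the symmetric decoder (two UpsamplerBlocks plus the transposed output conv) brings it back:

import torch
from model.ESNet import ESNet  # assumed import path

net = ESNet(classes=11).eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 360, 480))
print(out.shape)  # expected: torch.Size([1, 11, 360, 480])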
nn.Conv2d(chann, chann, (3,1), stride=1, padding=(9,0), bias=True, dilation = (9,1)) 116 | self.conv1x3_29 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,9), bias=True, dilation = (1,9)) 117 | 118 | self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) 119 | 120 | self.dropout = nn.Dropout2d(0.3) 121 | 122 | def forward(self, input): 123 | residual = input 124 | output = self.conv3x1_1(input) 125 | output = F.relu(output) 126 | output = self.conv1x3_1(output) 127 | output = self.bn1(output) 128 | output = F.relu(output) 129 | 130 | output2 = self.conv3x1_22(output) 131 | output2 = F.relu(output2) 132 | output2 = self.conv1x3_22(output2) 133 | output2 = self.bn2(output2) 134 | if (self.dropout.p != 0): 135 | output2 = self.dropout(output2) 136 | 137 | output5 = self.conv3x1_25(output) 138 | output5 = F.relu(output5) 139 | output5 = self.conv1x3_25(output5) 140 | output5 = self.bn2(output5) 141 | if (self.dropout.p != 0): 142 | output5 = self.dropout(output5) 143 | 144 | output9 = self.conv3x1_29(output) 145 | output9 = F.relu(output9) 146 | output9 = self.conv1x3_29(output9) 147 | output9 = self.bn2(output9) 148 | if (self.dropout.p != 0): 149 | output9 = self.dropout(output9) 150 | 151 | return F.relu(residual+output2+output5+output9,inplace=True) 152 | 153 | 154 | class ESNet(nn.Module): 155 | def __init__(self, classes): 156 | super().__init__() 157 | #-----ESNET---------# 158 | self.initial_block = DownsamplerBlock(3,16) 159 | 160 | self.layers = nn.ModuleList() 161 | 162 | for x in range(0, 3): 163 | self.layers.append(FCU(16, 3, 0.03, 1)) 164 | 165 | self.layers.append(DownsamplerBlock(16,64)) 166 | 167 | for x in range(0, 2): 168 | self.layers.append(FCU(64, 5, 0.03, 1)) 169 | 170 | self.layers.append(DownsamplerBlock(64,128)) 171 | 172 | for x in range(0, 3): 173 | self.layers.append(PFCU(chann=128)) 174 | 175 | self.layers.append(UpsamplerBlock(128,64)) 176 | self.layers.append(FCU(64, 5, 0, 1)) 177 | self.layers.append(FCU(64, 5, 0, 1)) 178 | 179 | self.layers.append(UpsamplerBlock(64,16)) 180 | self.layers.append(FCU(16, 3, 0, 1)) 181 | self.layers.append(FCU(16, 3, 0, 1)) 182 | 183 | self.output_conv = nn.ConvTranspose2d( 16, classes, 2, stride=2, padding=0, output_padding=0, bias=True) 184 | 185 | def forward(self, input): 186 | output = self.initial_block(input) 187 | 188 | for layer in self.layers: 189 | output = layer(output) 190 | 191 | output = self.output_conv(output) 192 | 193 | return output 194 | 195 | 196 | """print layers and params of network""" 197 | if __name__ == '__main__': 198 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 199 | model = ESNet(classes=11).to(device) 200 | summary(model,(3,360,480)) 201 | -------------------------------------------------------------------------------- /model/SegNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | #SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1511.00561.pdf 4 | ################################################################################## 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | 14 | __all__ = ["SegNet"] 15 | 16 | class SegNet(nn.Module): 17 | def __init__(self,classes= 19): 18 | super(SegNet, self).__init__() 19 | 20 | batchNorm_momentum = 0.1 21 | 22 | self.conv11 = nn.Conv2d(3, 64, 
kernel_size=3, padding=1) 23 | self.bn11 = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 24 | self.conv12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 25 | self.bn12 = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 26 | 27 | self.conv21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 28 | self.bn21 = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 29 | self.conv22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 30 | self.bn22 = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 31 | 32 | self.conv31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 33 | self.bn31 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 34 | self.conv32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 35 | self.bn32 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 36 | self.conv33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 37 | self.bn33 = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 38 | 39 | self.conv41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 40 | self.bn41 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 41 | self.conv42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 42 | self.bn42 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 43 | self.conv43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 44 | self.bn43 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 45 | 46 | self.conv51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 47 | self.bn51 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 48 | self.conv52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 49 | self.bn52 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 50 | self.conv53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 51 | self.bn53 = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 52 | 53 | self.conv53d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 54 | self.bn53d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 55 | self.conv52d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 56 | self.bn52d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 57 | self.conv51d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 58 | self.bn51d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 59 | 60 | self.conv43d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 61 | self.bn43d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 62 | self.conv42d = nn.Conv2d(512, 512, kernel_size=3, padding=1) 63 | self.bn42d = nn.BatchNorm2d(512, momentum= batchNorm_momentum) 64 | self.conv41d = nn.Conv2d(512, 256, kernel_size=3, padding=1) 65 | self.bn41d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 66 | 67 | self.conv33d = nn.Conv2d(256, 256, kernel_size=3, padding=1) 68 | self.bn33d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 69 | self.conv32d = nn.Conv2d(256, 256, kernel_size=3, padding=1) 70 | self.bn32d = nn.BatchNorm2d(256, momentum= batchNorm_momentum) 71 | self.conv31d = nn.Conv2d(256, 128, kernel_size=3, padding=1) 72 | self.bn31d = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 73 | 74 | self.conv22d = nn.Conv2d(128, 128, kernel_size=3, padding=1) 75 | self.bn22d = nn.BatchNorm2d(128, momentum= batchNorm_momentum) 76 | self.conv21d = nn.Conv2d(128, 64, kernel_size=3, padding=1) 77 | self.bn21d = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 78 | 79 | self.conv12d = nn.Conv2d(64, 64, kernel_size=3, padding=1) 80 | self.bn12d = nn.BatchNorm2d(64, momentum= batchNorm_momentum) 81 | self.conv11d = nn.Conv2d(64, classes, kernel_size=3, padding=1) 82 | 83 | 84 | def forward(self, x): 85 | 86 | # Stage 1 87 | x11 = F.relu(self.bn11(self.conv11(x))) 88 | x12 = F.relu(self.bn12(self.conv12(x11))) 89 | x1_size = 
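SegNet's decoder up-samples with the max-pooling indices recorded on the way down instead of learned deconvolutions; a minimal sketch of that pool/unpool pairing with plain torch.nn.functional (tensor sizes are illustrative):

import torch
import torch.nn.functional as F

x = torch.randn(1, 64, 90, 120)
pooled, idx = F.max_pool2d(x, kernel_size=2, stride=2, return_indices=True)      # 45 x 60
restored = F.max_unpool2d(pooled, idx, kernel_size=2, stride=2, output_size=x.size())
print(restored.shape)  # torch.Size([1, 64, 90, 120]); non-max positions are zero-filled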
x12.size() 90 | x1p, id1 = F.max_pool2d(x12,kernel_size=2, stride=2,return_indices=True) 91 | 92 | # Stage 2 93 | x21 = F.relu(self.bn21(self.conv21(x1p))) 94 | x22 = F.relu(self.bn22(self.conv22(x21))) 95 | x2_size = x22.size() 96 | x2p, id2 = F.max_pool2d(x22,kernel_size=2, stride=2,return_indices=True) 97 | 98 | # Stage 3 99 | x31 = F.relu(self.bn31(self.conv31(x2p))) 100 | x32 = F.relu(self.bn32(self.conv32(x31))) 101 | x33 = F.relu(self.bn33(self.conv33(x32))) 102 | x3_size = x33.size() 103 | x3p, id3 = F.max_pool2d(x33,kernel_size=2, stride=2,return_indices=True) 104 | 105 | # Stage 4 106 | x41 = F.relu(self.bn41(self.conv41(x3p))) 107 | x42 = F.relu(self.bn42(self.conv42(x41))) 108 | x43 = F.relu(self.bn43(self.conv43(x42))) 109 | x4_size = x43.size() 110 | x4p, id4 = F.max_pool2d(x43,kernel_size=2, stride=2,return_indices=True) 111 | 112 | # Stage 5 113 | x51 = F.relu(self.bn51(self.conv51(x4p))) 114 | x52 = F.relu(self.bn52(self.conv52(x51))) 115 | x53 = F.relu(self.bn53(self.conv53(x52))) 116 | x5_size = x53.size() 117 | x5p, id5 = F.max_pool2d(x53,kernel_size=2, stride=2,return_indices=True) 118 | 119 | 120 | # Stage 5d 121 | x5d = F.max_unpool2d(x5p, id5, kernel_size=2, stride=2, output_size=x5_size) 122 | x53d = F.relu(self.bn53d(self.conv53d(x5d))) 123 | x52d = F.relu(self.bn52d(self.conv52d(x53d))) 124 | x51d = F.relu(self.bn51d(self.conv51d(x52d))) 125 | 126 | # Stage 4d 127 | x4d = F.max_unpool2d(x51d, id4, kernel_size=2, stride=2, output_size=x4_size) 128 | x43d = F.relu(self.bn43d(self.conv43d(x4d))) 129 | x42d = F.relu(self.bn42d(self.conv42d(x43d))) 130 | x41d = F.relu(self.bn41d(self.conv41d(x42d))) 131 | 132 | # Stage 3d 133 | x3d = F.max_unpool2d(x41d, id3, kernel_size=2, stride=2, output_size=x3_size) 134 | x33d = F.relu(self.bn33d(self.conv33d(x3d))) 135 | x32d = F.relu(self.bn32d(self.conv32d(x33d))) 136 | x31d = F.relu(self.bn31d(self.conv31d(x32d))) 137 | 138 | # Stage 2d 139 | x2d = F.max_unpool2d(x31d, id2, kernel_size=2, stride=2, output_size=x2_size) 140 | x22d = F.relu(self.bn22d(self.conv22d(x2d))) 141 | x21d = F.relu(self.bn21d(self.conv21d(x22d))) 142 | 143 | # Stage 1d 144 | x1d = F.max_unpool2d(x21d, id1, kernel_size=2, stride=2, output_size=x1_size) 145 | x12d = F.relu(self.bn12d(self.conv12d(x1d))) 146 | x11d = self.conv11d(x12d) 147 | 148 | return x11d 149 | 150 | def load_from_segnet(self, model_path): 151 | s_dict = self.state_dict()# create a copy of the state dict 152 | th = torch.load(model_path).state_dict() # load the weigths 153 | # for name in th: 154 | # s_dict[corresp_name[name]] = th[name] 155 | self.load_state_dict(th) 156 | 157 | 158 | 159 | """print layers and params of network""" 160 | if __name__ == '__main__': 161 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 162 | model = SegNet(classes=19).to(device) 163 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | import torch.backends.cudnn as cudnn 6 | from argparse import ArgumentParser 7 | # user 8 | from builders.model_builder import build_model 9 | from builders.dataset_builder import build_dataset_test 10 | from utils.utils import save_predict 11 | from utils.metric.metric import get_iou 12 | from utils.convert_state import convert_state_dict 13 | 14 | 15 | def parse_args(): 16 | parser = ArgumentParser(description='Efficient semantic 
segmentation') 17 | parser.add_argument('--model', default="ENet", help="model name: (default ENet)") 18 | parser.add_argument('--dataset', default="camvid", help="dataset: cityscapes or camvid") 19 | parser.add_argument('--num_workers', type=int, default=1, help="the number of parallel threads") 20 | parser.add_argument('--batch_size', type=int, default=1, 21 | help=" the batch_size is set to 1 when evaluating or testing") 22 | parser.add_argument('--checkpoint', type=str,default="", 23 | help="use the file to load the checkpoint for evaluating or testing ") 24 | parser.add_argument('--save_seg_dir', type=str, default="./result/", 25 | help="saving path of prediction result") 26 | parser.add_argument('--best', action='store_true', help="Get the best result among last few checkpoints") 27 | parser.add_argument('--save', action='store_true', help="Save the predicted image") 28 | parser.add_argument('--cuda', default=True, help="run on CPU or GPU") 29 | parser.add_argument("--gpus", default="0", type=str, help="gpu ids (default: 0)") 30 | args = parser.parse_args() 31 | 32 | return args 33 | 34 | 35 | 36 | 37 | def test(args, test_loader, model): 38 | """ 39 | args: 40 | test_loader: loaded for test dataset 41 | model: model 42 | return: class IoU and mean IoU 43 | """ 44 | # evaluation or test mode 45 | model.eval() 46 | total_batches = len(test_loader) 47 | 48 | data_list = [] 49 | for i, (input, label, size, name) in enumerate(test_loader): 50 | with torch.no_grad(): 51 | input_var = input.cuda() 52 | start_time = time.time() 53 | output = model(input_var) 54 | torch.cuda.synchronize() 55 | time_taken = time.time() - start_time 56 | print('[%d/%d] time: %.2f' % (i + 1, total_batches, time_taken)) 57 | output = output.cpu().data[0].numpy() 58 | gt = np.asarray(label[0].numpy(), dtype=np.uint8) 59 | output = output.transpose(1, 2, 0) 60 | output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 61 | data_list.append([gt.flatten(), output.flatten()]) 62 | 63 | # save the predicted image 64 | if args.save: 65 | save_predict(output, gt, name[0], args.dataset, args.save_seg_dir, 66 | output_grey=False, output_color=True, gt_color=True) 67 | 68 | meanIoU, per_class_iu = get_iou(data_list, args.classes) 69 | return meanIoU, per_class_iu 70 | 71 | 72 | def test_model(args): 73 | """ 74 | main function for testing 75 | param args: global arguments 76 | return: None 77 | """ 78 | print(args) 79 | 80 | if args.cuda: 81 | print("=====> use gpu id: '{}'".format(args.gpus)) 82 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 83 | if not torch.cuda.is_available(): 84 | raise Exception("no GPU found or wrong gpu id, please run without --cuda") 85 | 86 | # build the model 87 | model = build_model(args.model, num_classes=args.classes) 88 | 89 | if args.cuda: 90 | model = model.cuda() # using GPU for inference 91 | cudnn.benchmark = True 92 | 93 | if args.save: 94 | if not os.path.exists(args.save_seg_dir): 95 | os.makedirs(args.save_seg_dir) 96 | 97 | # load the test set 98 | datas, testLoader = build_dataset_test(args.dataset, args.num_workers) 99 | 100 | if not args.best: 101 | if args.checkpoint: 102 | if os.path.isfile(args.checkpoint): 103 | print("=====> loading checkpoint '{}'".format(args.checkpoint)) 104 | checkpoint = torch.load(args.checkpoint) 105 | model.load_state_dict(checkpoint['model']) 106 | # model.load_state_dict(convert_state_dict(checkpoint['model'])) 107 | else: 108 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 109 | raise FileNotFoundError("no checkpoint 
found at '{}'".format(args.checkpoint)) 110 | 111 | print("=====> beginning validation") 112 | print("validation set length: ", len(testLoader)) 113 | mIOU_val, per_class_iu = test(args, testLoader, model) 114 | print(mIOU_val) 115 | print(per_class_iu) 116 | 117 | # Get the best test result among the last 10 model records. 118 | else: 119 | if args.checkpoint: 120 | if os.path.isfile(args.checkpoint): 121 | dirname, basename = os.path.split(args.checkpoint) 122 | epoch = int(os.path.splitext(basename)[0].split('_')[1]) 123 | mIOU_val = [] 124 | per_class_iu = [] 125 | for i in range(epoch - 9, epoch + 1): 126 | basename = 'model_' + str(i) + '.pth' 127 | resume = os.path.join(dirname, basename) 128 | checkpoint = torch.load(resume) 129 | model.load_state_dict(checkpoint['model']) 130 | print("=====> beginning test the " + basename) 131 | print("validation set length: ", len(testLoader)) 132 | mIOU_val_0, per_class_iu_0 = test(args, testLoader, model) 133 | mIOU_val.append(mIOU_val_0) 134 | per_class_iu.append(per_class_iu_0) 135 | 136 | index = list(range(epoch - 9, epoch + 1))[np.argmax(mIOU_val)] 137 | print("The best mIoU among the last 10 models is", index) 138 | print(mIOU_val) 139 | per_class_iu = per_class_iu[np.argmax(mIOU_val)] 140 | mIOU_val = np.max(mIOU_val) 141 | print(mIOU_val) 142 | print(per_class_iu) 143 | 144 | else: 145 | print("=====> no checkpoint found at '{}'".format(args.checkpoint)) 146 | raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) 147 | 148 | # Save the result 149 | if not args.best: 150 | model_path = os.path.splitext(os.path.basename(args.checkpoint)) 151 | args.logFile = 'test_' + model_path[0] + '.txt' 152 | logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile) 153 | else: 154 | args.logFile = 'test_' + 'best' + str(index) + '.txt' 155 | logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile) 156 | 157 | # Save the result 158 | if os.path.isfile(logFileLoc): 159 | logger = open(logFileLoc, 'a') 160 | else: 161 | logger = open(logFileLoc, 'w') 162 | logger.write("Mean IoU: %.4f" % mIOU_val) 163 | logger.write("\nPer class IoU: ") 164 | for i in range(len(per_class_iu)): 165 | logger.write("%.4f\t" % per_class_iu[i]) 166 | logger.flush() 167 | logger.close() 168 | 169 | 170 | if __name__ == '__main__': 171 | 172 | args = parse_args() 173 | 174 | args.save_seg_dir = os.path.join(args.save_seg_dir, args.dataset, args.model) 175 | 176 | if args.dataset == 'cityscapes': 177 | args.classes = 19 178 | elif args.dataset == 'camvid': 179 | args.classes = 11 180 | else: 181 | raise NotImplementedError( 182 | "This repository now supports two datasets: cityscapes and camvid, %s is not included" % args.dataset) 183 | 184 | test_model(args) 185 | -------------------------------------------------------------------------------- /model/SQNet.py: -------------------------------------------------------------------------------- 1 | ################################################################### 2 | # SQNet: Speeding up semantic segmentation for autonomous driving 3 | #Paper-Link: https://openreview.net/pdf?id=S1uHiFyyg 4 | ################################################################### 5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.autograd import Variable 9 | import torch.nn.functional as F 10 | import numpy as np 11 | import torch.optim as optim 12 | import math 13 | from torchsummary import summary 14 | 15 | 16 | 17 | __all__ = ["SQNet"] 18 | 19 | class Fire(nn.Module): 20 | def 
__init__(self, inplanes, squeeze_planes, expand_planes): 21 | super(Fire, self).__init__() 22 | self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1) 23 | # self.bn1 = nn.BatchNorm2d(squeeze_planes) 24 | self.relu1 = nn.ELU(inplace=True) 25 | self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1) 26 | # self.bn2 = nn.BatchNorm2d(expand_planes) 27 | self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1) 28 | # self.bn3 = nn.BatchNorm2d(expand_planes) 29 | self.relu2 = nn.ELU(inplace=True) 30 | 31 | # using MSR initilization 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 35 | m.weight.data.normal_(0, math.sqrt(2./n)) 36 | 37 | def forward(self, x): 38 | x = self.conv1(x) 39 | # x = self.bn1(x) 40 | x = self.relu1(x) 41 | out1 = self.conv2(x) 42 | # out1 = self.bn2(out1) 43 | out2 = self.conv3(x) 44 | # out2 = self.bn3(out2) 45 | out = torch.cat([out1, out2], 1) 46 | out = self.relu2(out) 47 | return out 48 | 49 | 50 | class ParallelDilatedConv(nn.Module): 51 | def __init__(self, inplanes, planes): 52 | super(ParallelDilatedConv, self).__init__() 53 | self.dilated_conv_1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1) 54 | self.dilated_conv_2 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=2, dilation=2) 55 | self.dilated_conv_3 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=3, dilation=3) 56 | self.dilated_conv_4 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=4, dilation=4) 57 | self.relu1 = nn.ELU(inplace=True) 58 | self.relu2 = nn.ELU(inplace=True) 59 | self.relu3 = nn.ELU(inplace=True) 60 | self.relu4 = nn.ELU(inplace=True) 61 | 62 | def forward(self, x): 63 | out1 = self.dilated_conv_1(x) 64 | out2 = self.dilated_conv_2(x) 65 | out3 = self.dilated_conv_3(x) 66 | out4 = self.dilated_conv_4(x) 67 | out1 = self.relu1(out1) 68 | out2 = self.relu2(out2) 69 | out3 = self.relu3(out3) 70 | out4 = self.relu4(out4) 71 | out = out1 + out2 + out3 + out4 72 | return out 73 | 74 | class SQNet(nn.Module): 75 | def __init__(self, classes): 76 | super().__init__() 77 | 78 | self.num_classes = classes 79 | 80 | self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=2, padding=1) # 32 81 | # self.bn1 = nn.BatchNorm2d(96) 82 | self.relu1 = nn.ELU(inplace=True) 83 | self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 84 | self.fire1_1 = Fire(96, 16, 64) 85 | self.fire1_2 = Fire(128, 16, 64) 86 | self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 8 87 | self.fire2_1 = Fire(128, 32, 128) 88 | self.fire2_2 = Fire(256, 32, 128) 89 | self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 90 | self.fire3_1 = Fire(256, 64, 256) 91 | self.fire3_2 = Fire(512, 64, 256) 92 | self.fire3_3 = Fire(512, 64, 256) 93 | self.parallel = ParallelDilatedConv(512, 512) 94 | self.deconv1 = nn.ConvTranspose2d(512, 256, 3, stride=2, padding=1, output_padding=1) 95 | # self.bn2 = nn.BatchNorm2d(256) 96 | self.relu2 = nn.ELU(inplace=True) 97 | self.deconv2 = nn.ConvTranspose2d(512, 128, 3, stride=2, padding=1, output_padding=1) 98 | # self.bn3 = nn.BatchNorm2d(128) 99 | self.relu3 = nn.ELU(inplace=True) 100 | self.deconv3 = nn.ConvTranspose2d(256, 96, 3, stride=2, padding=1, output_padding=1) 101 | # self.bn4 = nn.BatchNorm2d(96) 102 | self.relu4 = nn.ELU(inplace=True) 103 | self.deconv4 = nn.ConvTranspose2d(192, self.num_classes, 3, stride=2, padding=1, output_padding=1) 104 | 105 | 
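# skip-refinement convolutions: conv3_1/conv2_1/conv1_1 refine the encoder skip features, while conv3_2/conv2_2/conv1_2 refine the concatenated skip + decoder features in forward()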
self.conv3_1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # 32 106 | self.conv3_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) # 32 107 | self.conv2_1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) # 32 108 | self.conv2_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # 32 109 | self.conv1_1 = nn.Conv2d(96, 96, kernel_size=3, stride=1, padding=1) # 32 110 | self.conv1_2 = nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1) # 32 111 | 112 | self.relu1_1 = nn.ELU(inplace=True) 113 | self.relu1_2 = nn.ELU(inplace=True) 114 | self.relu2_1 = nn.ELU(inplace=True) 115 | self.relu2_2 = nn.ELU(inplace=True) 116 | self.relu3_1 = nn.ELU(inplace=True) 117 | self.relu3_2 = nn.ELU(inplace=True) 118 | 119 | for m in self.modules(): 120 | if isinstance(m, nn.Conv2d): 121 | n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels 122 | m.weight.data.normal_(0, math.sqrt(2. / n)) 123 | elif isinstance(m, nn.BatchNorm2d): 124 | m.weight.data.fill_(1) 125 | m.bias.data.zero_() 126 | 127 | def forward(self, x): 128 | x = self.conv1(x) 129 | # x = self.bn1(x) 130 | x_1 = self.relu1(x) 131 | # print "x_1: %s" % str(x_1.size()) 132 | x = self.maxpool1(x_1) 133 | x = self.fire1_1(x) 134 | x_2 = self.fire1_2(x) 135 | # print "x_2: %s" % str(x_2.size()) 136 | x = self.maxpool2(x_2) 137 | x = self.fire2_1(x) 138 | x_3 = self.fire2_2(x) 139 | # print "x_3: %s" % str(x_3.size()) 140 | x = self.maxpool3(x_3) 141 | x = self.fire3_1(x) 142 | x = self.fire3_2(x) 143 | x = self.fire3_3(x) 144 | x = self.parallel(x) 145 | # print "x: %s" % str(x.size()) 146 | y_3 = self.deconv1(x) 147 | y_3 = self.relu2(y_3) 148 | x_3 = self.conv3_1(x_3) 149 | x_3 = self.relu3_1(x_3) 150 | # print "y_3: %s" % str(y_3.size()) 151 | # x = x.transpose(1, 2, 0) 152 | # print('x_3.size():', x_3.size()) 153 | # print('y_3.size():', y_3.size()) 154 | x_3 = F.interpolate(x_3, y_3.size()[2:], mode="bilinear", align_corners=True) 155 | x = torch.cat([x_3, y_3], 1) 156 | x = self.conv3_2(x) 157 | x = self.relu3_2(x) 158 | # concat x_3 159 | y_2 = self.deconv2(x) 160 | y_2 = self.relu3(y_2) 161 | x_2 = self.conv2_1(x_2) 162 | x_2 = self.relu2_1(x_2) 163 | # print "y_2: %s" % str(y_2.size()) 164 | # concat x_2 165 | # print('x_2.size():', x_2.size()) 166 | # print('y_2.size():', y_2.size()) 167 | y_2 = F.interpolate(y_2, x_2.size()[2:], mode="bilinear", align_corners=True) 168 | x = torch.cat([x_2, y_2], 1) 169 | x = self.conv2_2(x) 170 | x = self.relu2_2(x) 171 | y_1 = self.deconv3(x) 172 | y_1 = self.relu4(y_1) 173 | x_1 = self.conv1_1(x_1) 174 | x_1 = self.relu1_1(x_1) 175 | # print "y_1: %s" % str(y_1.size()) 176 | # concat x_1 177 | x = torch.cat([x_1, y_1], 1) 178 | x = self.conv1_2(x) 179 | x = self.relu1_2(x) 180 | x = self.deconv4(x) 181 | return x #, x_1, x_2, x_3, y_1, y_2, y_3 182 | 183 | 184 | 185 | """print layers and params of network""" 186 | if __name__ == '__main__': 187 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 188 | model = SQNet(classes=19).to(device) 189 | summary(model,(3,512,1024)) -------------------------------------------------------------------------------- /model/ContextNet.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | #ContextNet: Exploring Context and Detail for Semantic Segmentation in Real-time 3 | #Paper-Link: https://arxiv.org/abs/1805.04554 4 | 
################################################################################## 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | 14 | __all__ = ["ContextNet"] 15 | 16 | class Custom_Conv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs): 18 | super(Custom_Conv, self).__init__() 19 | self.conv = nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU(True) 23 | ) 24 | 25 | def forward(self, x): 26 | return self.conv(x) 27 | 28 | class DepthSepConv(nn.Module): 29 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 30 | super(DepthSepConv, self).__init__() 31 | self.conv = nn.Sequential( 32 | nn.Conv2d(dw_channels, dw_channels, 3, stride, 1, groups=dw_channels, bias=False), 33 | nn.BatchNorm2d(dw_channels), 34 | nn.ReLU(True), 35 | nn.Conv2d(dw_channels, out_channels, 1, bias=False), 36 | nn.BatchNorm2d(out_channels), 37 | nn.ReLU(True) 38 | ) 39 | 40 | def forward(self, x): 41 | return self.conv(x) 42 | 43 | class DepthConv(nn.Module): 44 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 45 | super(DepthConv, self).__init__() 46 | self.conv = nn.Sequential( 47 | nn.Conv2d(dw_channels, out_channels, 3, stride, 1, groups=dw_channels, bias=False), 48 | nn.BatchNorm2d(out_channels), 49 | nn.ReLU(True) 50 | ) 51 | 52 | def forward(self, x): 53 | return self.conv(x) 54 | 55 | class LinearBottleneck(nn.Module): 56 | def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs): 57 | super(LinearBottleneck, self).__init__() 58 | self.use_shortcut = stride == 1 and in_channels == out_channels 59 | self.block = nn.Sequential( 60 | Custom_Conv(in_channels, in_channels * t, 1), 61 | DepthConv(in_channels * t, in_channels * t, stride), 62 | nn.Conv2d(in_channels * t, out_channels, 1, bias=False), 63 | nn.BatchNorm2d(out_channels) 64 | ) 65 | 66 | def forward(self, x): 67 | out = self.block(x) 68 | if self.use_shortcut: 69 | out = x + out 70 | return out 71 | 72 | 73 | 74 | 75 | class Shallow_net(nn.Module): 76 | def __init__(self, dw_channels1=32, dw_channels2=64, out_channels=128, **kwargs): 77 | super(Shallow_net, self).__init__() 78 | self.conv = Custom_Conv(3, dw_channels1, 3, 2) 79 | self.dsconv1 = DepthSepConv(dw_channels1, dw_channels2, 2) 80 | self.dsconv2 = DepthSepConv(dw_channels2, out_channels, 2) 81 | self.dsconv3 = DepthSepConv(out_channels, out_channels, 1) 82 | 83 | 84 | def forward(self, x): 85 | x = self.conv(x) 86 | x = self.dsconv1(x) 87 | x = self.dsconv2(x) 88 | x = self.dsconv3(x) 89 | return x 90 | 91 | class Deep_net(nn.Module): 92 | def __init__(self, in_channels, block_channels, 93 | t, num_blocks, **kwargs): 94 | super(Deep_net, self).__init__() 95 | self.block_channels = block_channels 96 | self.t = t 97 | self.num_blocks = num_blocks 98 | 99 | self.conv_ = Custom_Conv(3, in_channels, 3, 2) 100 | self.bottleneck1 = self._layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], t[0], 1) 101 | self.bottleneck2 = self._layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], t[1], 1) 102 | self.bottleneck3 = self._layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], t[2], 2) 103 | self.bottleneck4 = self._layer(LinearBottleneck, block_channels[2], block_channels[3], num_blocks[3], t[3], 2) 104 | self.bottleneck5 = 
self._layer(LinearBottleneck, block_channels[3], block_channels[4], num_blocks[4], t[4], 1) 105 | self.bottleneck6 = self._layer(LinearBottleneck, block_channels[4], block_channels[5], num_blocks[5], t[5], 1) 106 | 107 | def _layer(self, block, in_channels, out_channels, blocks, t, stride): 108 | layers = [] 109 | layers.append(block(in_channels, out_channels, t, stride)) 110 | for i in range(1, blocks): 111 | layers.append(block(out_channels, out_channels, t, 1)) 112 | 113 | return nn.Sequential(*layers) 114 | 115 | def forward(self, x): 116 | x = self.conv_(x) 117 | x = self.bottleneck1(x) 118 | x = self.bottleneck2(x) 119 | x = self.bottleneck3(x) 120 | x = self.bottleneck4(x) 121 | x = self.bottleneck5(x) 122 | x = self.bottleneck6(x) 123 | return x 124 | 125 | class FeatureFusionModule(nn.Module): 126 | def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs): 127 | super(FeatureFusionModule, self).__init__() 128 | self.scale_factor = scale_factor 129 | self.dwconv = DepthConv(lower_in_channels, out_channels, 1) 130 | self.conv_lower_res = nn.Sequential( 131 | nn.Conv2d(out_channels, out_channels, 1), 132 | nn.BatchNorm2d(out_channels) 133 | ) 134 | self.conv_higher_res = nn.Sequential( 135 | nn.Conv2d(highter_in_channels, out_channels, 1), 136 | nn.BatchNorm2d(out_channels) 137 | ) 138 | self.relu = nn.ReLU(True) 139 | 140 | def forward(self, higher_res_feature, lower_res_feature): 141 | _, _, h, w = higher_res_feature.size() 142 | lower_res_feature = F.interpolate(lower_res_feature, size=(h,w), mode='bilinear', align_corners=True) 143 | lower_res_feature = self.dwconv(lower_res_feature) 144 | lower_res_feature = self.conv_lower_res(lower_res_feature) 145 | 146 | higher_res_feature = self.conv_higher_res(higher_res_feature) 147 | out = higher_res_feature + lower_res_feature 148 | return self.relu(out) 149 | 150 | class Classifer(nn.Module): 151 | def __init__(self, dw_channels, num_classes, stride=1, **kwargs): 152 | super(Classifer, self).__init__() 153 | self.dsconv1 = DepthSepConv(dw_channels, dw_channels, stride) 154 | self.dsconv2 = DepthSepConv(dw_channels, dw_channels, stride) 155 | self.conv = nn.Sequential( 156 | nn.Dropout(0.1), 157 | nn.Conv2d(dw_channels, num_classes, 1) 158 | ) 159 | 160 | def forward(self, x): 161 | x = self.dsconv1(x) 162 | x = self.dsconv2(x) 163 | x = self.conv(x) 164 | return x 165 | 166 | 167 | 168 | class ContextNet(nn.Module): 169 | def __init__(self, classes, aux=False, **kwargs): 170 | super(ContextNet, self).__init__() 171 | self.aux = aux 172 | self.spatial_detail = Shallow_net(32, 64, 128) 173 | self.context_feature_extractor = Deep_net(32, [32, 32, 48, 64, 96, 128], [1, 6, 6, 6, 6, 6], [1, 1, 3, 3, 2, 2]) 174 | self.feature_fusion = FeatureFusionModule(128, 128, 128) 175 | self.classifier = Classifer(128, classes) 176 | if self.aux: 177 | self.auxlayer = nn.Sequential( 178 | nn.Conv2d(128, 32, 3, padding=1, bias=False), 179 | nn.BatchNorm2d(32), 180 | nn.ReLU(True), 181 | nn.Dropout(0.1), 182 | nn.Conv2d(32, classes, 1) 183 | ) 184 | 185 | def forward(self, x): 186 | size = x.size()[2:] 187 | 188 | higher_res_features = self.spatial_detail(x) 189 | 190 | x_low = F.interpolate(x, scale_factor = 0.25, mode='bilinear', align_corners=True) 191 | 192 | x = self.context_feature_extractor(x_low) 193 | 194 | x = self.feature_fusion(higher_res_features, x) 195 | 196 | x = self.classifier(x) 197 | 198 | outputs = [] 199 | x = F.interpolate(x, size, mode='bilinear', align_corners=True) 200 | 201 | 
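# collect the full-resolution prediction; when aux=True an auxiliary output from the high-resolution branch is appended as well (only the main output is returned below)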
outputs.append(x) 202 | if self.aux: 203 | auxout = self.auxlayer(higher_res_features) 204 | auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) 205 | outputs.append(auxout) 206 | 207 | return x 208 | 209 | # return tuple(outputs) 210 | 211 | 212 | 213 | """print layers and params of network""" 214 | if __name__ == '__main__': 215 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 216 | model = ContextNet(classes=19).to(device) 217 | summary(model,(3,512,1024)) 218 | -------------------------------------------------------------------------------- /tools/flops_counter/README.md: -------------------------------------------------------------------------------- 1 | # Flops counter for convolutional networks in the PyTorch framework 2 | [](https://pypi.org/project/ptflops/) 3 | 4 | This script is designed to compute the theoretical amount of multiply-add operations 5 | in convolutional neural networks. It can also compute the number of parameters and 6 | print the per-layer computational cost of a given network. 7 | 8 | Supported layers: 9 | - Conv1d/2d/3d (including grouping) 10 | - ConvTranspose2d (including grouping) 11 | - BatchNorm1d/2d/3d 12 | - Activations (ReLU, PReLU, ELU, ReLU6, LeakyReLU) 13 | - Linear 14 | - Upsample 15 | - Poolings (AvgPool1d/2d/3d, MaxPool1d/2d/3d and adaptive ones) 16 | 17 | Requirements: PyTorch >= 0.4.1, torchvision >= 0.2.1 18 | 19 | Thanks to @warmspringwinds for the initial version of this script. 20 | 21 | ## Usage tips 22 | 23 | - This script does not take `torch.nn.functional.*` operations into account. For instance, if a semantic segmentation model uses `torch.nn.functional.interpolate` to upscale features, those operations will not contribute to the overall FLOPs count. To avoid this, use `torch.nn.Upsample` instead of `torch.nn.functional.interpolate`. 24 | - `ptflops` runs a given model on a random tensor and estimates the amount of computation performed during inference. Complicated models can have several inputs, some of which may be optional. To construct a non-trivial input, use the `input_constructor` argument of `get_model_complexity_info`: `input_constructor` is a function that takes the input spatial resolution as a tuple and returns a dict with the named input arguments of the model. This dict is then passed to the model as keyword arguments, as in the sketch below.
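A minimal sketch of such an `input_constructor`, assuming a toy model (`TinySeg`, not part of this repository) whose `forward` takes a single named tensor argument `x`; adapt the returned dict to the actual argument names of your own model:

```python
import torch
import torch.nn as nn
from ptflops import get_model_complexity_info

class TinySeg(nn.Module):
    # toy network used only to illustrate the keyword-argument interface
    def __init__(self, classes=19):
        super().__init__()
        self.conv = nn.Conv2d(3, classes, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)

def input_constructor(input_res):
    # input_res is the resolution tuple given to get_model_complexity_info,
    # e.g. (3, 512, 1024); return the keyword arguments of the model's forward
    return {'x': torch.zeros(1, *input_res)}

net = TinySeg(classes=19)
flops, params = get_model_complexity_info(net, (3, 512, 1024), as_strings=True,
                                           print_per_layer_stat=False,
                                           input_constructor=input_constructor)
print('Flops: ' + flops)
print('Params: ' + params)
```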
25 | 26 | ## Install the latest version 27 | ```bash 28 | pip install --upgrade git+https://github.com/sovrasov/flops-counter.pytorch.git 29 | ``` 30 | 31 | ## Example 32 | ```python 33 | import torchvision.models as models 34 | import torch 35 | from ptflops import get_model_complexity_info 36 | 37 | with torch.cuda.device(0): 38 | net = models.densenet161() 39 | flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True, print_per_layer_stat=True) 40 | print('Flops: ' + flops) 41 | print('Params: ' + params) 42 | ``` 43 | 44 | ## Benchmark 45 | 46 | ### [torchvision](https://pytorch.org/docs/1.0.0/torchvision/models.html) 47 | 48 | Model | Input Resolution | Params(M) | MACs(G) | Top-1 error | Top-5 error 49 | --- |--- |--- |--- |--- |--- 50 | alexnet |224x224 | 61.1 | 0.72 | 43.45 | 20.91 51 | vgg11 |224x224 | 132.86 | 7.63 | 30.98 | 11.37 52 | vgg13 |224x224 | 133.05 | 11.34 | 30.07 | 10.75 53 | vgg16 |224x224 | 138.36 | 15.5 | 28.41 | 9.62 54 | vgg19 |224x224 | 143.67 | 19.67 | 27.62 | 9.12 55 | vgg11_bn |224x224 | 132.87 | 7.64 | 29.62 | 10.19 56 | vgg13_bn |224x224 | 133.05 | 11.36 | 28.45 | 9.63 57 | vgg16_bn |224x224 | 138.37 | 15.53 | 26.63 | 8.50 58 | vgg19_bn |224x224 | 143.68 | 19.7 | 25.76 | 8.15 59 | resnet18 |224x224 | 11.69 | 1.82 | 30.24 | 10.92 60 | resnet34 |224x224 | 21.8 | 3.68 | 26.70 | 8.58 61 | resnet50 |224x224 | 25.56 | 4.12 | 23.85 | 7.13 62 | resnet101 |224x224 | 44.55 | 7.85 | 22.63 | 6.44 63 | resnet152 |224x224 | 60.19 | 11.58 | 21.69 | 5.94 64 | squeezenet1_0 |224x224 | 1.25 | 0.83 | 41.90 | 19.58 65 | squeezenet1_1 |224x224 | 1.24 | 0.36 | 41.81 | 19.38 66 | densenet121 |224x224 | 7.98 | 2.88 | 25.35 | 7.83 67 | densenet169 |224x224 | 14.15 | 3.42 | 24.00 | 7.00 68 | densenet201 |224x224 | 20.01 | 4.37 | 22.80 | 6.43 69 | densenet161 |224x224 | 28.68 | 7.82 | 22.35 | 6.20 70 | inception_v3 |224x224 | 27.16 | 2.85 | 22.55 | 6.44 71 | 72 | * Top-1 error - ImageNet single-crop top-1 error (224x224) 73 | * Top-5 error - ImageNet single-crop top-5 error (224x224) 74 | 75 | ### [Cadene/pretrained-models.pytorch](https://github.com/Cadene/pretrained-models.pytorch) 76 | 77 | Model | Input Resolution | Params(M) | MACs(G) | Acc@1 | Acc@5 78 | --- |--- |--- |--- |--- |--- 79 | alexnet | 224x224 | 61.1 | 0.72 | 56.432 | 79.194 80 | bninception | 224x224 | 11.3 | 2.05 | 73.524 | 91.562 81 | cafferesnet101 | 224x224 | 44.55 | 7.62 | 76.2 | 92.766 82 | densenet121 | 224x224 | 7.98 | 2.88 | 74.646 | 92.136 83 | densenet161 | 224x224 | 28.68 | 7.82 | 77.56 | 93.798 84 | densenet169 | 224x224 | 14.15 | 3.42 | 76.026 | 92.992 85 | densenet201 | 224x224 | 20.01 | 4.37 | 77.152 | 93.548 86 | dpn107 | 224x224 | 86.92 | 18.42 | 79.746 | 94.684 87 | dpn131 | 224x224 | 79.25 | 16.13 | 79.432 | 94.574 88 | dpn68 | 224x224 | 12.61 | 2.36 | 75.868 | 92.774 89 | dpn68b | 224x224 | 12.61 | 2.36 | 77.034 | 93.59 90 | dpn92 | 224x224 | 37.67 | 6.56 | 79.4 | 94.62 91 | dpn98 | 224x224 | 61.57 | 11.76 | 79.224 | 94.488 92 | fbresnet152 | 224x224 | 60.27 | 11.6 | 77.386 | 93.594 93 | inceptionresnetv2 | 299x299 | 55.84 | 13.22 | 80.17 | 95.234 94 | inceptionv3 | 299x299 | 27.16 | 5.73 | 77.294 | 93.454 95 | inceptionv4 | 299x299 | 42.68 | 12.31 | 80.062 | 94.926 96 | nasnetalarge | 331x331 | 88.75 | 24.04 | 82.566 | 96.086 97 | nasnetamobile | 224x224 | 5.29 | 0.59 | 74.08 | 91.74 98 | pnasnet5large | 331x331 | 86.06 | 25.21 | 82.736 | 95.992 99 | polynet | 331x331 | 95.37 | 34.9 | 81.002 | 95.624 100 | resnet101 | 224x224 | 44.55 | 7.85 | 77.438 | 93.672 101 | 
resnet152 | 224x224 | 60.19 | 11.58 | 78.428 | 94.11 102 | resnet18 | 224x224 | 11.69 | 1.82 | 70.142 | 89.274 103 | resnet34 | 224x224 | 21.8 | 3.68 | 73.554 | 91.456 104 | resnet50 | 224x224 | 25.56 | 4.12 | 76.002 | 92.98 105 | resnext101_32x4d | 224x224 | 44.18 | 8.03 | 78.188 | 93.886 106 | resnext101_64x4d | 224x224 | 83.46 | 15.55 | 78.956 | 94.252 107 | se_resnet101 | 224x224 | 49.33 | 7.63 | 78.396 | 94.258 108 | se_resnet152 | 224x224 | 66.82 | 11.37 | 78.658 | 94.374 109 | se_resnet50 | 224x224 | 28.09 | 3.9 | 77.636 | 93.752 110 | se_resnext101_32x4d | 224x224 | 48.96 | 8.05 | 80.236 | 95.028 111 | se_resnext50_32x4d | 224x224 | 27.56 | 4.28 | 79.076 | 94.434 112 | senet154 | 224x224 | 115.09 | 20.82 | 81.304 | 95.498 113 | squeezenet1_0 | 224x224 | 1.25 | 0.83 | 58.108 | 80.428 114 | squeezenet1_1 | 224x224 | 1.24 | 0.36 | 58.25 | 80.8 115 | vgg11 | 224x224 | 132.86 | 7.63 | 68.97 | 88.746 116 | vgg11_bn | 224x224 | 132.87 | 7.64 | 70.452 | 89.818 117 | vgg13 | 224x224 | 133.05 | 11.34 | 69.662 | 89.264 118 | vgg13_bn | 224x224 | 133.05 | 11.36 | 71.508 | 90.494 119 | vgg16 | 224x224 | 138.36 | 15.5 | 71.636 | 90.354 120 | vgg16_bn | 224x224 | 138.37 | 15.53 | 73.518 | 91.608 121 | vgg19 | 224x224 | 143.67 | 19.67 | 72.08 | 90.822 122 | vgg19_bn | 224x224 | 143.68 | 19.7 | 74.266 | 92.066 123 | xception | 299x299 | 22.86 | 8.42 | 78.888 | 94.292 124 | 125 | * Acc@1 - ImageNet single-crop top-1 accuracy on validation images of the same size used during the training process. 126 | * Acc@5 - ImageNet single-crop top-5 accuracy on validation images of the same size used during the training process. 127 | -------------------------------------------------------------------------------- /model/LinkNet.py: -------------------------------------------------------------------------------- 1 | ############################################################################################ 2 | #LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation 3 | #Paper-Link: https://arxiv.org/pdf/1707.03718.pdf 4 | ############################################################################################ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torchsummary import summary 10 | from torchvision.models import resnet 11 | 12 | 13 | 14 | __all__ = ["LinkNet"] 15 | 16 | class BasicBlock(nn.Module): 17 | 18 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, groups=1, bias=False): 19 | super(BasicBlock, self).__init__() 20 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=bias) 21 | self.bn1 = nn.BatchNorm2d(out_planes) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size, 1, padding, groups=groups, bias=bias) 24 | self.bn2 = nn.BatchNorm2d(out_planes) 25 | self.downsample = None 26 | if stride > 1: 27 | self.downsample = nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(out_planes),) 29 | 30 | def forward(self, x): 31 | residual = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | 40 | if self.downsample is not None: 41 | residual = self.downsample(x) 42 | 43 | out = self.relu(out+residual) 44 | 45 | return out 46 | 47 | 48 | class Encoder(nn.Module): 49 | 50 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, groups=1, bias=False): 
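# two stacked BasicBlocks: the first uses the given stride (downsampling when stride > 1), the second always uses stride 1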
51 | super(Encoder, self).__init__() 52 | self.block1 = BasicBlock(in_planes, out_planes, kernel_size, stride, padding, groups, bias) 53 | self.block2 = BasicBlock(out_planes, out_planes, kernel_size, 1, padding, groups, bias) 54 | 55 | def forward(self, x): 56 | x = self.block1(x) 57 | x = self.block2(x) 58 | 59 | return x 60 | 61 | 62 | class Decoder(nn.Module): 63 | 64 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=False): 65 | # TODO bias=True 66 | super(Decoder, self).__init__() 67 | self.conv1 = nn.Sequential(nn.Conv2d(in_planes, in_planes//4, 1, 1, 0, bias=bias), 68 | nn.BatchNorm2d(in_planes//4), 69 | nn.ReLU(inplace=True)) 70 | self.tp_conv = nn.Sequential(nn.ConvTranspose2d(in_planes//4, in_planes//4, kernel_size, stride, padding, output_padding, bias=bias), 71 | nn.BatchNorm2d(in_planes//4), 72 | nn.ReLU(inplace=True)) 73 | self.conv2 = nn.Sequential(nn.Conv2d(in_planes//4, out_planes, 1, 1, 0, bias=bias), 74 | nn.BatchNorm2d(out_planes), 75 | nn.ReLU(inplace=True)) 76 | 77 | def forward(self, x_high_level, x_low_level): 78 | x = self.conv1(x_high_level) 79 | x = self.tp_conv(x) 80 | 81 | # solution for padding issues 82 | # diffY = x_low_level.size()[2] - x_high_level.size()[2] 83 | # diffX = x_low_level.size()[3] - x_high_level.size()[3] 84 | # x = F.pad(x, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]) 85 | 86 | x = center_crop(x, x_low_level.size()[2], x_low_level.size()[3]) 87 | 88 | x = self.conv2(x) 89 | 90 | return x 91 | 92 | def center_crop(layer, max_height, max_width): 93 | _, _, h, w = layer.size() 94 | diffy = (h - max_height) // 2 95 | diffx = (w -max_width) // 2 96 | return layer[:,:,diffy:(diffy + max_height),diffx:(diffx + max_width)] 97 | 98 | 99 | def up_pad(layer, skip_height, skip_width): 100 | _, _, h, w = layer.size() 101 | diffy = skip_height - h 102 | diffx = skip_width -w 103 | return F.pad(layer,[diffx // 2, diffx - diffx // 2, 104 | diffy // 2, diffy - diffy // 2]) 105 | 106 | 107 | class LinkNetImprove(nn.Module): 108 | """ 109 | Generate Model Architecture 110 | """ 111 | 112 | def __init__(self, classes=19): 113 | """ 114 | Model initialization 115 | :param x_n: number of input neurons 116 | :type x_n: int 117 | """ 118 | super().__init__() 119 | 120 | base = resnet.resnet18(pretrained=True) 121 | 122 | self.in_block = nn.Sequential( 123 | base.conv1, 124 | base.bn1, 125 | base.relu, 126 | base.maxpool 127 | ) 128 | 129 | self.encoder1 = base.layer1 130 | self.encoder2 = base.layer2 131 | self.encoder3 = base.layer3 132 | self.encoder4 = base.layer4 133 | 134 | self.decoder1 = Decoder(64, 64, 3, 1, 1, 0) 135 | self.decoder2 = Decoder(128, 64, 3, 2, 1, 1) 136 | self.decoder3 = Decoder(256, 128, 3, 2, 1, 1) 137 | self.decoder4 = Decoder(512, 256, 3, 2, 1, 1) 138 | 139 | # Classifier 140 | self.tp_conv1 = nn.Sequential(nn.ConvTranspose2d(64, 32, 3, 2, 1, 1), 141 | nn.BatchNorm2d(32), 142 | nn.ReLU(inplace=True),) 143 | self.conv2 = nn.Sequential(nn.Conv2d(32, 32, 3, 1, 1), 144 | nn.BatchNorm2d(32), 145 | nn.ReLU(inplace=True),) 146 | self.tp_conv2 = nn.ConvTranspose2d(32, classes, 2, 2, 0) 147 | 148 | 149 | def forward(self, x): 150 | # Initial block 151 | x = self.in_block(x) 152 | 153 | # Encoder blocks 154 | e1 = self.encoder1(x) 155 | e2 = self.encoder2(e1) 156 | e3 = self.encoder3(e2) 157 | e4 = self.encoder4(e3) 158 | 159 | # Decoder blocks 160 | d4 = e3 + self.decoder4(e4, e3) 161 | d3 = e2 + self.decoder3(d4, e2) 162 | d2 = e1 + self.decoder2(d3, e1) 163 | d1 = 
x + self.decoder1(d2, x) 164 | 165 | # Classifier 166 | y = self.tp_conv1(d1) 167 | y = self.conv2(y) 168 | y = self.tp_conv2(y) 169 | 170 | return y 171 | 172 | 173 | class LinkNet(nn.Module): 174 | """ 175 | Generate model architecture 176 | """ 177 | 178 | def __init__(self, classes=19): 179 | """ 180 | Model initialization 181 | :param x_n: number of input neurons 182 | :type x_n: int 183 | """ 184 | super().__init__() 185 | self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False) 186 | self.bn1 = nn.BatchNorm2d(64) 187 | self.relu = nn.ReLU(inplace=True) 188 | self.maxpool = nn.MaxPool2d(3, 2, 1) 189 | 190 | self.encoder1 = Encoder(64, 64, 3, 1, 1) 191 | self.encoder2 = Encoder(64, 128, 3, 2, 1) 192 | self.encoder3 = Encoder(128, 256, 3, 2, 1) 193 | self.encoder4 = Encoder(256, 512, 3, 2, 1) 194 | 195 | 196 | self.decoder4 = Decoder(512, 256, 3, 2, 1, 1) 197 | self.decoder3 = Decoder(256, 128, 3, 2, 1, 1) 198 | self.decoder2 = Decoder(128, 64, 3, 2, 1, 1) 199 | self.decoder1 = Decoder(64, 64, 3, 1, 1, 0) 200 | 201 | 202 | # Classifier 203 | self.tp_conv1 = nn.Sequential(nn.ConvTranspose2d(64, 32, 3, 2, 1, 1), 204 | nn.BatchNorm2d(32), 205 | nn.ReLU(inplace=True),) 206 | self.conv2 = nn.Sequential(nn.Conv2d(32, 32, 3, 1, 1), 207 | nn.BatchNorm2d(32), 208 | nn.ReLU(inplace=True),) 209 | self.tp_conv2 = nn.ConvTranspose2d(32, classes, 2, 2, 0) 210 | 211 | def forward(self, x): 212 | # Initial block 213 | x = self.conv1(x) 214 | x = self.bn1(x) 215 | x = self.relu(x) 216 | x = self.maxpool(x) 217 | 218 | # Encoder blocks 219 | e1 = self.encoder1(x) 220 | e2 = self.encoder2(e1) 221 | e3 = self.encoder3(e2) 222 | e4 = self.encoder4(e3) 223 | 224 | # Decoder blocks 225 | d4 = e3 + self.decoder4(e4, e3) 226 | d3 = e2 + self.decoder3(d4, e2) 227 | d2 = e1 + self.decoder2(d3, e1) 228 | d1 = x + self.decoder1(d2, x) 229 | 230 | # Classifier 231 | y = self.tp_conv1(d1) 232 | y = self.conv2(y) 233 | y = self.tp_conv2(y) 234 | 235 | 236 | return y 237 | 238 | """print layers and params of network""" 239 | if __name__ == '__main__': 240 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 241 | model = LinkNet(classes=11).to(device) 242 | summary(model,(3,512,1024)) 243 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Efficient-Segmentation-Networks 2 | [![python-image]][python-url] 3 | [![pytorch-image]][pytorch-url] 4 | 5 | This project aims at providing an easy-to-use, modifiable reference implementation for real-time semantic segmentation models using PyTorch. 6 | 7 |
